bio 0.7.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +71 -27
- data/bin/br_biofetch.rb +5 -17
- data/bin/br_bioflat.rb +14 -26
- data/bin/br_biogetseq.rb +6 -18
- data/bin/br_pmfetch.rb +6 -16
- data/doc/Changes-0.7.rd +35 -0
- data/doc/KEGG_API.rd +287 -172
- data/doc/KEGG_API.rd.ja +273 -160
- data/doc/Tutorial.rd +18 -9
- data/doc/Tutorial.rd.ja +656 -138
- data/lib/bio.rb +6 -24
- data/lib/bio/alignment.rb +5 -5
- data/lib/bio/appl/blast.rb +132 -98
- data/lib/bio/appl/blast/format0.rb +9 -19
- data/lib/bio/appl/blast/wublast.rb +5 -18
- data/lib/bio/appl/emboss.rb +40 -47
- data/lib/bio/appl/hmmer.rb +116 -82
- data/lib/bio/appl/hmmer/report.rb +509 -364
- data/lib/bio/appl/spidey/report.rb +7 -18
- data/lib/bio/data/na.rb +3 -21
- data/lib/bio/db.rb +3 -21
- data/lib/bio/db/aaindex.rb +147 -52
- data/lib/bio/db/embl/common.rb +27 -6
- data/lib/bio/db/embl/embl.rb +18 -10
- data/lib/bio/db/embl/sptr.rb +87 -67
- data/lib/bio/db/embl/swissprot.rb +32 -3
- data/lib/bio/db/embl/trembl.rb +32 -3
- data/lib/bio/db/embl/uniprot.rb +32 -3
- data/lib/bio/db/fasta.rb +327 -289
- data/lib/bio/db/medline.rb +25 -4
- data/lib/bio/db/nbrf.rb +12 -20
- data/lib/bio/db/pdb.rb +4 -1
- data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
- data/lib/bio/db/pdb/pdb.rb +13 -8
- data/lib/bio/db/rebase.rb +93 -97
- data/lib/bio/feature.rb +2 -31
- data/lib/bio/io/ddbjxml.rb +167 -139
- data/lib/bio/io/fastacmd.rb +89 -56
- data/lib/bio/io/flatfile.rb +994 -278
- data/lib/bio/io/flatfile/index.rb +257 -194
- data/lib/bio/io/flatfile/indexer.rb +37 -29
- data/lib/bio/reference.rb +147 -64
- data/lib/bio/sequence.rb +57 -417
- data/lib/bio/sequence/aa.rb +64 -0
- data/lib/bio/sequence/common.rb +175 -0
- data/lib/bio/sequence/compat.rb +68 -0
- data/lib/bio/sequence/format.rb +134 -0
- data/lib/bio/sequence/generic.rb +24 -0
- data/lib/bio/sequence/na.rb +189 -0
- data/lib/bio/shell.rb +9 -23
- data/lib/bio/shell/core.rb +130 -125
- data/lib/bio/shell/demo.rb +143 -0
- data/lib/bio/shell/{session.rb → interface.rb} +42 -40
- data/lib/bio/shell/object.rb +52 -0
- data/lib/bio/shell/plugin/codon.rb +4 -22
- data/lib/bio/shell/plugin/emboss.rb +23 -0
- data/lib/bio/shell/plugin/entry.rb +34 -25
- data/lib/bio/shell/plugin/flatfile.rb +5 -23
- data/lib/bio/shell/plugin/keggapi.rb +11 -24
- data/lib/bio/shell/plugin/midi.rb +5 -23
- data/lib/bio/shell/plugin/obda.rb +4 -22
- data/lib/bio/shell/plugin/seq.rb +6 -24
- data/lib/bio/shell/rails/Rakefile +10 -0
- data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
- data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
- data/lib/bio/shell/rails/config/boot.rb +19 -0
- data/lib/bio/shell/rails/config/database.yml +85 -0
- data/lib/bio/shell/rails/config/environment.rb +53 -0
- data/lib/bio/shell/rails/config/environments/development.rb +19 -0
- data/lib/bio/shell/rails/config/environments/production.rb +19 -0
- data/lib/bio/shell/rails/config/environments/test.rb +19 -0
- data/lib/bio/shell/rails/config/routes.rb +19 -0
- data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
- data/lib/bio/shell/rails/public/404.html +8 -0
- data/lib/bio/shell/rails/public/500.html +8 -0
- data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
- data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
- data/lib/bio/shell/rails/public/dispatch.rb +10 -0
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/icon.png +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +277 -0
- data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
- data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
- data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
- data/lib/bio/shell/rails/public/robots.txt +1 -0
- data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
- data/lib/bio/shell/rails/script/about +3 -0
- data/lib/bio/shell/rails/script/breakpointer +3 -0
- data/lib/bio/shell/rails/script/console +3 -0
- data/lib/bio/shell/rails/script/destroy +3 -0
- data/lib/bio/shell/rails/script/generate +3 -0
- data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
- data/lib/bio/shell/rails/script/performance/profiler +3 -0
- data/lib/bio/shell/rails/script/plugin +3 -0
- data/lib/bio/shell/rails/script/process/reaper +3 -0
- data/lib/bio/shell/rails/script/process/spawner +3 -0
- data/lib/bio/shell/rails/script/process/spinner +3 -0
- data/lib/bio/shell/rails/script/runner +3 -0
- data/lib/bio/shell/rails/script/server +42 -0
- data/lib/bio/shell/rails/test/test_helper.rb +28 -0
- data/lib/bio/shell/web.rb +90 -0
- data/lib/bio/util/contingency_table.rb +231 -225
- data/sample/any2fasta.rb +59 -0
- data/test/data/HMMER/hmmpfam.out +64 -0
- data/test/data/HMMER/hmmsearch.out +88 -0
- data/test/data/aaindex/DAYM780301 +30 -0
- data/test/data/aaindex/PRAM900102 +20 -0
- data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
- data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
- data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
- data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
- data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
- data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
- data/test/unit/bio/appl/blast/test_report.rb +15 -12
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
- data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
- data/test/unit/bio/appl/test_blast.rb +5 -5
- data/test/unit/bio/data/test_na.rb +9 -18
- data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
- data/test/unit/bio/db/test_aaindex.rb +197 -0
- data/test/unit/bio/io/test_fastacmd.rb +55 -0
- data/test/unit/bio/sequence/test_aa.rb +102 -0
- data/test/unit/bio/sequence/test_common.rb +178 -0
- data/test/unit/bio/sequence/test_compat.rb +82 -0
- data/test/unit/bio/sequence/test_na.rb +242 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
- data/test/unit/bio/test_alignment.rb +15 -7
- data/test/unit/bio/test_reference.rb +198 -0
- data/test/unit/bio/test_sequence.rb +4 -49
- data/test/unit/bio/test_shell.rb +2 -2
- metadata +118 -15
- data/lib/bio/io/brdb.rb +0 -103
- data/lib/bioruby.rb +0 -34
data/lib/bio/db/embl/uniprot.rb
CHANGED
|
@@ -1,7 +1,34 @@
|
|
|
1
1
|
#
|
|
2
|
-
# bio/db/embl/uniprot.rb - UniProt database class
|
|
2
|
+
# = bio/db/embl/uniprot.rb - UniProt database class
|
|
3
3
|
#
|
|
4
|
-
# Copyright (C) 2005 KATAYAMA Toshiaki <k@bioruby.org>
|
|
4
|
+
# Copyright:: Copyright (C) 2005 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
#
|
|
7
|
+
# $Id: uniprot.rb,v 1.2 2006/01/28 06:40:39 nakao Exp $
|
|
8
|
+
#
|
|
9
|
+
# == Description
|
|
10
|
+
#
|
|
11
|
+
# Name space for UniProtKB/SwissProt specific methods.
|
|
12
|
+
#
|
|
13
|
+
# UniProtKB/SwissProt specific methods are defined in this class.
|
|
14
|
+
# Shared methods for UniProtKB/SwissProt and TrEMBL classes are
|
|
15
|
+
# defined in Bio::SPTR class.
|
|
16
|
+
#
|
|
17
|
+
# == Examples
|
|
18
|
+
#
|
|
19
|
+
# str = File.read("p53_human.swiss")
|
|
20
|
+
# obj = Bio::UniProt.new(str)
|
|
21
|
+
# obj.entry_id #=> "P53_HUMAN"
|
|
22
|
+
#
|
|
23
|
+
# == References
|
|
24
|
+
#
|
|
25
|
+
# * UniProt
|
|
26
|
+
# http://uniprot.org/
|
|
27
|
+
#
|
|
28
|
+
# * The UniProtKB/SwissProt/TrEMBL User Manual
|
|
29
|
+
# http://www.expasy.org/sprot/userman.html
|
|
30
|
+
|
|
31
|
+
#--
|
|
5
32
|
#
|
|
6
33
|
# This library is free software; you can redistribute it and/or
|
|
7
34
|
# modify it under the terms of the GNU Lesser General Public
|
|
@@ -17,13 +44,15 @@
|
|
|
17
44
|
# License along with this library; if not, write to the Free Software
|
|
18
45
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
46
|
#
|
|
20
|
-
|
|
47
|
+
#++
|
|
21
48
|
#
|
|
22
49
|
|
|
23
50
|
require 'bio/db/embl/sptr'
|
|
24
51
|
|
|
25
52
|
module Bio
|
|
26
53
|
|
|
54
|
+
# Parser class for SwissProt database entry.
|
|
55
|
+
# See also Bio::SPTR class.
|
|
27
56
|
class UniProt < SPTR
|
|
28
57
|
# Nothing to do (UniProt format is abstracted in SPTR)
|
|
29
58
|
end
|
data/lib/bio/db/fasta.rb
CHANGED
|
@@ -1,24 +1,66 @@
|
|
|
1
1
|
#
|
|
2
|
-
# bio/db/fasta.rb - FASTA format class
|
|
2
|
+
# = bio/db/fasta.rb - FASTA format class
|
|
3
3
|
#
|
|
4
|
-
#
|
|
5
|
-
#
|
|
4
|
+
# Copyright:: Copyright (C) 2001, 2002
|
|
5
|
+
# GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>,
|
|
6
|
+
# KATAYAMA Toshiaki <k@bioruby.org>
|
|
7
|
+
# License:: Ruby's
|
|
6
8
|
#
|
|
7
|
-
#
|
|
8
|
-
#
|
|
9
|
-
#
|
|
10
|
-
#
|
|
9
|
+
# $Id: fasta.rb,v 1.25 2006/02/22 08:44:46 ngoto Exp $
|
|
10
|
+
#
|
|
11
|
+
# == Description
|
|
12
|
+
#
|
|
13
|
+
# FASTA format class.
|
|
11
14
|
#
|
|
12
|
-
#
|
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
15
|
-
# Lesser General Public License for more details.
|
|
15
|
+
# == Examples
|
|
16
16
|
#
|
|
17
|
-
#
|
|
18
|
-
#
|
|
19
|
-
#
|
|
17
|
+
# rub = Bio::FastaDefline.new('>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]')
|
|
18
|
+
# rub.entry_id ==> 'gi|671595'
|
|
19
|
+
# rub.get('emb') ==> 'CAA85678.1'
|
|
20
|
+
# rub.emb ==> 'CAA85678.1'
|
|
21
|
+
# rub.gi ==> '671595'
|
|
22
|
+
# rub.accession ==> 'CAA85678'
|
|
23
|
+
# rub.accessions ==> [ 'CAA85678' ]
|
|
24
|
+
# rub.acc_version ==> 'CAA85678.1'
|
|
25
|
+
# rub.locus ==> nil
|
|
26
|
+
# rub.list_ids ==> [["gi", "671595"],
|
|
27
|
+
# ["emb", "CAA85678.1", nil],
|
|
28
|
+
# ["Perovskia abrotanoides"]]
|
|
20
29
|
#
|
|
21
|
-
#
|
|
30
|
+
# ckr = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]")
|
|
31
|
+
# ckr.entry_id ==> "gi|2495000"
|
|
32
|
+
# ckr.sp ==> "CCKR_CAVPO"
|
|
33
|
+
# ckr.pir ==> "I51898"
|
|
34
|
+
# ckr.gb ==> "AAB29504.1"
|
|
35
|
+
# ckr.gi ==> "2495000"
|
|
36
|
+
# ckr.accession ==> "AAB29504"
|
|
37
|
+
# ckr.accessions ==> ["Q63931", "AAB29504"]
|
|
38
|
+
# ckr.acc_version ==> "AAB29504.1"
|
|
39
|
+
# ckr.locus ==> nil
|
|
40
|
+
# ckr.description ==>
|
|
41
|
+
# "CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)"
|
|
42
|
+
# ckr.descriptions ==>
|
|
43
|
+
# ["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)",
|
|
44
|
+
# "cholecystokinin A receptor - guinea pig",
|
|
45
|
+
# "cholecystokinin A receptor; CCK-A receptor [Cavia]"]
|
|
46
|
+
# ckr.words ==>
|
|
47
|
+
# ["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig",
|
|
48
|
+
# "receptor", "type"]
|
|
49
|
+
# ckr.id_strings ==>
|
|
50
|
+
# ["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898",
|
|
51
|
+
# "544724", "AAB29504.1", "Cavia"]
|
|
52
|
+
# ckr.list_ids ==>
|
|
53
|
+
# [["gi", "2495000"], ["sp", "Q63931", "CCKR_CAVPO"],
|
|
54
|
+
# ["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"],
|
|
55
|
+
# ["gb", "AAB29504.1", nil], ["Cavia"]]
|
|
56
|
+
#
|
|
57
|
+
# == References
|
|
58
|
+
#
|
|
59
|
+
# * FASTA format (WikiPedia)
|
|
60
|
+
# http://en.wikipedia.org/wiki/FASTA_format
|
|
61
|
+
#
|
|
62
|
+
# * Fasta format description (NCBI)
|
|
63
|
+
# http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml
|
|
22
64
|
#
|
|
23
65
|
|
|
24
66
|
require 'bio/db'
|
|
@@ -26,34 +68,132 @@ require 'bio/sequence'
|
|
|
26
68
|
|
|
27
69
|
module Bio
|
|
28
70
|
|
|
71
|
+
|
|
72
|
+
# Treats a FASTA formatted entry, such as:
|
|
73
|
+
#
|
|
74
|
+
# >id and/or some comments <== comment line
|
|
75
|
+
# ATGCATGCATGCATGCATGCATGCATGCATGCATGC <== sequence lines
|
|
76
|
+
# ATGCATGCATGCATGCATGCATGCATGCATGCATGC
|
|
77
|
+
# ATGCATGCATGC
|
|
78
|
+
#
|
|
79
|
+
# The leading '>' can be omitted and the trailing '>' will be removed
|
|
80
|
+
# automatically.
|
|
81
|
+
#
|
|
82
|
+
# === Examples
|
|
83
|
+
#
|
|
84
|
+
# f_str = <<END
|
|
85
|
+
# >sce:YBR160W CDC28, SRM5; cyclin-dependent protein kinase catalytic subunit [EC:2.7.1.-] [SP:CC28_YEAST]
|
|
86
|
+
# MSGELANYKRLEKVGEGTYGVVYKALDLRPGQGQRVVALKKIRLESEDEG
|
|
87
|
+
# VPSTAIREISLLKELKDDNIVRLYDIVHSDAHKLYLVFEFLDLDLKRYME
|
|
88
|
+
# GIPKDQPLGADIVKKFMMQLCKGIAYCHSHRILHRDLKPQNLLINKDGNL
|
|
89
|
+
# KLGDFGLARAFGVPLRAYTHEIVTLWYRAPEVLLGGKQYSTGVDTWSIGC
|
|
90
|
+
# IFAEMCNRKPIFSGDSEIDQIFKIFRVLGTPNEAIWPDIVYLPDFKPSFP
|
|
91
|
+
# QWRRKDLSQVVPSLDPRGIDLLDKLLAYDPINRISARRAAIHPYFQES
|
|
92
|
+
# >sce:YBR274W CHK1; probable serine/threonine-protein kinase [EC:2.7.1.-] [SP:KB9S_YEAST]
|
|
93
|
+
# MSLSQVSPLPHIKDVVLGDTVGQGAFACVKNAHLQMDPSIILAVKFIHVP
|
|
94
|
+
# TCKKMGLSDKDITKEVVLQSKCSKHPNVLRLIDCNVSKEYMWIILEMADG
|
|
95
|
+
# GDLFDKIEPDVGVDSDVAQFYFQQLVSAINYLHVECGVAHRDIKPENILL
|
|
96
|
+
# DKNGNLKLADFGLASQFRRKDGTLRVSMDQRGSPPYMAPEVLYSEEGYYA
|
|
97
|
+
# DRTDIWSIGILLFVLLTGQTPWELPSLENEDFVFFIENDGNLNWGPWSKI
|
|
98
|
+
# EFTHLNLLRKILQPDPNKRVTLKALKLHPWVLRRASFSGDDGLCNDPELL
|
|
99
|
+
# AKKLFSHLKVSLSNENYLKFTQDTNSNNRYISTQPIGNELAELEHDSMHF
|
|
100
|
+
# QTVSNTQRAFTSYDSNTNYNSGTGMTQEAKWTQFISYDIAALQFHSDEND
|
|
101
|
+
# CNELVKRHLQFNPNKLTKFYTLQPMDVLLPILEKALNLSQIRVKPDLFAN
|
|
102
|
+
# FERLCELLGYDNVFPLIINIKTKSNGGYQLCGSISIIKIEEELKSVGFER
|
|
103
|
+
# KTGDPLEWRRLFKKISTICRDIILIPN
|
|
104
|
+
# END
|
|
105
|
+
#
|
|
106
|
+
# f = Bio::FastaFormat.new(f_str)
|
|
107
|
+
# puts "### FastaFormat"
|
|
108
|
+
# puts "# entry"
|
|
109
|
+
# puts f.entry
|
|
110
|
+
# puts "# entry_id"
|
|
111
|
+
# p f.entry_id
|
|
112
|
+
# puts "# definition"
|
|
113
|
+
# p f.definition
|
|
114
|
+
# puts "# data"
|
|
115
|
+
# p f.data
|
|
116
|
+
# puts "# seq"
|
|
117
|
+
# p f.seq
|
|
118
|
+
# puts "# seq.type"
|
|
119
|
+
# p f.seq.type
|
|
120
|
+
# puts "# length"
|
|
121
|
+
# p f.length
|
|
122
|
+
# puts "# aaseq"
|
|
123
|
+
# p f.aaseq
|
|
124
|
+
# puts "# aaseq.type"
|
|
125
|
+
# p f.aaseq.type
|
|
126
|
+
# puts "# aaseq.composition"
|
|
127
|
+
# p f.aaseq.composition
|
|
128
|
+
# puts "# aalen"
|
|
129
|
+
# p f.aalen
|
|
130
|
+
#
|
|
131
|
+
# === References
|
|
132
|
+
#
|
|
133
|
+
# * FASTA format (WikiPedia)
|
|
134
|
+
# http://en.wikipedia.org/wiki/FASTA_format
|
|
135
|
+
#
|
|
29
136
|
class FastaFormat < DB
|
|
30
137
|
|
|
138
|
+
# Entry delimiter in flatfile text.
|
|
31
139
|
DELIMITER = RS = "\n>"
|
|
32
140
|
|
|
141
|
+
# (Integer) excess read size included in DELIMITER.
|
|
142
|
+
DELIMITER_OVERRUN = 1 # '>'
|
|
143
|
+
|
|
144
|
+
# The comment line of the FASTA formatted data.
|
|
145
|
+
attr_accessor :definition
|
|
146
|
+
|
|
147
|
+
# The sequence lines in text.
|
|
148
|
+
attr_accessor :data
|
|
149
|
+
|
|
150
|
+
attr_reader :entry_overrun
|
|
151
|
+
|
|
152
|
+
# Stores the comment and sequence information from one entry of the
|
|
153
|
+
# FASTA format string. If the argument contains more than one
|
|
154
|
+
# entry, only the first entry is used.
|
|
33
155
|
def initialize(str)
|
|
34
156
|
@definition = str[/.*/].sub(/^>/, '').strip # 1st line
|
|
35
157
|
@data = str.sub(/.*/, '') # rests
|
|
36
158
|
@data.sub!(/^>.*/m, '') # remove trailing entries for sure
|
|
37
159
|
@entry_overrun = $&
|
|
38
160
|
end
|
|
39
|
-
attr_accessor :definition, :data
|
|
40
|
-
attr_reader :entry_overrun
|
|
41
161
|
|
|
162
|
+
# Returns the stored one entry as a FASTA format. (same as to_s)
|
|
42
163
|
def entry
|
|
43
164
|
@entry = ">#{@definition}\n#{@data.strip}\n"
|
|
44
165
|
end
|
|
45
166
|
alias to_s entry
|
|
46
167
|
|
|
168
|
+
|
|
169
|
+
# Executes FASTA/BLAST search by using a Bio::Fasta or a Bio::Blast
|
|
170
|
+
# factory object.
|
|
171
|
+
#
|
|
172
|
+
# #!/usr/bin/env ruby
|
|
173
|
+
# require 'bio'
|
|
174
|
+
#
|
|
175
|
+
# factory = Bio::Fasta.local('fasta34', 'db/swissprot.f')
|
|
176
|
+
# flatfile = Bio::FlatFile.open(Bio::FastaFormat, 'queries.f')
|
|
177
|
+
# flatfile.each do |entry|
|
|
178
|
+
# p entry.definition
|
|
179
|
+
# result = entry.fasta(factory)
|
|
180
|
+
# result.each do |hit|
|
|
181
|
+
# print "#{hit.query_id} : #{hit.evalue}\t#{hit.target_id} at "
|
|
182
|
+
# p hit.lap_at
|
|
183
|
+
# end
|
|
184
|
+
# end
|
|
185
|
+
#
|
|
47
186
|
def query(factory)
|
|
48
187
|
factory.query(@entry)
|
|
49
188
|
end
|
|
50
189
|
alias fasta query
|
|
51
190
|
alias blast query
|
|
52
191
|
|
|
192
|
+
# Returns a joined sequence line as a String.
|
|
53
193
|
def seq
|
|
54
194
|
unless defined?(@seq)
|
|
55
195
|
unless /\A\s*^\#/ =~ @data then
|
|
56
|
-
@seq = Sequence.new(@data.tr(" \t\r\n0-9", '')) # lazy clean up
|
|
196
|
+
@seq = Sequence::Generic.new(@data.tr(" \t\r\n0-9", '')) # lazy clean up
|
|
57
197
|
else
|
|
58
198
|
a = @data.split(/(^\#.*$)/)
|
|
59
199
|
i = 0
|
|
@@ -69,37 +209,61 @@ module Bio
|
|
|
69
209
|
end
|
|
70
210
|
end
|
|
71
211
|
@comment = cmnt
|
|
72
|
-
@seq = Bio::Sequence.new(s.join(''))
|
|
212
|
+
@seq = Bio::Sequence::Generic.new(s.join(''))
|
|
73
213
|
end
|
|
74
214
|
end
|
|
75
215
|
@seq
|
|
76
216
|
end
|
|
77
217
|
|
|
218
|
+
# Returns comments.
|
|
78
219
|
def comment
|
|
79
220
|
seq
|
|
80
221
|
@comment
|
|
81
222
|
end
|
|
82
223
|
|
|
224
|
+
# Returns sequence length.
|
|
83
225
|
def length
|
|
84
226
|
seq.length
|
|
85
227
|
end
|
|
86
228
|
|
|
229
|
+
# Returns the Bio::Sequence::NA.
|
|
87
230
|
def naseq
|
|
88
231
|
Sequence::NA.new(seq)
|
|
89
232
|
end
|
|
90
233
|
|
|
234
|
+
# Returns the length of Bio::Sequence::NA.
|
|
91
235
|
def nalen
|
|
92
236
|
self.naseq.length
|
|
93
237
|
end
|
|
94
238
|
|
|
239
|
+
# Returns the Bio::Sequence::AA.
|
|
95
240
|
def aaseq
|
|
96
241
|
Sequence::AA.new(seq)
|
|
97
242
|
end
|
|
98
243
|
|
|
244
|
+
# Returns the length of Bio::Sequence::AA.
|
|
99
245
|
def aalen
|
|
100
246
|
self.aaseq.length
|
|
101
247
|
end
|
|
102
248
|
|
|
249
|
+
# Returns sequence as a Bio::Sequence object.
|
|
250
|
+
#
|
|
251
|
+
# Note: If you modify the returned Bio::Sequence object,
|
|
252
|
+
# the sequence or definition in this FastaFormat object
|
|
253
|
+
# might also be changed (but not always be changed)
|
|
254
|
+
# because of efficiency.
|
|
255
|
+
#
|
|
256
|
+
def to_seq
|
|
257
|
+
seq
|
|
258
|
+
obj = Bio::Sequence.new(@seq)
|
|
259
|
+
obj.definition = self.definition
|
|
260
|
+
obj
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
# Parsing FASTA Defline, and extract IDs.
|
|
264
|
+
# IDs are NSIDs (NCBI standard FASTA sequence identifiers)
|
|
265
|
+
# or ":"-separated IDs.
|
|
266
|
+
# It returns a Bio::FastaDefline instance.
|
|
103
267
|
def identifiers
|
|
104
268
|
unless defined?(@ids) then
|
|
105
269
|
@ids = FastaDefline.new(@definition)
|
|
@@ -107,34 +271,69 @@ module Bio
|
|
|
107
271
|
@ids
|
|
108
272
|
end
|
|
109
273
|
|
|
274
|
+
# Parsing FASTA Defline (using #identifiers method), and
|
|
275
|
+
# shows a possibly unique identifier.
|
|
276
|
+
# It returns a string.
|
|
110
277
|
def entry_id
|
|
111
278
|
identifiers.entry_id
|
|
112
279
|
end
|
|
113
280
|
|
|
281
|
+
# Parsing FASTA Defline (using #identifiers method), and
|
|
282
|
+
# shows GI/locus/accession/accession with version number.
|
|
283
|
+
# If an entry has two or more such IDs,
|
|
284
|
+
# only the first ID is shown.
|
|
285
|
+
# It returns a string or nil.
|
|
114
286
|
def gi
|
|
115
287
|
identifiers.gi
|
|
116
288
|
end
|
|
117
289
|
|
|
290
|
+
# Returns an accession number.
|
|
118
291
|
def accession
|
|
119
292
|
identifiers.accession
|
|
120
293
|
end
|
|
121
294
|
|
|
295
|
+
# Parsing FASTA Defline (using #identifiers method), and
|
|
296
|
+
# shows accession numbers.
|
|
297
|
+
# It returns an array of strings.
|
|
122
298
|
def accessions
|
|
123
299
|
identifiers.accessions
|
|
124
300
|
end
|
|
125
301
|
|
|
302
|
+
# Returns accession number with version.
|
|
126
303
|
def acc_version
|
|
127
304
|
identifiers.acc_version
|
|
128
305
|
end
|
|
129
306
|
|
|
307
|
+
# Returns locus.
|
|
130
308
|
def locus
|
|
131
309
|
identifiers.locus
|
|
132
310
|
end
|
|
133
311
|
|
|
134
312
|
end #class FastaFormat
|
|
135
313
|
|
|
314
|
+
# Treats a FASTA formatted numerical entry, such as:
|
|
315
|
+
#
|
|
316
|
+
# >id and/or some comments <== comment line
|
|
317
|
+
# 24 15 23 29 20 13 20 21 21 23 22 25 13 <== numerical data
|
|
318
|
+
# 22 17 15 25 27 32 26 32 29 29 25
|
|
319
|
+
#
|
|
320
|
+
# The leading '>' can be omitted and the trailing '>' will be removed
|
|
321
|
+
# automatically.
|
|
322
|
+
#
|
|
323
|
+
# --- Bio::FastaNumericFormat.new(entry)
|
|
324
|
+
#
|
|
325
|
+
# Stores the comment and the list of the numerical data.
|
|
326
|
+
#
|
|
327
|
+
# --- Bio::FastaNumericFormat#definition
|
|
328
|
+
#
|
|
329
|
+
# The comment line of the FASTA formatted data.
|
|
330
|
+
#
|
|
331
|
+
# * FASTA format (Wikipedia)
|
|
332
|
+
# http://en.wikipedia.org/wiki/FASTA_format
|
|
136
333
|
class FastaNumericFormat < FastaFormat
|
|
137
334
|
|
|
335
|
+
# Returns the list of the numerical data (typically the quality score
|
|
336
|
+
# of its corresponding sequence) as an Array.
|
|
138
337
|
def data
|
|
139
338
|
unless @list
|
|
140
339
|
@list = @data.strip.split(/\s+/).map {|x| x.to_i}
|
|
@@ -142,16 +341,19 @@ module Bio
|
|
|
142
341
|
@list
|
|
143
342
|
end
|
|
144
343
|
|
|
344
|
+
# Returns the number of elements in the numerical data.
|
|
145
345
|
def length
|
|
146
346
|
data.length
|
|
147
347
|
end
|
|
148
348
|
|
|
349
|
+
# Yields each element of the numerical data.
|
|
149
350
|
def each
|
|
150
351
|
data.each do |x|
|
|
151
352
|
yield x
|
|
152
353
|
end
|
|
153
354
|
end
|
|
154
355
|
|
|
356
|
+
# Returns the n-th element.
|
|
155
357
|
def [](n)
|
|
156
358
|
data[n]
|
|
157
359
|
end
|
|
@@ -160,11 +362,69 @@ module Bio
|
|
|
160
362
|
|
|
161
363
|
end #class FastaNumericFormat
|
|
162
364
|
|
|
163
|
-
class FastaDefline
|
|
164
365
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
366
|
+
# Parses a FASTA defline and extracts IDs and other information.
|
|
367
|
+
# IDs are NSIDs (NCBI standard FASTA sequence identifiers)
|
|
368
|
+
# or ":"-separated IDs.
|
|
369
|
+
#
|
|
370
|
+
# specs are described in:
|
|
371
|
+
# ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
|
|
372
|
+
# http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
|
|
373
|
+
#
|
|
374
|
+
# === Examples
|
|
375
|
+
#
|
|
376
|
+
# rub = Bio::FastaDefline.new('>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]')
|
|
377
|
+
# rub.entry_id ==> 'gi|671595'
|
|
378
|
+
# rub.get('emb') ==> 'CAA85678.1'
|
|
379
|
+
# rub.emb ==> 'CAA85678.1'
|
|
380
|
+
# rub.gi ==> '671595'
|
|
381
|
+
# rub.accession ==> 'CAA85678'
|
|
382
|
+
# rub.accessions ==> [ 'CAA85678' ]
|
|
383
|
+
# rub.acc_version ==> 'CAA85678.1'
|
|
384
|
+
# rub.locus ==> nil
|
|
385
|
+
# rub.list_ids ==> [["gi", "671595"],
|
|
386
|
+
# ["emb", "CAA85678.1", nil],
|
|
387
|
+
# ["Perovskia abrotanoides"]]
|
|
388
|
+
#
|
|
389
|
+
# ckr = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]")
|
|
390
|
+
# ckr.entry_id ==> "gi|2495000"
|
|
391
|
+
# ckr.sp ==> "CCKR_CAVPO"
|
|
392
|
+
# ckr.pir ==> "I51898"
|
|
393
|
+
# ckr.gb ==> "AAB29504.1"
|
|
394
|
+
# ckr.gi ==> "2495000"
|
|
395
|
+
# ckr.accession ==> "AAB29504"
|
|
396
|
+
# ckr.accessions ==> ["Q63931", "AAB29504"]
|
|
397
|
+
# ckr.acc_version ==> "AAB29504.1"
|
|
398
|
+
# ckr.locus ==> nil
|
|
399
|
+
# ckr.description ==>
|
|
400
|
+
# "CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)"
|
|
401
|
+
# ckr.descriptions ==>
|
|
402
|
+
# ["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)",
|
|
403
|
+
# "cholecystokinin A receptor - guinea pig",
|
|
404
|
+
# "cholecystokinin A receptor; CCK-A receptor [Cavia]"]
|
|
405
|
+
# ckr.words ==>
|
|
406
|
+
# ["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig",
|
|
407
|
+
# "receptor", "type"]
|
|
408
|
+
# ckr.id_strings ==>
|
|
409
|
+
# ["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898",
|
|
410
|
+
# "544724", "AAB29504.1", "Cavia"]
|
|
411
|
+
# ckr.list_ids ==>
|
|
412
|
+
# [["gi", "2495000"], ["sp", "Q63931", "CCKR_CAVPO"],
|
|
413
|
+
# ["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"],
|
|
414
|
+
# ["gb", "AAB29504.1", nil], ["Cavia"]]
|
|
415
|
+
#
|
|
416
|
+
# === References
|
|
417
|
+
#
|
|
418
|
+
# * Fasta format description (NCBI)
|
|
419
|
+
# http://www.ncbi.nlm.nih.gov/BLAST/fasta.shtml
|
|
420
|
+
#
|
|
421
|
+
# * Frequently Asked Questions: Indexing of Sequence Identifiers (by Warren R. Gish.)
|
|
422
|
+
# http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
|
|
423
|
+
#
|
|
424
|
+
# * README.formatdb
|
|
425
|
+
# ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
|
|
426
|
+
#
|
|
427
|
+
class FastaDefline
|
|
168
428
|
|
|
169
429
|
NSIDs = {
|
|
170
430
|
# NCBI and WU-BLAST
|
|
@@ -197,6 +457,15 @@ module Bio
|
|
|
197
457
|
'ri' => [ 'entry_id', 'rearray_id', 'len' ], # RIKEN FANTOM DB
|
|
198
458
|
}
|
|
199
459
|
|
|
460
|
+
# Shows array that contains IDs (or ID-like strings).
|
|
461
|
+
# Returns an array of arrays of strings.
|
|
462
|
+
attr_reader :list_ids
|
|
463
|
+
|
|
464
|
+
# Shows a possibly unique identifier.
|
|
465
|
+
# Returns a string.
|
|
466
|
+
attr_reader :entry_id
|
|
467
|
+
|
|
468
|
+
# Parses given string.
|
|
200
469
|
def initialize(str)
|
|
201
470
|
@deflines = []
|
|
202
471
|
@info = {}
|
|
@@ -210,9 +479,7 @@ module Bio
|
|
|
210
479
|
end
|
|
211
480
|
end #def initialize
|
|
212
481
|
|
|
213
|
-
|
|
214
|
-
attr_reader :entry_id
|
|
215
|
-
|
|
482
|
+
# Parses given string and adds parsed data.
|
|
216
483
|
def add_defline(str)
|
|
217
484
|
case str
|
|
218
485
|
when /^\>?\s*((?:[^\|\s]*\|)+[^\s]+)\s*(.*)$/
|
|
@@ -343,6 +610,10 @@ module Bio
|
|
|
343
610
|
end #def parse_NSIDs
|
|
344
611
|
private :parse_NSIDs
|
|
345
612
|
|
|
613
|
+
|
|
614
|
+
# Shows original string.
|
|
615
|
+
# Note that the result of this method may be different from
|
|
616
|
+
# original string which is given in FastaDefline.new method.
|
|
346
617
|
def to_s
|
|
347
618
|
@deflines.collect { |a|
|
|
348
619
|
s = a[0]
|
|
@@ -350,16 +621,20 @@ module Bio
|
|
|
350
621
|
}.join("\x01")
|
|
351
622
|
end
|
|
352
623
|
|
|
624
|
+
# Shows description.
|
|
353
625
|
def description
|
|
354
626
|
@deflines[0].to_a[-1]
|
|
355
627
|
end
|
|
356
628
|
|
|
629
|
+
# Returns descriptions.
|
|
357
630
|
def descriptions
|
|
358
631
|
@deflines.collect do |a|
|
|
359
632
|
a[-1]
|
|
360
633
|
end
|
|
361
634
|
end
|
|
362
635
|
|
|
636
|
+
# Shows ID-like strings.
|
|
637
|
+
# Returns an array of strings.
|
|
363
638
|
def id_strings
|
|
364
639
|
r = []
|
|
365
640
|
@list_ids.each do |a|
|
|
@@ -401,6 +676,7 @@ module Bio
|
|
|
401
676
|
/\A[A-Z][A-Z0-9]*\_[A-Z0-9\_]+\z/
|
|
402
677
|
]
|
|
403
678
|
|
|
679
|
+
# Shows words used in the defline. Returns an Array.
|
|
404
680
|
def words(case_sensitive = nil, kill_regexp = self.class::KillRegexpArray,
|
|
405
681
|
kwhash = self.class::KillWordsHash)
|
|
406
682
|
a = descriptions.join(' ').split(/[\.\,\;\:\(\)\[\]\{\}\<\>\"\'\`\~\/\|\?\!\&\@\#\s\x00-\x1f\x7f]+/)
|
|
@@ -426,8 +702,9 @@ module Bio
|
|
|
426
702
|
a
|
|
427
703
|
end
|
|
428
704
|
|
|
429
|
-
|
|
430
|
-
|
|
705
|
+
# Returns identifiers by a database name.
|
|
706
|
+
def get(dbname)
|
|
707
|
+
db = dbname.to_s
|
|
431
708
|
r = nil
|
|
432
709
|
unless r = @info[db] then
|
|
433
710
|
di = @list_ids.find { |x| x[0] == db.to_s }
|
|
@@ -449,10 +726,11 @@ module Bio
|
|
|
449
726
|
r
|
|
450
727
|
end
|
|
451
728
|
|
|
452
|
-
|
|
729
|
+
# Returns an identifier by given type.
|
|
730
|
+
def get_by_type(type_str)
|
|
453
731
|
@list_ids.each do |x|
|
|
454
732
|
if labels = self.class::NSIDs[x[0]] then
|
|
455
|
-
if i = labels.index(
|
|
733
|
+
if i = labels.index(type_str) then
|
|
456
734
|
return x[i+1]
|
|
457
735
|
end
|
|
458
736
|
end
|
|
@@ -460,11 +738,12 @@ module Bio
|
|
|
460
738
|
nil
|
|
461
739
|
end
|
|
462
740
|
|
|
463
|
-
|
|
741
|
+
# Returns identifiers by given type.
|
|
742
|
+
def get_all_by_type(*type_strarg)
|
|
464
743
|
d = []
|
|
465
744
|
@list_ids.each do |x|
|
|
466
745
|
if labels = self.class::NSIDs[x[0]] then
|
|
467
|
-
|
|
746
|
+
type_strarg.each do |y|
|
|
468
747
|
if i = labels.index(y) then
|
|
469
748
|
d << x[i+1] if x[i+1]
|
|
470
749
|
end
|
|
@@ -474,6 +753,10 @@ module Bio
|
|
|
474
753
|
d
|
|
475
754
|
end
|
|
476
755
|
|
|
756
|
+
# Shows locus.
|
|
757
|
+
# If the entry has more than two of such IDs,
|
|
758
|
+
# only the first ID is shown.
|
|
759
|
+
# Returns a string or nil.
|
|
477
760
|
def locus
|
|
478
761
|
unless defined?(@locus)
|
|
479
762
|
@locus = get_by_type('locus')
|
|
@@ -481,6 +764,10 @@ module Bio
|
|
|
481
764
|
@locus
|
|
482
765
|
end
|
|
483
766
|
|
|
767
|
+
# Shows GI.
|
|
768
|
+
# If the entry has more than two of such IDs,
|
|
769
|
+
# only the first ID is shown.
|
|
770
|
+
# Returns a string or nil.
|
|
484
771
|
def gi
|
|
485
772
|
unless defined?(@gi) then
|
|
486
773
|
@gi = get_by_type('gi')
|
|
@@ -488,6 +775,10 @@ module Bio
|
|
|
488
775
|
@gi
|
|
489
776
|
end
|
|
490
777
|
|
|
778
|
+
# Shows accession with version number.
|
|
779
|
+
# If the entry has more than two of such IDs,
|
|
780
|
+
# only the first ID is shown.
|
|
781
|
+
# Returns a string or nil.
|
|
491
782
|
def acc_version
|
|
492
783
|
unless defined?(@acc_version) then
|
|
493
784
|
@acc_version = get_by_type('acc_version')
|
|
@@ -495,6 +786,8 @@ module Bio
|
|
|
495
786
|
@acc_version
|
|
496
787
|
end
|
|
497
788
|
|
|
789
|
+
# Shows accession numbers.
|
|
790
|
+
# Returns an array of strings.
|
|
498
791
|
def accessions
|
|
499
792
|
unless defined?(@accessions) then
|
|
500
793
|
@accessions = get_all_by_type('accession', 'acc_version')
|
|
@@ -503,6 +796,7 @@ module Bio
|
|
|
503
796
|
@accessions
|
|
504
797
|
end
|
|
505
798
|
|
|
799
|
+
# Shows an accession number.
|
|
506
800
|
def accession
|
|
507
801
|
unless defined?(@accession) then
|
|
508
802
|
if acc_version then
|
|
@@ -523,6 +817,7 @@ module Bio
|
|
|
523
817
|
end
|
|
524
818
|
r
|
|
525
819
|
end
|
|
820
|
+
|
|
526
821
|
|
|
527
822
|
end #class FastaDefline
|
|
528
823
|
|
|
@@ -610,260 +905,3 @@ END
|
|
|
610
905
|
|
|
611
906
|
end
|
|
612
907
|
|
|
613
|
-
=begin
|
|
614
|
-
|
|
615
|
-
= Bio::FastaFormat
|
|
616
|
-
|
|
617
|
-
Treats a FASTA formatted entry, such as:
|
|
618
|
-
|
|
619
|
-
>id and/or some comments <== comment line
|
|
620
|
-
ATGCATGCATGCATGCATGCATGCATGCATGCATGC <== sequence lines
|
|
621
|
-
ATGCATGCATGCATGCATGCATGCATGCATGCATGC
|
|
622
|
-
ATGCATGCATGC
|
|
623
|
-
|
|
624
|
-
The precedent '>' can be omitted and the trailing '>' will be removed
|
|
625
|
-
automatically.
|
|
626
|
-
|
|
627
|
-
--- Bio::FastaFormat.new(entry)
|
|
628
|
-
|
|
629
|
-
Stores the comment and sequence information from one entry of the
|
|
630
|
-
FASTA format string. If the argument contains more than one
|
|
631
|
-
entry, only the first entry is used.
|
|
632
|
-
|
|
633
|
-
--- Bio::FastaFormat#entry
|
|
634
|
-
|
|
635
|
-
Returns the stored entry in FASTA format. (same as to_s)
|
|
636
|
-
|
|
637
|
-
--- Bio::FastaFormat#definition
|
|
638
|
-
|
|
639
|
-
Returns the comment line of the FASTA formatted data.
|
|
640
|
-
|
|
641
|
-
--- Bio::FastaFormat#seq
|
|
642
|
-
|
|
643
|
-
Returns a joined sequence line as a String.
|
|
644
|
-
|
|
645
|
-
--- Bio::FastaFormat#query(factory)
|
|
646
|
-
--- Bio::FastaFormat#fasta(factory)
|
|
647
|
-
--- Bio::FastaFormat#blast(factory)
|
|
648
|
-
|
|
649
|
-
Executes FASTA/BLAST search by using a Bio::Fasta or a Bio::Blast
|
|
650
|
-
factory object.
|
|
651
|
-
|
|
652
|
-
#!/usr/bin/env ruby
|
|
653
|
-
|
|
654
|
-
require 'bio'
|
|
655
|
-
|
|
656
|
-
factory = Bio::Fasta.local('fasta34', 'db/swissprot.f')
|
|
657
|
-
flatfile = Bio::FlatFile.open(Bio::FastaFormat, 'queries.f')
|
|
658
|
-
flatfile.each do |entry|
|
|
659
|
-
p entry.definition
|
|
660
|
-
result = entry.fasta(factory)
|
|
661
|
-
result.each do |hit|
|
|
662
|
-
print "#{hit.query_id} : #{hit.evalue}\t#{hit.target_id} at "
|
|
663
|
-
p hit.lap_at
|
|
664
|
-
end
|
|
665
|
-
end
|
|
666
|
-
|
|
667
|
-
--- Bio::FastaFormat#length
|
|
668
|
-
|
|
669
|
-
Returns sequence length.
|
|
670
|
-
|
|
671
|
-
--- Bio::FastaFormat#naseq
|
|
672
|
-
--- Bio::FastaFormat#nalen
|
|
673
|
-
--- Bio::FastaFormat#aaseq
|
|
674
|
-
--- Bio::FastaFormat#aalen
|
|
675
|
-
|
|
676
|
-
If you know whether the sequence is NA or AA, use these methods.
|
|
677
|
-
'naseq' and 'aaseq' methods return the Bio::Sequence::NA or
|
|
678
|
-
Bio::Sequence::AA object respectively. 'nalen' and 'aalen' methods
|
|
679
|
-
return the length of them.
|
|
680
|
-
|
|
681
|
-
--- Bio::FastaFormat#identifiers
|
|
682
|
-
|
|
683
|
-
Parsing FASTA Defline, and extract IDs.
|
|
684
|
-
IDs are NSIDs (NCBI standard FASTA sequence identifiers)
|
|
685
|
-
or ":"-separated IDs.
|
|
686
|
-
It returns a Bio::FastaDefline instance.
|
|
687
|
-
|
|
688
|
-
--- Bio::FastaFormat#entry_id
|
|
689
|
-
|
|
690
|
-
Parsing FASTA Defline (using #identifiers method), and
|
|
691
|
-
shows a possibly unique identifier.
|
|
692
|
-
It returns a string.
|
|
693
|
-
|
|
694
|
-
--- Bio::FastaFormat#gi
|
|
695
|
-
--- Bio::FastaFormat#locus
|
|
696
|
-
--- Bio::FastaFormat#accession
|
|
697
|
-
--- Bio::FastaFormat#acc_version
|
|
698
|
-
|
|
699
|
-
Parsing FASTA Defline (using #identifiers method), and
|
|
700
|
-
shows GI/locus/accession/accession with version number.
|
|
701
|
-
If an entry has more than one such ID,
|
|
702
|
-
only the first ID is shown.
|
|
703
|
-
It returns a string or nil.
|
|
704
|
-
|
|
705
|
-
--- Bio::FastaFormat#accessions
|
|
706
|
-
|
|
707
|
-
Parsing FASTA Defline (using #identifiers method), and
|
|
708
|
-
shows accession numbers.
|
|
709
|
-
It returns an array of strings.
|
|
710
|
-
|
|
711
|
-
--- Bio::FastaFormat
|
|
712
|
-
|
|
713
|
-
= Bio::FastaNumericFormat
|
|
714
|
-
|
|
715
|
-
Treats a FASTA formatted numerical entry, such as:
|
|
716
|
-
|
|
717
|
-
>id and/or some comments <== comment line
|
|
718
|
-
24 15 23 29 20 13 20 21 21 23 22 25 13 <== numerical data
|
|
719
|
-
22 17 15 25 27 32 26 32 29 29 25
|
|
720
|
-
|
|
721
|
-
The precedent '>' can be omitted and the trailing '>' will be removed
|
|
722
|
-
automatically.
|
|
723
|
-
|
|
724
|
-
--- Bio::FastaNumericFormat.new(entry)
|
|
725
|
-
|
|
726
|
-
Stores the comment and the list of the numerical data.
|
|
727
|
-
|
|
728
|
-
--- Bio::FastaNumericFormat#definition
|
|
729
|
-
|
|
730
|
-
The comment line of the FASTA formatted data.
|
|
731
|
-
|
|
732
|
-
--- Bio::FastaNumericFormat#data
|
|
733
|
-
|
|
734
|
-
Returns the list of the numerical data (typically the quality score
|
|
735
|
-
of its corresponding sequence) as an Array.
|
|
736
|
-
|
|
737
|
-
--- Bio::FastaNumericFormat#length
|
|
738
|
-
|
|
739
|
-
Returns the number of elements in the numerical data.
|
|
740
|
-
|
|
741
|
-
--- Bio::FastaNumericFormat#each
|
|
742
|
-
|
|
743
|
-
Yields each element of the numerical data.
|
|
744
|
-
|
|
745
|
-
--- Bio::FastaNumericFormat#[](n)
|
|
746
|
-
|
|
747
|
-
Returns the n-th element.
|
|
748
|
-
|
|
749
|
-
--- Bio::FastaNumericFormat#identifiers
|
|
750
|
-
--- Bio::FastaNumericFormat#entry_id
|
|
751
|
-
--- Bio::FastaNumericFormat#gi
|
|
752
|
-
--- Bio::FastaNumericFormat#locus
|
|
753
|
-
--- Bio::FastaNumericFormat#accession
|
|
754
|
-
--- Bio::FastaNumericFormat#acc_version
|
|
755
|
-
--- Bio::FastaNumericFormat#accessions
|
|
756
|
-
|
|
757
|
-
Same as Bio::FastaFormat.
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
= Bio::FastaDefline
|
|
761
|
-
|
|
762
|
-
Parses a FASTA defline and extracts IDs and other information.
|
|
763
|
-
IDs are NSIDs (NCBI standard FASTA sequence identifiers)
|
|
764
|
-
or ":"-separated IDs.
|
|
765
|
-
|
|
766
|
-
--- see also:
|
|
767
|
-
ftp://ftp.ncbi.nih.gov/blast/documents/README.formatdb
|
|
768
|
-
http://blast.wustl.edu/doc/FAQ-Indexing.html#Identifiers
|
|
769
|
-
|
|
770
|
-
--- Bio::FastaDefline.new(str)
|
|
771
|
-
|
|
772
|
-
Parses given string.
|
|
773
|
-
|
|
774
|
-
--- Bio::FastaDefline#entry_id
|
|
775
|
-
|
|
776
|
-
Shows a possibly unique identifier.
|
|
777
|
-
Returns a string.
|
|
778
|
-
|
|
779
|
-
--- Bio::FastaDefline#gi
|
|
780
|
-
--- Bio::FastaDefline#locus
|
|
781
|
-
--- Bio::FastaDefline#accession
|
|
782
|
-
--- Bio::FastaDefline#acc_version
|
|
783
|
-
|
|
784
|
-
Shows GI/locus/accession/accession with version number.
|
|
785
|
-
If the entry has more than one such ID,
|
|
786
|
-
only the first ID is shown.
|
|
787
|
-
Returns a string or nil.
|
|
788
|
-
|
|
789
|
-
--- Bio::FastaDefline#accessions
|
|
790
|
-
|
|
791
|
-
Shows accession numbers.
|
|
792
|
-
Returns an array of strings.
|
|
793
|
-
|
|
794
|
-
--- Bio::FastaDefline#add_defline(str)
|
|
795
|
-
|
|
796
|
-
Parses given string and adds parsed data.
|
|
797
|
-
|
|
798
|
-
--- Bio::FastaDefline#to_s
|
|
799
|
-
|
|
800
|
-
Shows original string.
|
|
801
|
-
Note that the result of this method may be different from
|
|
802
|
-
original string which is given in FastaDefline.new method.
|
|
803
|
-
|
|
804
|
-
--- Bio::FastaDefline#id_strings
|
|
805
|
-
|
|
806
|
-
Shows ID-like strings.
|
|
807
|
-
Returns an array of strings.
|
|
808
|
-
|
|
809
|
-
--- Bio::FastaDefline#list_ids
|
|
810
|
-
|
|
811
|
-
Shows array that contains IDs (or ID-like strings).
|
|
812
|
-
Returns an array of arrays of strings.
|
|
813
|
-
|
|
814
|
-
--- Bio::FastaDefline#description
|
|
815
|
-
--- Bio::FastaDefline#descriptions
|
|
816
|
-
|
|
817
|
-
--- Bio::FastaDefline#words(case_sensitive = nil,
|
|
818
|
-
kill_words_regexp_array, kill_words_hash)
|
|
819
|
-
|
|
820
|
-
--- Bio::FastaDefline#get(tag_of_id)
|
|
821
|
-
|
|
822
|
-
--- Bio::FastaDefline#get_by_type(type_of_id)
|
|
823
|
-
|
|
824
|
-
--- Bio::FastaDefline#get_all_by_type(type_of_id)
|
|
825
|
-
|
|
826
|
-
--- examples:
|
|
827
|
-
rub = Bio::FastaDefline.new('>gi|671595|emb|CAA85678.1| rubisco large subunit [Perovskia abrotanoides]')
|
|
828
|
-
rub.entry_id ==> 'gi|671595'
|
|
829
|
-
rub.get('emb') ==> 'CAA85678.1'
|
|
830
|
-
rub.emb ==> 'CAA85678.1'
|
|
831
|
-
rub.gi ==> '671595'
|
|
832
|
-
rub.accession ==> 'CAA85678'
|
|
833
|
-
rub.accessions ==> [ 'CAA85678' ]
|
|
834
|
-
rub.acc_version ==> 'CAA85678.1'
|
|
835
|
-
rub.locus ==> nil
|
|
836
|
-
rub.list_ids ==> [["gi", "671595"],
|
|
837
|
-
["emb", "CAA85678.1", nil],
|
|
838
|
-
["Perovskia abrotanoides"]]
|
|
839
|
-
|
|
840
|
-
ckr = Bio::FastaDefline.new(">gi|2495000|sp|Q63931|CCKR_CAVPO CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)\001gi|2147182|pir||I51898 cholecystokinin A receptor - guinea pig\001gi|544724|gb|AAB29504.1| cholecystokinin A receptor; CCK-A receptor [Cavia]")
|
|
841
|
-
ckr.entry_id ==> "gi|2495000"
|
|
842
|
-
ckr.sp ==> "CCKR_CAVPO"
|
|
843
|
-
ckr.pir ==> "I51898"
|
|
844
|
-
ckr.gb ==> "AAB29504.1"
|
|
845
|
-
ckr.gi ==> "2495000"
|
|
846
|
-
ckr.accession ==> "AAB29504"
|
|
847
|
-
ckr.accessions ==> ["Q63931", "AAB29504"]
|
|
848
|
-
ckr.acc_version ==> "AAB29504.1"
|
|
849
|
-
ckr.locus ==> nil
|
|
850
|
-
ckr.description ==>
|
|
851
|
-
"CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)"
|
|
852
|
-
ckr.descriptions ==>
|
|
853
|
-
["CHOLECYSTOKININ TYPE A RECEPTOR (CCK-A RECEPTOR) (CCK-AR)",
|
|
854
|
-
"cholecystokinin A receptor - guinea pig",
|
|
855
|
-
"cholecystokinin A receptor; CCK-A receptor [Cavia]"]
|
|
856
|
-
ckr.words ==>
|
|
857
|
-
["cavia", "cck-a", "cck-ar", "cholecystokinin", "guinea", "pig",
|
|
858
|
-
"receptor", "type"]
|
|
859
|
-
ckr.id_strings ==>
|
|
860
|
-
["2495000", "Q63931", "CCKR_CAVPO", "2147182", "I51898",
|
|
861
|
-
"544724", "AAB29504.1", "Cavia"]
|
|
862
|
-
ckr.list_ids ==>
|
|
863
|
-
[["gi", "2495000"], ["sp", "Q63931", "CCKR_CAVPO"],
|
|
864
|
-
["gi", "2147182"], ["pir", nil, "I51898"], ["gi", "544724"],
|
|
865
|
-
["gb", "AAB29504.1", nil], ["Cavia"]]
|
|
866
|
-
|
|
867
|
-
=end
|
|
868
|
-
|
|
869
|
-
|