bio 0.7.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/bioruby +71 -27
- data/bin/br_biofetch.rb +5 -17
- data/bin/br_bioflat.rb +14 -26
- data/bin/br_biogetseq.rb +6 -18
- data/bin/br_pmfetch.rb +6 -16
- data/doc/Changes-0.7.rd +35 -0
- data/doc/KEGG_API.rd +287 -172
- data/doc/KEGG_API.rd.ja +273 -160
- data/doc/Tutorial.rd +18 -9
- data/doc/Tutorial.rd.ja +656 -138
- data/lib/bio.rb +6 -24
- data/lib/bio/alignment.rb +5 -5
- data/lib/bio/appl/blast.rb +132 -98
- data/lib/bio/appl/blast/format0.rb +9 -19
- data/lib/bio/appl/blast/wublast.rb +5 -18
- data/lib/bio/appl/emboss.rb +40 -47
- data/lib/bio/appl/hmmer.rb +116 -82
- data/lib/bio/appl/hmmer/report.rb +509 -364
- data/lib/bio/appl/spidey/report.rb +7 -18
- data/lib/bio/data/na.rb +3 -21
- data/lib/bio/db.rb +3 -21
- data/lib/bio/db/aaindex.rb +147 -52
- data/lib/bio/db/embl/common.rb +27 -6
- data/lib/bio/db/embl/embl.rb +18 -10
- data/lib/bio/db/embl/sptr.rb +87 -67
- data/lib/bio/db/embl/swissprot.rb +32 -3
- data/lib/bio/db/embl/trembl.rb +32 -3
- data/lib/bio/db/embl/uniprot.rb +32 -3
- data/lib/bio/db/fasta.rb +327 -289
- data/lib/bio/db/medline.rb +25 -4
- data/lib/bio/db/nbrf.rb +12 -20
- data/lib/bio/db/pdb.rb +4 -1
- data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
- data/lib/bio/db/pdb/pdb.rb +13 -8
- data/lib/bio/db/rebase.rb +93 -97
- data/lib/bio/feature.rb +2 -31
- data/lib/bio/io/ddbjxml.rb +167 -139
- data/lib/bio/io/fastacmd.rb +89 -56
- data/lib/bio/io/flatfile.rb +994 -278
- data/lib/bio/io/flatfile/index.rb +257 -194
- data/lib/bio/io/flatfile/indexer.rb +37 -29
- data/lib/bio/reference.rb +147 -64
- data/lib/bio/sequence.rb +57 -417
- data/lib/bio/sequence/aa.rb +64 -0
- data/lib/bio/sequence/common.rb +175 -0
- data/lib/bio/sequence/compat.rb +68 -0
- data/lib/bio/sequence/format.rb +134 -0
- data/lib/bio/sequence/generic.rb +24 -0
- data/lib/bio/sequence/na.rb +189 -0
- data/lib/bio/shell.rb +9 -23
- data/lib/bio/shell/core.rb +130 -125
- data/lib/bio/shell/demo.rb +143 -0
- data/lib/bio/shell/{session.rb → interface.rb} +42 -40
- data/lib/bio/shell/object.rb +52 -0
- data/lib/bio/shell/plugin/codon.rb +4 -22
- data/lib/bio/shell/plugin/emboss.rb +23 -0
- data/lib/bio/shell/plugin/entry.rb +34 -25
- data/lib/bio/shell/plugin/flatfile.rb +5 -23
- data/lib/bio/shell/plugin/keggapi.rb +11 -24
- data/lib/bio/shell/plugin/midi.rb +5 -23
- data/lib/bio/shell/plugin/obda.rb +4 -22
- data/lib/bio/shell/plugin/seq.rb +6 -24
- data/lib/bio/shell/rails/Rakefile +10 -0
- data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
- data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
- data/lib/bio/shell/rails/config/boot.rb +19 -0
- data/lib/bio/shell/rails/config/database.yml +85 -0
- data/lib/bio/shell/rails/config/environment.rb +53 -0
- data/lib/bio/shell/rails/config/environments/development.rb +19 -0
- data/lib/bio/shell/rails/config/environments/production.rb +19 -0
- data/lib/bio/shell/rails/config/environments/test.rb +19 -0
- data/lib/bio/shell/rails/config/routes.rb +19 -0
- data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
- data/lib/bio/shell/rails/public/404.html +8 -0
- data/lib/bio/shell/rails/public/500.html +8 -0
- data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
- data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
- data/lib/bio/shell/rails/public/dispatch.rb +10 -0
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/icon.png +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +277 -0
- data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
- data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
- data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
- data/lib/bio/shell/rails/public/robots.txt +1 -0
- data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
- data/lib/bio/shell/rails/script/about +3 -0
- data/lib/bio/shell/rails/script/breakpointer +3 -0
- data/lib/bio/shell/rails/script/console +3 -0
- data/lib/bio/shell/rails/script/destroy +3 -0
- data/lib/bio/shell/rails/script/generate +3 -0
- data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
- data/lib/bio/shell/rails/script/performance/profiler +3 -0
- data/lib/bio/shell/rails/script/plugin +3 -0
- data/lib/bio/shell/rails/script/process/reaper +3 -0
- data/lib/bio/shell/rails/script/process/spawner +3 -0
- data/lib/bio/shell/rails/script/process/spinner +3 -0
- data/lib/bio/shell/rails/script/runner +3 -0
- data/lib/bio/shell/rails/script/server +42 -0
- data/lib/bio/shell/rails/test/test_helper.rb +28 -0
- data/lib/bio/shell/web.rb +90 -0
- data/lib/bio/util/contingency_table.rb +231 -225
- data/sample/any2fasta.rb +59 -0
- data/test/data/HMMER/hmmpfam.out +64 -0
- data/test/data/HMMER/hmmsearch.out +88 -0
- data/test/data/aaindex/DAYM780301 +30 -0
- data/test/data/aaindex/PRAM900102 +20 -0
- data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
- data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
- data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
- data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
- data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
- data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
- data/test/unit/bio/appl/blast/test_report.rb +15 -12
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
- data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
- data/test/unit/bio/appl/test_blast.rb +5 -5
- data/test/unit/bio/data/test_na.rb +9 -18
- data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
- data/test/unit/bio/db/test_aaindex.rb +197 -0
- data/test/unit/bio/io/test_fastacmd.rb +55 -0
- data/test/unit/bio/sequence/test_aa.rb +102 -0
- data/test/unit/bio/sequence/test_common.rb +178 -0
- data/test/unit/bio/sequence/test_compat.rb +82 -0
- data/test/unit/bio/sequence/test_na.rb +242 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
- data/test/unit/bio/test_alignment.rb +15 -7
- data/test/unit/bio/test_reference.rb +198 -0
- data/test/unit/bio/test_sequence.rb +4 -49
- data/test/unit/bio/test_shell.rb +2 -2
- metadata +118 -15
- data/lib/bio/io/brdb.rb +0 -103
- data/lib/bioruby.rb +0 -34
data/lib/bio/io/fastacmd.rb
CHANGED
@@ -1,8 +1,47 @@
|
|
1
1
|
#
|
2
|
-
# bio/io/fastacmd.rb - NCBI fastacmd wrapper class
|
2
|
+
# = bio/io/fastacmd.rb - NCBI fastacmd wrapper class
|
3
3
|
#
|
4
|
-
#
|
5
|
-
#
|
4
|
+
# Copyright:: Copyright (C) 2005, 2006
|
5
|
+
# Shuji SHIGENOBU <shige@nibb.ac.jp>,
|
6
|
+
# Toshiaki Katayama <k@bioruby.org>,
|
7
|
+
# Mitsuteru C. Nakao <n@bioruby.org>
|
8
|
+
# License:: LGPL
|
9
|
+
#
|
10
|
+
# $Id: fastacmd.rb,v 1.10 2006/01/28 08:12:21 nakao Exp $
|
11
|
+
#
|
12
|
+
# == Description
|
13
|
+
#
|
14
|
+
# Retrieves FASTA formatted sequences from a blast database using
|
15
|
+
# NCBI fastacmd command.
|
16
|
+
#
|
17
|
+
# This class requires 'fastacmd' command and a blast database
|
18
|
+
# (formatted using the '-o' option of 'formatdb').
|
19
|
+
#
|
20
|
+
# == Examples
|
21
|
+
#
|
22
|
+
# database = ARGV.shift || "/db/myblastdb"
|
23
|
+
# entry_id = ARGV.shift || "sp:128U_DROME"
|
24
|
+
# ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
|
25
|
+
#
|
26
|
+
# fastacmd = Bio::Blast::Fastacmd.new(database)
|
27
|
+
#
|
28
|
+
# entry = fastacmd.get_by_id(entry_id)
|
29
|
+
# fastacmd.fetch(entry_id)
|
30
|
+
# fastacmd.fetch(ent_list)
|
31
|
+
#
|
32
|
+
# fastacmd.fetch(ent_list).each do |fasta|
|
33
|
+
# puts fasta
|
34
|
+
# end
|
35
|
+
#
|
36
|
+
# == References
|
37
|
+
#
|
38
|
+
# * NCBI tool
|
39
|
+
# ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ncbi.tar.gz
|
40
|
+
#
|
41
|
+
# * fastacmd.html
|
42
|
+
# http://biowulf.nih.gov/apps/blast/doc/fastacmd.html
|
43
|
+
#
|
44
|
+
#--
|
6
45
|
#
|
7
46
|
# This library is free software; you can redistribute it and/or
|
8
47
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -18,7 +57,7 @@
|
|
18
57
|
# License along with this library; if not, write to the Free Software
|
19
58
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
20
59
|
#
|
21
|
-
|
60
|
+
#++
|
22
61
|
#
|
23
62
|
|
24
63
|
require 'bio/db/fasta'
|
@@ -28,23 +67,52 @@ require 'bio/command'
|
|
28
67
|
module Bio
|
29
68
|
class Blast
|
30
69
|
|
70
|
+
# NCBI fastacmd wrapper class
|
71
|
+
#
|
31
72
|
class Fastacmd
|
32
73
|
|
33
74
|
include Enumerable
|
34
75
|
include Bio::Command::Tools
|
35
76
|
|
36
|
-
|
37
|
-
|
77
|
+
# Database file path.
|
78
|
+
attr_accessor :database
|
79
|
+
|
80
|
+
# fastacmd command file path.
|
81
|
+
attr_accessor :fastacmd
|
82
|
+
|
83
|
+
#
|
84
|
+
attr_accessor :errorlog
|
85
|
+
|
86
|
+
# Initialize a fastacmd object.
|
87
|
+
#
|
88
|
+
# fastacmd = Bio::Blast::Fastacmd.new("/db/myblastdb")
|
89
|
+
def initialize(blast_database_file_path)
|
90
|
+
@database = blast_database_file_path
|
38
91
|
@fastacmd = 'fastacmd'
|
39
92
|
end
|
40
|
-
attr_accessor :database, :fastacmd, :errorlog
|
41
93
|
|
42
|
-
|
94
|
+
|
95
|
+
# get an entry_id and returns a Bio::FastaFormat object.
|
96
|
+
#
|
97
|
+
# entry_id = "sp:128U_DROME"
|
98
|
+
# entry = fastacmd.get_by_id(entry_id)
|
43
99
|
def get_by_id(entry_id)
|
44
100
|
fetch(entry_id).shift
|
45
101
|
end
|
46
102
|
|
47
|
-
# get one or more entry_id and returns an Array of Bio::FastaFormat objects
|
103
|
+
# get one or more entry_id and returns an Array of Bio::FastaFormat objects.
|
104
|
+
#
|
105
|
+
# Fastacmd#fetch(entry_id) returns an Array of a Bio::FastaFormat
|
106
|
+
# object even when the result is a single entry.
|
107
|
+
#
|
108
|
+
# p fastacmd.fetch(entry_id)
|
109
|
+
#
|
110
|
+
# Fastacmd#fetch method also accepts a list of entry_id and returns
|
111
|
+
# an Array of Bio::FastaFormat objects.
|
112
|
+
#
|
113
|
+
# ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
|
114
|
+
# p fastacmd.fetch(ent_list)
|
115
|
+
#
|
48
116
|
def fetch(list)
|
49
117
|
if list.respond_to?(:join)
|
50
118
|
entry_id = list.join(",")
|
@@ -59,13 +127,20 @@ class Fastacmd
|
|
59
127
|
end
|
60
128
|
end
|
61
129
|
|
130
|
+
# Iterates each entry.
|
131
|
+
#
|
132
|
+
# You can also iterate on all sequences in the database!
|
133
|
+
# fastacmd.each do |fasta|
|
134
|
+
# p [ fasta.definition[0..30], fasta.seq.size ]
|
135
|
+
# end
|
136
|
+
#
|
62
137
|
def each_entry
|
63
138
|
cmd = [ @fastacmd, '-d', @database, '-D', 'T' ]
|
64
139
|
call_command_local(cmd) do |inn, out|
|
65
140
|
inn.close_write
|
66
141
|
Bio::FlatFile.open(Bio::FastaFormat, out) do |f|
|
67
|
-
f.each_entry do |
|
68
|
-
yield
|
142
|
+
f.each_entry do |entry|
|
143
|
+
yield entry
|
69
144
|
end
|
70
145
|
end
|
71
146
|
end
|
@@ -73,51 +148,9 @@ class Fastacmd
|
|
73
148
|
end
|
74
149
|
alias each each_entry
|
75
150
|
|
76
|
-
end
|
151
|
+
end # class Fastacmd
|
77
152
|
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
|
82
|
-
if __FILE__ == $0
|
83
|
-
|
84
|
-
database = ARGV.shift || "/db/myblastdb"
|
85
|
-
entry_id = ARGV.shift || "sp:128U_DROME"
|
86
|
-
ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
|
87
|
-
|
88
|
-
fastacmd = Bio::Blast::Fastacmd.new(database)
|
89
|
-
|
90
|
-
### Retrieve one sequence
|
91
|
-
entry = fastacmd.get_by_id(entry_id)
|
92
|
-
|
93
|
-
# Fastacmd#get_by_id(entry_id) returns a Bio::FastaFormat object.
|
94
|
-
p entry
|
95
|
-
|
96
|
-
# Bio::FastaFormat becomes a fasta format string when printed by puts.
|
97
|
-
puts entry
|
98
|
-
|
99
|
-
# Fastacmd#fetch(entry_id) returns an Array of a Bio::FastaFormat
|
100
|
-
# object even when the result is a single entry.
|
101
|
-
p fastacmd.fetch(entry_id)
|
102
|
-
|
103
|
-
### Retrieve more sequences
|
104
|
-
|
105
|
-
# Fastacmd#fetch method also accepts a list of entry_id and returns
|
106
|
-
# an Array of Bio::FastaFormat objects.
|
107
|
-
p fastacmd.fetch(ent_list)
|
108
|
-
|
109
|
-
# So, you can iterate on the results.
|
110
|
-
fastacmd.fetch(ent_list).each do |fasta|
|
111
|
-
puts fasta
|
112
|
-
end
|
113
|
-
|
114
|
-
|
115
|
-
### Iterates on all entries
|
116
|
-
|
117
|
-
# You can also iterate on all sequences in the database!
|
118
|
-
fastacmd.each do |fasta|
|
119
|
-
p [ fasta.definition[0..30], fasta.seq.size ]
|
120
|
-
end
|
153
|
+
end # class Blast
|
154
|
+
end # module Bio
|
121
155
|
|
122
|
-
end
|
123
156
|
|
data/lib/bio/io/flatfile.rb
CHANGED
@@ -1,32 +1,19 @@
|
|
1
1
|
#
|
2
2
|
# = bio/io/flatfile.rb - flatfile access wrapper class
|
3
3
|
#
|
4
|
-
#
|
5
|
-
# License:: LGPL
|
4
|
+
# Copyright (C) 2001-2006 Naohisa Goto <ng@bioruby.org>
|
6
5
|
#
|
7
|
-
|
8
|
-
# This library is free software; you can redistribute it and/or
|
9
|
-
# modify it under the terms of the GNU Lesser General Public
|
10
|
-
# License as published by the Free Software Foundation; either
|
11
|
-
# version 2 of the License, or (at your option) any later version.
|
6
|
+
# License:: Ruby's
|
12
7
|
#
|
13
|
-
#
|
14
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
16
|
-
# Lesser General Public License for more details.
|
8
|
+
# $Id: flatfile.rb,v 1.46 2006/02/22 10:01:27 ngoto Exp $
|
17
9
|
#
|
18
|
-
# You should have received a copy of the GNU Lesser General Public
|
19
|
-
# License along with this library; if not, write to the Free Software
|
20
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
21
|
-
#++
|
22
|
-
#
|
23
|
-
# $Id: flatfile.rb,v 1.41 2005/11/01 15:34:45 ngoto Exp $
|
24
10
|
#
|
25
11
|
# Bio::FlatFile is a helper and wrapper class to read a biological data file.
|
26
12
|
# It acts like a IO object.
|
27
13
|
# It can automatically detect data format, and users do not need to tell
|
28
14
|
# the class what the data is.
|
29
15
|
#
|
16
|
+
require 'tsort'
|
30
17
|
|
31
18
|
module Bio
|
32
19
|
|
@@ -38,60 +25,407 @@ module Bio
|
|
38
25
|
|
39
26
|
include Enumerable
|
40
27
|
|
28
|
+
# Wrapper for an IO (or IO-like) object.
|
29
|
+
# It can input with a buffer.
|
30
|
+
class BufferedInputStream
|
31
|
+
# Creates a new input stream wrapper
|
32
|
+
def initialize(io, path)
|
33
|
+
@io = io
|
34
|
+
@path = path
|
35
|
+
# initialize prefetch buffer
|
36
|
+
@buffer = ''
|
37
|
+
@path = path
|
38
|
+
end
|
39
|
+
|
40
|
+
# Creates a new input stream wrapper from the given IO object.
|
41
|
+
def self.for_io(io)
|
42
|
+
begin
|
43
|
+
path = io.path
|
44
|
+
rescue NameError
|
45
|
+
path = nil
|
46
|
+
end
|
47
|
+
self.new(io, path)
|
48
|
+
end
|
49
|
+
|
50
|
+
# Creates a new input stream wrapper to open file _filename_
|
51
|
+
# by using File.open.
|
52
|
+
# *arg is passed to File.open.
|
53
|
+
#
|
54
|
+
# Like File.open, a block can be accepted.
|
55
|
+
def self.open_file(filename, *arg)
|
56
|
+
if block_given? then
|
57
|
+
File.open(filename, *arg) do |fobj|
|
58
|
+
yield self.new(fobj, filename)
|
59
|
+
end
|
60
|
+
else
|
61
|
+
fobj = File.open(filename, *arg)
|
62
|
+
self.new(fobj, filename)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# Creates a new input stream wrapper from URI specified as _uri_
|
67
|
+
# by using OpenURI.open_uri or URI#open.
|
68
|
+
# _uri_ must be a String or URI object.
|
69
|
+
# *arg is passed to OpenURI.open_uri or URI#open.
|
70
|
+
#
|
71
|
+
# Like OpenURI.open_uri, it can accept a block.
|
72
|
+
def self.open_uri(uri, *arg)
|
73
|
+
if uri.kind_of?(URI)
|
74
|
+
if block_given?
|
75
|
+
uri.open(*arg) do |fobj|
|
76
|
+
yield self.new(fobj, uri.to_s)
|
77
|
+
end
|
78
|
+
else
|
79
|
+
fobj = uri.open(*arg)
|
80
|
+
self.new(fobj, uri.to_s)
|
81
|
+
end
|
82
|
+
else
|
83
|
+
if block_given?
|
84
|
+
OpenURI.open_uri(uri, *arg) do |fobj|
|
85
|
+
yield self.new(fobj, uri)
|
86
|
+
end
|
87
|
+
else
|
88
|
+
fobj = OpenURI.open_uri(uri, *arg)
|
89
|
+
self.new(fobj, uri)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
# Pathname, filename or URI to open the object.
|
95
|
+
# Like File#path, returned value isn't normalized.
|
96
|
+
attr_reader :path
|
97
|
+
|
98
|
+
# Converts to IO object if possible
|
99
|
+
def to_io
|
100
|
+
@io.to_io
|
101
|
+
end
|
102
|
+
|
103
|
+
# Closes the IO object if possible
|
104
|
+
def close
|
105
|
+
@io.close
|
106
|
+
end
|
107
|
+
|
108
|
+
# Rewinds the IO object if possible
|
109
|
+
# Internal buffer in this wrapper is cleared.
|
110
|
+
def rewind
|
111
|
+
r = @io.rewind
|
112
|
+
@buffer = ''
|
113
|
+
r
|
114
|
+
end
|
115
|
+
|
116
|
+
# Returns current file position
|
117
|
+
def pos
|
118
|
+
@io.pos - @buffer.size
|
119
|
+
end
|
120
|
+
|
121
|
+
# Sets current file position if possible
|
122
|
+
# Internal buffer in this wrapper is cleared.
|
123
|
+
def pos=(p)
|
124
|
+
r = (@io.pos = p)
|
125
|
+
@buffer = ''
|
126
|
+
r
|
127
|
+
end
|
128
|
+
|
129
|
+
# Returns true if end-of-file. Otherwise, returns false.
|
130
|
+
#
|
131
|
+
# Note that it returns false if internal buffer in this wrapper
|
132
|
+
# is not empty.
|
133
|
+
def eof?
|
134
|
+
if @buffer.size > 0
|
135
|
+
false
|
136
|
+
else
|
137
|
+
@io.eof?
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
# Same as IO#gets.
|
142
|
+
def gets(io_rs = $/)
|
143
|
+
if @buffer.size > 0
|
144
|
+
if io_rs == nil then
|
145
|
+
r = @buffer + @io.gets(nil).to_s
|
146
|
+
@buffer = ''
|
147
|
+
else
|
148
|
+
if io_rs == '' then
|
149
|
+
sp_rs = /\n\n/n
|
150
|
+
sp_rs_orig = "\n\n"
|
151
|
+
else
|
152
|
+
sp_rs = Regexp.new(Regexp.escape(io_rs, 'n'), 0, 'n')
|
153
|
+
sp_rs_orig = io_rs
|
154
|
+
end
|
155
|
+
a = @buffer.split(sp_rs, 2)
|
156
|
+
if a.size > 1 then
|
157
|
+
r = a[0] + sp_rs_orig
|
158
|
+
@buffer = a[1]
|
159
|
+
else
|
160
|
+
@buffer << @io.gets(io_rs).to_s
|
161
|
+
a = @buffer.split(sp_rs, 2)
|
162
|
+
if a.size > 1 then
|
163
|
+
r = a[0] + sp_rs_orig
|
164
|
+
@buffer = a[1].to_s
|
165
|
+
else
|
166
|
+
r = @buffer
|
167
|
+
@buffer = ''
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
171
|
+
r
|
172
|
+
else
|
173
|
+
@io.gets(io_rs)
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
# Pushes back given str to the internal buffer.
|
178
|
+
# Returns nil.
|
179
|
+
# str must be read previously with the wrapper object.
|
180
|
+
#
|
181
|
+
# Note that in current implementation, the str can be everything,
|
182
|
+
# but please don't depend on it.
|
183
|
+
#
|
184
|
+
def ungets(str)
|
185
|
+
@buffer = str + @buffer
|
186
|
+
nil
|
187
|
+
end
|
188
|
+
|
189
|
+
# Same as IO#getc.
|
190
|
+
def getc
|
191
|
+
if @buffer.size > 0 then
|
192
|
+
r = @buffer[0]
|
193
|
+
@buffer = @buffer[1..-1]
|
194
|
+
else
|
195
|
+
r = @io.getc
|
196
|
+
end
|
197
|
+
r
|
198
|
+
end
|
199
|
+
|
200
|
+
# Pushes back one character into the internal buffer.
|
201
|
+
# Unlike IO#ungetc, it can be called more than one time.
|
202
|
+
def ungetc(c)
|
203
|
+
@buffer = sprintf("%c", c) + @buffer
|
204
|
+
nil
|
205
|
+
end
|
206
|
+
|
207
|
+
# Gets current prefetch buffer
|
208
|
+
def prefetch_buffer
|
209
|
+
@buffer
|
210
|
+
end
|
211
|
+
|
212
|
+
# It does @io.gets, and adds the returned string
|
213
|
+
# to the internal buffer, and returns the string.
|
214
|
+
def prefetch_gets(*arg)
|
215
|
+
r = @io.gets(*arg)
|
216
|
+
@buffer << r if r
|
217
|
+
r
|
218
|
+
end
|
219
|
+
|
220
|
+
# It does @io.readpartial, and adds the returned string
|
221
|
+
# to the internal buffer, and returns the string.
|
222
|
+
def prefetch_readpartial(*arg)
|
223
|
+
r = @io.readpartial(*arg)
|
224
|
+
@buffer << r if r
|
225
|
+
r
|
226
|
+
end
|
227
|
+
|
228
|
+
# Skips space characters in the stream.
|
229
|
+
# returns nil.
|
230
|
+
def skip_spaces
|
231
|
+
ws = { ?\s => true, ?\n => true, ?\r => true, ?\t => true }
|
232
|
+
while r = self.getc
|
233
|
+
unless ws[r] then
|
234
|
+
self.ungetc(r)
|
235
|
+
break
|
236
|
+
end
|
237
|
+
end
|
238
|
+
nil
|
239
|
+
end
|
240
|
+
end #class BufferedInputStream
|
241
|
+
|
242
|
+
# Splitter is a class to get entries from a buffered input stream.
|
243
|
+
module Splitter
|
244
|
+
# This is a template of splitter.
|
245
|
+
class Template
|
246
|
+
# Creates a new splitter.
|
247
|
+
def initialize(klass, bstream)
|
248
|
+
@stream = bstream
|
249
|
+
raise NotImplementedError
|
250
|
+
end
|
251
|
+
|
252
|
+
# skips leader of the entry.
|
253
|
+
def skip_leader
|
254
|
+
raise NotImplementedError
|
255
|
+
end
|
256
|
+
|
257
|
+
# Gets entry as a string
|
258
|
+
def get_entry
|
259
|
+
raise NotImplementedError
|
260
|
+
end
|
261
|
+
|
262
|
+
# the last entry read from the stream
|
263
|
+
attr_reader :entry
|
264
|
+
|
265
|
+
# start position of the entry
|
266
|
+
attr_reader :entry_start_pos
|
267
|
+
|
268
|
+
# (end position of the entry) + 1
|
269
|
+
attr_reader :entry_ended_pos
|
270
|
+
end
|
271
|
+
|
272
|
+
# Default splitter.
|
273
|
+
# It sees following constants in the given class.
|
274
|
+
# DELIMITER:: (String) delimiter indicates the end of a entry.
|
275
|
+
# FLATFILE_HEADER:: (String) start of a entry, located on head of a line.
|
276
|
+
# DELIMITER_OVERRUN:: (Integer) excess read size included in DELIMITER.
|
277
|
+
#
|
278
|
+
class Default < Template
|
279
|
+
# Creates a new splitter.
|
280
|
+
# klass:: database class
|
281
|
+
# bstream:: input stream. It must be a BufferedInputStream object.
|
282
|
+
def initialize(klass, bstream)
|
283
|
+
@stream = bstream
|
284
|
+
@delimiter = klass::DELIMITER rescue nil
|
285
|
+
@header = klass::FLATFILE_HEADER rescue nil
|
286
|
+
# for specific classes' benefit
|
287
|
+
unless header
|
288
|
+
if klass == Bio::GenBank or klass == Bio::GenPept
|
289
|
+
@header = 'LOCUS '
|
290
|
+
end
|
291
|
+
end
|
292
|
+
@delimiter_overrun = klass::DELIMITER_OVERRUN rescue nil
|
293
|
+
end
|
294
|
+
|
295
|
+
# (String) delimiter indicates the end of a entry.
|
296
|
+
attr_accessor :delimiter
|
297
|
+
|
298
|
+
# (String) start of a entry, located on head of a line.
|
299
|
+
attr_accessor :header
|
300
|
+
|
301
|
+
# (Integer) excess read data size included in delimiter.
|
302
|
+
attr_accessor :delimiter_overrun
|
303
|
+
|
304
|
+
# Skips leader of the entry.
|
305
|
+
#
|
306
|
+
# If @header is not nil, it reads till the contents of @header
|
307
|
+
# comes at the head of a line.
|
308
|
+
# If correct FLATFILE_HEADER is found, returns true.
|
309
|
+
# Otherwise, returns nil.
|
310
|
+
def skip_leader
|
311
|
+
if @header then
|
312
|
+
data = ''
|
313
|
+
while s = @stream.gets(@header)
|
314
|
+
data << s
|
315
|
+
if data.split(/[\r\n]+/)[-1] == @header then
|
316
|
+
@stream.ungets(@header)
|
317
|
+
return true
|
318
|
+
end
|
319
|
+
end
|
320
|
+
# @header was not found. For safety,
|
321
|
+
# pushes back data with removing white spaces in the head.
|
322
|
+
data.sub(/\A\s+/, '')
|
323
|
+
@stream.ungets(data)
|
324
|
+
return nil
|
325
|
+
else
|
326
|
+
@stream.skip_spaces
|
327
|
+
return nil
|
328
|
+
end
|
329
|
+
end
|
330
|
+
|
331
|
+
# gets a entry
|
332
|
+
def get_entry
|
333
|
+
p0 = @stream.pos
|
334
|
+
e = @stream.gets(@delimiter)
|
335
|
+
if e and @delimiter_overrun then
|
336
|
+
if e[-@delimiter.size, @delimiter.size ] == @delimiter then
|
337
|
+
overrun = e[-@delimiter_overrun, @delimiter_overrun]
|
338
|
+
e[-@delimiter_overrun, @delimiter_overrun] = ''
|
339
|
+
@stream.ungets(overrun)
|
340
|
+
end
|
341
|
+
end
|
342
|
+
p1 = @stream.pos
|
343
|
+
@entry_start_pos = p0
|
344
|
+
@entry = e
|
345
|
+
@entry_ended_pos = p1
|
346
|
+
@entry
|
347
|
+
end
|
348
|
+
end #class Default
|
349
|
+
end #module Splitter
|
350
|
+
|
351
|
+
#
|
352
|
+
# Bio::FlatFile.open(file, *arg)
|
353
|
+
# Bio::FlatFile.open(dbclass, file, *arg)
|
354
|
+
#
|
41
355
|
# Creates a new Bio::FlatFile object to read a file or a stream
|
42
|
-
# which contains
|
356
|
+
# which contains _dbclass_ data.
|
43
357
|
#
|
44
|
-
#
|
358
|
+
# _dbclass_ should be a class (or module) or nil.
|
45
359
|
# e.g. Bio::GenBank, Bio::FastaFormat.
|
46
360
|
#
|
47
|
-
# If
|
48
|
-
# the method opens a local file named
|
49
|
-
# with
|
361
|
+
# If _file_ is a filename (which doesn't have gets method),
|
362
|
+
# the method opens a local file named _file_
|
363
|
+
# with <code>File.open(filename, *arg)</code>.
|
50
364
|
#
|
51
|
-
# When nil is given to
|
52
|
-
#
|
53
|
-
#
|
54
|
-
#
|
55
|
-
# FlatFile#
|
365
|
+
# When _dbclass_ is omitted or nil is given to _dbclass_,
|
366
|
+
# the method tries to determine database class
|
367
|
+
# (file format) automatically.
|
368
|
+
# When it fails to determine, dbclass is set to nil
|
369
|
+
# and FlatFile#next_entry would fail.
|
370
|
+
# You can still set dbclass using FlatFile#dbclass= method.
|
56
371
|
#
|
57
372
|
# * Example 1
|
58
373
|
# Bio::FlatFile.open(Bio::GenBank, "genbank/gbest40.seq")
|
59
374
|
# * Example 2
|
60
375
|
# Bio::FlatFile.open(nil, "embl/est_hum17.dat")
|
61
376
|
# * Example 3
|
377
|
+
# Bio::FlatFile.open("genbank/gbest40.seq")
|
378
|
+
#
|
379
|
+
# * Example 4
|
62
380
|
# Bio::FlatFile.open(Bio::GenBank, $stdin)
|
63
381
|
#
|
64
|
-
# If it is called with block, the block will be executed with
|
65
|
-
# a
|
66
|
-
#
|
382
|
+
# If it is called with a block, the block will be executed with
|
383
|
+
# a new Bio::FlatFile object. If filename is given,
|
384
|
+
# the file is automatically closed when leaving the block.
|
67
385
|
#
|
68
|
-
# * Example
|
386
|
+
# * Example 5
|
69
387
|
# Bio::FlatFile.open(nil, 'test4.fst') do |ff|
|
70
388
|
# ff.each { |e| print e.definition, "\n" }
|
71
389
|
# end
|
72
390
|
#
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
391
|
+
# * Example 6
|
392
|
+
# Bio::FlatFile.open('test4.fst') do |ff|
|
393
|
+
# ff.each { |e| print e.definition, "\n" }
|
394
|
+
# end
|
395
|
+
#
|
396
|
+
# Compatibility Note:
|
397
|
+
# <em>*arg</em> is completely passed to the <code>File.open</code>
|
398
|
+
# and you cannot specify ":raw => true" or ":raw => false".
|
399
|
+
#
|
400
|
+
def self.open(*arg, &block)
|
401
|
+
# FlatFile.open(dbclass, file, mode, perm)
|
402
|
+
# FlatFile.open(file, mode, perm)
|
403
|
+
if arg.size <= 0
|
404
|
+
raise ArgumentError, 'wrong number of arguments (0 for 1)'
|
405
|
+
end
|
406
|
+
x = arg.shift
|
407
|
+
if x.is_a?(Module) then
|
408
|
+
# FlatFile.open(dbclass, filename_or_io, ...)
|
409
|
+
dbclass = x
|
410
|
+
elsif x.nil? then
|
411
|
+
# FlatFile.open(nil, filename_or_io, ...)
|
412
|
+
dbclass = nil
|
413
|
+
else
|
414
|
+
# FlatFile.open(filename, ...)
|
415
|
+
dbclass = nil
|
416
|
+
arg.unshift(x)
|
417
|
+
end
|
418
|
+
if arg.size <= 0
|
419
|
+
raise ArgumentError, 'wrong number of arguments (1 for 2)'
|
420
|
+
end
|
421
|
+
file = arg.shift
|
422
|
+
# check if file is filename or IO object
|
81
423
|
unless file.respond_to?(:gets)
|
82
424
|
# 'file' is a filename
|
83
|
-
|
84
|
-
File.open(file, *openmode) do |fobj|
|
85
|
-
ff = self.new(dbclass, fobj, *arg)
|
86
|
-
yield ff
|
87
|
-
end
|
88
|
-
else
|
89
|
-
fobj = File.open(file, *openmode)
|
90
|
-
self.new(dbclass, fobj, *arg)
|
91
|
-
end
|
425
|
+
self.open_file(file, *arg, &block)
|
92
426
|
else
|
93
427
|
# 'file' is a IO object
|
94
|
-
ff = self.new(dbclass, file
|
428
|
+
ff = self.new(dbclass, file)
|
95
429
|
block_given? ? (yield ff) : ff
|
96
430
|
end
|
97
431
|
end
|
@@ -110,7 +444,8 @@ module Bio
|
|
110
444
|
end
|
111
445
|
|
112
446
|
# Same as FlatFile.auto(filename_or_stream, *arg).to_a
|
113
|
-
#
|
447
|
+
#
|
448
|
+
# (This method might be OBSOLETED in the future.)
|
114
449
|
def self.to_a(*arg)
|
115
450
|
self.auto(*arg) do |ff|
|
116
451
|
raise 'cannot determine file format' unless ff.dbclass
|
@@ -118,6 +453,46 @@ module Bio
|
|
118
453
|
end
|
119
454
|
end
|
120
455
|
|
456
|
+
# Same as FlatFile.auto(filename, *arg),
|
457
|
+
# except that it only accepts a filename and doesn't accept an IO object.
|
458
|
+
# File format is automatically determined.
|
459
|
+
#
|
460
|
+
# It can accept a block.
|
461
|
+
# If a block is given, it returns the block's return value.
|
462
|
+
# Otherwise, it returns a new FlatFile object.
|
463
|
+
#
|
464
|
+
def self.open_file(filename, *arg)
|
465
|
+
if block_given? then
|
466
|
+
BufferedInputStream.open_file(filename, *arg) do |stream|
|
467
|
+
yield self.new(nil, stream)
|
468
|
+
end
|
469
|
+
else
|
470
|
+
stream = BufferedInputStream.open_file(filename, *arg)
|
471
|
+
self.new(nil, stream)
|
472
|
+
end
|
473
|
+
end
|
474
|
+
|
475
|
+
# Opens URI specified as _uri_.
|
476
|
+
# _uri_ must be a String or URI object.
|
477
|
+
# *arg is passed to OpenURI.open_uri or URI#open.
|
478
|
+
#
|
479
|
+
# Like FlatFile#open, it can accept a block.
|
480
|
+
#
|
481
|
+
# Note that you MUST explicitly require 'open-uri'.
|
482
|
+
# Because open-uri.rb modifies existing class,
|
483
|
+
# it isn't required by default.
|
484
|
+
#
|
485
|
+
def self.open_uri(uri, *arg)
|
486
|
+
if block_given? then
|
487
|
+
BufferedInputStream.open_uri(uri, *arg) do |stream|
|
488
|
+
yield self.new(nil, stream)
|
489
|
+
end
|
490
|
+
else
|
491
|
+
stream = BufferedInputStream.open_uri(uri, *arg)
|
492
|
+
self.new(nil, stream)
|
493
|
+
end
|
494
|
+
end
|
495
|
+
|
121
496
|
# Same as FlatFile.open, except that 'stream' should be a opened
|
122
497
|
# stream object (IO, File, ..., who have the 'gets' method).
|
123
498
|
#
|
@@ -126,62 +501,101 @@ module Bio
|
|
126
501
|
# * Example 2
|
127
502
|
# Bio::FlatFile.new(Bio::GenBank, IO.popen("gzip -dc nc1101.flat.gz"))
|
128
503
|
#
|
129
|
-
#
|
130
|
-
#
|
131
|
-
#
|
132
|
-
# default: false (not "raw mode").
|
504
|
+
# Compatibility Note:
|
505
|
+
# Now, you cannot specify ":raw => true" or ":raw => false".
|
506
|
+
# Below styles are DEPRECATED.
|
133
507
|
#
|
134
|
-
# * Example 3
|
135
|
-
# Bio::FlatFile.new(nil, $stdin, :raw=>true)
|
508
|
+
# * Example 3 (deprecated)
|
509
|
+
# # Bio::FlatFile.new(nil, $stdin, :raw=>true) # => ERROR
|
510
|
+
# # Please rewrite as below.
|
511
|
+
# ff = Bio::FlatFile.new(nil, $stdin)
|
512
|
+
# ff.raw = true
|
136
513
|
# * Example 3 in old style (deprecated)
|
137
|
-
# Bio::FlatFile.new(nil, $stdin, true)
|
514
|
+
# # Bio::FlatFile.new(nil, $stdin, true) # => ERROR
|
515
|
+
# # Please rewrite as below.
|
516
|
+
# ff = Bio::FlatFile.new(nil, $stdin)
|
517
|
+
# ff.raw = true
|
138
518
|
#
|
139
|
-
def initialize(dbclass, stream
|
519
|
+
def initialize(dbclass, stream)
|
140
520
|
# 2nd arg: IO object
|
141
|
-
@
|
142
|
-
|
143
|
-
self.raw = false
|
144
|
-
if options.is_a?(Hash) then
|
145
|
-
self.raw = options[:raw] if options.has_key?(:raw)
|
521
|
+
if @stream.kind_of?(BufferedInputStream)
|
522
|
+
@stream = stream
|
146
523
|
else
|
147
|
-
|
524
|
+
@stream = BufferedInputStream.for_io(stream)
|
148
525
|
end
|
149
|
-
#
|
150
|
-
|
526
|
+
# default is not raw mode (raw = false)
|
527
|
+
self.raw = false
|
151
528
|
# 1st arg: database class (or file format autodetection)
|
152
529
|
if dbclass then
|
153
|
-
|
530
|
+
self.dbclass = dbclass
|
154
531
|
else
|
155
|
-
|
532
|
+
autodetect
|
156
533
|
end
|
534
|
+
#
|
535
|
+
@skip_leader_mode = :firsttime
|
536
|
+
@firsttime_flag = true
|
537
|
+
end
|
538
|
+
|
539
|
+
# The mode how to skip leader of the data.
|
540
|
+
# :firsttime :: (DEFAULT) only head of file (= first time to read)
|
541
|
+
# :everytime :: everytime to read entry
|
542
|
+
# nil :: never skip
|
543
|
+
attr_accessor :skip_leader_mode
|
544
|
+
|
545
|
+
# (DEPRECATED) IO object in the flatfile object.
|
546
|
+
#
|
547
|
+
# Compatibility Note: Bio::FlatFile#io is deprecated.
|
548
|
+
# Please use Bio::FlatFile#to_io instead.
|
549
|
+
def io
|
550
|
+
warn "Bio::FlatFile#io is deprecated."
|
551
|
+
@stream.to_io
|
157
552
|
end
|
158
553
|
|
159
554
|
# IO object in the flatfile object.
|
160
|
-
|
555
|
+
#
|
556
|
+
# Compatibility Note: Bio::FlatFile#io is deprecated.
|
557
|
+
def to_io
|
558
|
+
@stream.to_io
|
559
|
+
end
|
560
|
+
|
561
|
+
# Pathname, filename or URI (or nil).
|
562
|
+
def path
|
563
|
+
@stream.path
|
564
|
+
end
|
161
565
|
|
162
566
|
# Get next entry.
|
163
567
|
def next_entry
|
164
|
-
@
|
165
|
-
|
568
|
+
if @skip_leader_mode and
|
569
|
+
((@firsttime_flag and @skip_leader_mode == :firsttime) or
|
570
|
+
@skip_leader_mode == :everytime)
|
571
|
+
@splitter.skip_leader
|
572
|
+
end
|
573
|
+
r = @splitter.get_entry
|
574
|
+
@firsttime_flag = false
|
575
|
+
return nil unless r
|
166
576
|
if raw then
|
167
|
-
|
577
|
+
r
|
168
578
|
else
|
169
|
-
|
170
|
-
|
171
|
-
s = e.entry_overrun
|
172
|
-
rescue NameError
|
173
|
-
s = nil
|
174
|
-
end
|
175
|
-
if s then
|
176
|
-
@entry_raw[-(s.length), s.length] = ''
|
177
|
-
ungets(s)
|
178
|
-
end
|
179
|
-
e
|
579
|
+
@entry = @dbclass.new(r)
|
580
|
+
@entry
|
180
581
|
end
|
181
582
|
end
|
583
|
+
attr_reader :entry
|
182
584
|
|
183
585
|
# Returns the last raw entry as a string.
|
184
|
-
|
586
|
+
def entry_raw
|
587
|
+
@splitter.entry
|
588
|
+
end
|
589
|
+
|
590
|
+
# start position of the last entry
|
591
|
+
def entry_start_pos
|
592
|
+
@splitter.entry_start_pos
|
593
|
+
end
|
594
|
+
|
595
|
+
# (end position of the last entry) + 1
|
596
|
+
def entry_ended_pos
|
597
|
+
@splitter.entry_ended_pos
|
598
|
+
end
|
185
599
|
|
186
600
|
# Iterates over each entry in the flatfile.
|
187
601
|
#
|
@@ -193,23 +607,23 @@ module Bio
|
|
193
607
|
# end
|
194
608
|
def each_entry
|
195
609
|
while e = self.next_entry
|
196
|
-
|
610
|
+
yield e
|
197
611
|
end
|
198
612
|
end
|
199
|
-
alias each each_entry
|
613
|
+
alias :each :each_entry
|
200
614
|
|
201
615
|
# Resets file pointer to the start of the flatfile.
|
202
616
|
# (similar to IO#rewind)
|
203
617
|
def rewind
|
204
|
-
r = @
|
205
|
-
@
|
618
|
+
r = @stream.rewind
|
619
|
+
@firsttime_flag = true
|
206
620
|
r
|
207
621
|
end
|
208
622
|
|
209
623
|
# Closes input stream.
|
210
624
|
# (similar to IO#close)
|
211
625
|
def close
|
212
|
-
@
|
626
|
+
@stream.close
|
213
627
|
end
|
214
628
|
|
215
629
|
# Returns current position of input stream.
|
@@ -217,9 +631,9 @@ module Bio
|
|
217
631
|
# the result is not guaranteed.
|
218
632
|
# It is similar to IO#pos.
|
219
633
|
# Note that it will not be equal to io.pos,
|
220
|
-
# because FlatFile
|
634
|
+
# because FlatFile has its own internal buffer.
|
221
635
|
def pos
|
222
|
-
@
|
636
|
+
@stream.pos
|
223
637
|
end
|
224
638
|
|
225
639
|
# (Not recommended to use it.)
|
@@ -228,86 +642,17 @@ module Bio
|
|
228
642
|
# the result is not guaranteed.
|
229
643
|
# It is similar to IO#pos=.
|
230
644
|
# Note that it will not be equal to io.pos=,
|
231
|
-
# because FlatFile
|
645
|
+
# because FlatFile has its own internal buffer.
|
232
646
|
def pos=(p)
|
233
|
-
|
234
|
-
@prefetch = ''
|
235
|
-
r
|
647
|
+
@stream.pos=(p)
|
236
648
|
end
|
237
649
|
|
238
650
|
# Returns true if input stream is end-of-file.
|
239
651
|
# Otherwise, returns false.
|
240
652
|
# (Similar to IO#eof?, but may not be equal to io.eof?,
|
241
|
-
# because FlatFile
|
653
|
+
# because FlatFile has its own internal buffer.)
|
242
654
|
def eof?
|
243
|
-
|
244
|
-
false
|
245
|
-
else
|
246
|
-
@io.eof?
|
247
|
-
end
|
248
|
-
end
|
249
|
-
|
250
|
-
# Similar to IO#gets.
|
251
|
-
# Internal use only. Users should not call it directly.
|
252
|
-
def gets(io_rs = $/)
|
253
|
-
if @prefetch.size > 0
|
254
|
-
if io_rs == nil then
|
255
|
-
r = @prefetch + @io.gets(nil).to_s
|
256
|
-
@prefetch = ''
|
257
|
-
else
|
258
|
-
if io_rs == '' then
|
259
|
-
sp_rs = /\n\n/n
|
260
|
-
sp_rs_orig = "\n\n"
|
261
|
-
else
|
262
|
-
sp_rs = Regexp.new(Regexp.escape(io_rs, 'n'), 0, 'n')
|
263
|
-
sp_rs_orig = io_rs
|
264
|
-
end
|
265
|
-
a = @prefetch.split(sp_rs, 2)
|
266
|
-
if a.size > 1 then
|
267
|
-
r = a[0] + sp_rs_orig
|
268
|
-
@prefetch = a[1]
|
269
|
-
else
|
270
|
-
@prefetch << @io.gets(io_rs).to_s
|
271
|
-
a = @prefetch.split(sp_rs, 2)
|
272
|
-
if a.size > 1 then
|
273
|
-
r = a[0] + sp_rs_orig
|
274
|
-
@prefetch = a[1].to_s
|
275
|
-
else
|
276
|
-
r = @prefetch
|
277
|
-
@prefetch = ''
|
278
|
-
end
|
279
|
-
end
|
280
|
-
end
|
281
|
-
r
|
282
|
-
else
|
283
|
-
@io.gets(io_rs)
|
284
|
-
end
|
285
|
-
end
|
286
|
-
|
287
|
-
# Unread read data.
|
288
|
-
# Internal use only. Users must not call it.
|
289
|
-
def ungets(str)
|
290
|
-
@prefetch = str + @prefetch
|
291
|
-
nil
|
292
|
-
end
|
293
|
-
|
294
|
-
# Similar to IO#getc.
|
295
|
-
# Internal use only. Users should not call it directly.
|
296
|
-
def getc
|
297
|
-
if @prefetch.size > 0 then
|
298
|
-
r = @prefetch[0]
|
299
|
-
@prefetch = @prefetch[1..-1]
|
300
|
-
else
|
301
|
-
r = @io.getc
|
302
|
-
end
|
303
|
-
r
|
304
|
-
end
|
305
|
-
|
306
|
-
# Similar to IO#ungetc.
|
307
|
-
# Internal use only. Users should not call it.
|
308
|
-
def ungetc(c)
|
309
|
-
@prefetch = sprintf("%c", c) + @prefetch
|
310
|
-
nil
|
655
|
+
@stream.eof?
|
311
656
|
end
|
312
657
|
|
313
658
|
# If true is given, the next_entry method returns
|
@@ -319,14 +664,24 @@ module Bio
|
|
319
664
|
# If true, raw mode.
|
320
665
|
attr_reader :raw
|
321
666
|
|
667
|
+
# Similar to IO#gets.
|
668
|
+
# Internal use only. Users should not call it directly.
|
669
|
+
def gets(*arg)
|
670
|
+
@stream.gets(*arg)
|
671
|
+
end
|
672
|
+
|
322
673
|
# Sets database class. Please use only if autodetect fails.
|
323
|
-
def dbclass=(
|
324
|
-
if
|
325
|
-
|
326
|
-
|
674
|
+
def dbclass=(klass)
|
675
|
+
if klass then
|
676
|
+
@dbclass = klass
|
677
|
+
begin
|
678
|
+
@splitter = @dbclass.flatfile_splitter(@dbclass, @stream)
|
679
|
+
rescue NameError, NoMethodError
|
680
|
+
@splitter = Splitter::Default.new(klass, @stream)
|
681
|
+
end
|
327
682
|
else
|
328
|
-
|
329
|
-
|
683
|
+
@dbclass = nil
|
684
|
+
@splitter = nil
|
330
685
|
end
|
331
686
|
end
|
332
687
|
|
@@ -340,157 +695,518 @@ module Bio
|
|
340
695
|
#
|
341
696
|
# The method can be called anytime if you want (but not recommended).
|
342
697
|
# This might be useful if the input file is a mixture of data in multiple formats.
|
343
|
-
def autodetect(lines = 31)
|
344
|
-
r =
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
if line and line.strip.size > 0 then
|
349
|
-
r = self.class.autodetect(@prefetch)
|
350
|
-
if r then
|
351
|
-
self.dbclass = r
|
352
|
-
return r
|
353
|
-
end
|
354
|
-
end
|
355
|
-
end
|
698
|
+
def autodetect(lines = 31, ad = AutoDetect.default)
|
699
|
+
if r = ad.autodetect_flatfile(self, lines)
|
700
|
+
self.dbclass = r
|
701
|
+
else
|
702
|
+
self.dbclass = nil unless self.dbclass
|
356
703
|
end
|
357
|
-
self.dbclass = nil unless dbclass
|
358
704
|
r
|
359
705
|
end
|
360
706
|
|
361
707
|
# Detects database class (== file format) of given file.
|
362
708
|
# If fails to determine, returns nil.
|
363
709
|
def self.autodetect_file(filename)
|
364
|
-
|
365
|
-
r = ff.dbclass
|
366
|
-
ff.close
|
367
|
-
r
|
710
|
+
self.open_file(filename).dbclass
|
368
711
|
end
|
369
712
|
|
370
713
|
# Detects database class (== file format) of given input stream.
|
371
714
|
# If fails to determine, returns nil.
|
372
715
|
# Caution: the method reads some data from the input stream,
|
373
716
|
# and the data will be lost.
|
717
|
+
def self.autodetect_io(io)
|
718
|
+
self.new(nil, io).dbclass
|
719
|
+
end
|
720
|
+
|
721
|
+
# This is OBSOLETED. Please use autodetect_io(io) instead.
|
374
722
|
def self.autodetect_stream(io)
|
375
|
-
|
376
|
-
|
377
|
-
r
|
723
|
+
$stderr.print "Bio::FlatFile.autodetect_stream will be deprecated." if $VERBOSE
|
724
|
+
self.autodetect_io(io)
|
378
725
|
end
|
379
726
|
|
380
727
|
# Detects database class (== file format) of given string.
|
381
728
|
# If fails to determine, returns false or nil.
|
382
729
|
def self.autodetect(text)
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
730
|
+
AutoDetect.default.autodetect(text)
|
731
|
+
end
|
732
|
+
|
733
|
+
|
734
|
+
# AutoDetect automatically determines database class of given data.
|
735
|
+
class AutoDetect
|
736
|
+
|
737
|
+
include TSort
|
738
|
+
|
739
|
+
# Template of a single rule of autodetection
|
740
|
+
class RuleTemplate
|
741
|
+
# Creates a new element.
|
742
|
+
def self.[](*arg)
|
743
|
+
self.new(*arg)
|
744
|
+
end
|
745
|
+
|
746
|
+
# Creates a new element.
|
747
|
+
def initialize
|
748
|
+
a = Array.new
|
749
|
+
def a.inspect
|
750
|
+
"[#{self.collect { |e| e.name.inspect }.join(' ')}]"
|
751
|
+
end
|
752
|
+
@higher_priority_elements = a.clone
|
753
|
+
@lower_priority_elements = a.clone
|
754
|
+
@name = nil
|
408
755
|
end
|
409
756
|
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
Bio::KEGG::KO
|
417
|
-
when /^ENTRY .+ Glycan\s*$/
|
418
|
-
Bio::KEGG::GLYCAN
|
419
|
-
when /^ENTRY .+ (CDS|gene|.*RNA) /
|
420
|
-
Bio::KEGG::GENES
|
421
|
-
when /^ENTRY EC [0-9\.]+$/
|
422
|
-
Bio::KEGG::ENZYME
|
423
|
-
when /^ENTRY C[A-Za-z0-9\._]+$/
|
424
|
-
Bio::KEGG::COMPOUND
|
425
|
-
when /^ENTRY R[A-Za-z0-9\._]+$/
|
426
|
-
Bio::KEGG::REACTION
|
427
|
-
when /^ENTRY [a-z]+$/
|
428
|
-
Bio::KEGG::GENOME
|
429
|
-
|
430
|
-
when /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/
|
431
|
-
if $1 == 'clusters'
|
432
|
-
Bio::FANTOM::MaXML::Cluster
|
433
|
-
elsif $1 == 'sequences'
|
434
|
-
Bio::FANTOM::MaXML::Sequence
|
435
|
-
else
|
436
|
-
nil #unknown
|
757
|
+
# self is prior to the _elem_.
|
758
|
+
def is_prior_to(elem)
|
759
|
+
return nil if self == elem
|
760
|
+
elem.higher_priority_elements << self
|
761
|
+
self.lower_priority_elements << elem
|
762
|
+
true
|
437
763
|
end
|
438
764
|
|
439
|
-
|
440
|
-
|
765
|
+
# higher priority elements
|
766
|
+
attr_reader :higher_priority_elements
|
767
|
+
# lower priority elements
|
768
|
+
attr_reader :lower_priority_elements
|
769
|
+
|
770
|
+
# database classes
|
771
|
+
attr_reader :dbclasses
|
441
772
|
|
442
|
-
|
443
|
-
|
773
|
+
# unique name of the element
|
774
|
+
attr_accessor :name
|
775
|
+
|
776
|
+
# If given text (and/or meta information) is known, returns
|
777
|
+
# the database class.
|
778
|
+
# Otherwise, returns nil or false.
|
779
|
+
#
|
780
|
+
# _text_ will be a String.
|
781
|
+
# _meta_ will be a Hash.
|
782
|
+
# _meta_ may contain following keys.
|
783
|
+
# :path => pathname, filename or uri.
|
784
|
+
def guess(text, meta)
|
785
|
+
nil
|
786
|
+
end
|
787
|
+
end #class Rule_Template
|
788
|
+
|
789
|
+
# RuleDebug is a class for debugging autodetect classes/methods
|
790
|
+
class RuleDebug < RuleTemplate
|
791
|
+
# Creates a new instance.
|
792
|
+
def initialize(name)
|
793
|
+
super()
|
794
|
+
@name = name
|
795
|
+
end
|
444
796
|
|
445
|
-
|
446
|
-
|
797
|
+
# prints information to the $stderr.
|
798
|
+
def guess(text, meta)
|
799
|
+
$stderr.puts @name
|
800
|
+
$stderr.puts text.inspect
|
801
|
+
$stderr.puts meta.inspect
|
802
|
+
nil
|
803
|
+
end
|
804
|
+
end #class RuleDebug
|
447
805
|
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
806
|
+
# Special element that is always top or bottom priority.
|
807
|
+
class RuleSpecial < RuleTemplate
|
808
|
+
def initialize(name)
|
809
|
+
#super()
|
810
|
+
@name = name
|
811
|
+
end
|
812
|
+
# modification of @name is inhibited.
|
813
|
+
def name=(x)
|
814
|
+
raise 'cannot modify name'
|
815
|
+
end
|
452
816
|
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
817
|
+
# always returns an empty array
|
818
|
+
def higher_priority_elements
|
819
|
+
[]
|
820
|
+
end
|
821
|
+
# always returns an empty array
|
822
|
+
def lower_priority_elements
|
823
|
+
[]
|
824
|
+
end
|
825
|
+
end #class RuleSpecial
|
457
826
|
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
827
|
+
# Special element that is always top priority.
|
828
|
+
TopRule = RuleSpecial.new('top')
|
829
|
+
# Special element that is always bottom priority.
|
830
|
+
BottomRule = RuleSpecial.new('bottom')
|
462
831
|
|
463
|
-
|
464
|
-
|
832
|
+
# An autodetection rule that uses a regular expression
|
833
|
+
class RuleRegexp < RuleTemplate
|
834
|
+
# Creates a new instance.
|
835
|
+
def initialize(dbclass, re)
|
836
|
+
super()
|
837
|
+
@re = re
|
838
|
+
@dbclass = dbclass
|
839
|
+
@dbclasses = [ dbclass ]
|
840
|
+
@name = dbclass.to_s
|
841
|
+
end
|
465
842
|
|
466
|
-
|
467
|
-
|
843
|
+
# If given text matches the regexp, returns the database class.
|
844
|
+
# Otherwise, returns nil or false.
|
845
|
+
# _meta_ is ignored.
|
846
|
+
def guess(text, meta)
|
847
|
+
@re =~ text ? @dbclass : nil
|
848
|
+
end
|
849
|
+
end #class RuleRegexp
|
468
850
|
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
851
|
+
# An autodetection rule that uses multiple regular expressions (matches if any one of them matches).
|
852
|
+
class RuleRegexp2 < RuleTemplate
|
853
|
+
# Creates a new instance.
|
854
|
+
def initialize(dbclass, *regexps)
|
855
|
+
super()
|
856
|
+
@regexps = regexps
|
857
|
+
@dbclass = dbclass
|
858
|
+
@dbclasses = [ dbclass ]
|
859
|
+
if name
|
860
|
+
@name = name
|
861
|
+
else
|
862
|
+
@name = @dbclass.to_s
|
863
|
+
end
|
864
|
+
end
|
865
|
+
|
866
|
+
# If given text matches the regexp, returns the database class.
|
867
|
+
# Otherwise, returns nil or false.
|
868
|
+
# _meta_ is ignored.
|
869
|
+
def guess(text, meta)
|
870
|
+
@regexps.each do |re|
|
871
|
+
return @dbclass if re =~ text
|
872
|
+
end
|
873
|
+
nil
|
874
|
+
end
|
875
|
+
end #class RuleRegexp
|
876
|
+
|
877
|
+
# An autodetection rule that passes data to the proc object.
|
878
|
+
class RuleProc < RuleTemplate
|
879
|
+
# Creates a new instance.
|
880
|
+
def initialize(*dbclasses, &proc)
|
881
|
+
super()
|
882
|
+
@proc = proc
|
883
|
+
@dbclasses = dbclasses
|
884
|
+
@name = dbclasses.collect { |x| x.to_s }.join('|')
|
885
|
+
end
|
886
|
+
|
887
|
+
# If given text (and/or meta information) is known, returns
|
888
|
+
# the database class.
|
889
|
+
# Otherwise, returns nil or false.
|
890
|
+
#
|
891
|
+
# Refer RuleTemplate#guess for _meta_.
|
892
|
+
def guess(text, meta)
|
893
|
+
@proc.call(text)
|
894
|
+
end
|
895
|
+
end #class RuleProc
|
896
|
+
|
897
|
+
# Creates a new AutoDetect object
|
898
|
+
def initialize
|
899
|
+
# stores autodetection rules.
|
900
|
+
@rules = Hash.new
|
901
|
+
# stores elements (cache)
|
902
|
+
@elements = nil
|
903
|
+
self.add(TopRule)
|
904
|
+
self.add(BottomRule)
|
905
|
+
end
|
906
|
+
|
907
|
+
# Adds a new element.
|
908
|
+
# Returns _elem_.
|
909
|
+
def add(elem)
|
910
|
+
raise 'element name conflicts' if @rules[elem.name]
|
911
|
+
@elements = nil
|
912
|
+
@rules[elem.name] = elem
|
913
|
+
elem
|
914
|
+
end
|
915
|
+
|
916
|
+
# (required by TSort.)
|
917
|
+
# For all elements, yields each element.
|
918
|
+
def tsort_each_node(&x)
|
919
|
+
@rules.each_value(&x)
|
920
|
+
end
|
921
|
+
|
922
|
+
# (required by TSort.)
|
923
|
+
# For a given element, yields each child
|
924
|
+
# (= lower priority elements) of the element.
|
925
|
+
def tsort_each_child(elem)
|
926
|
+
if elem == TopRule then
|
927
|
+
@rules.each_value do |e|
|
928
|
+
yield e unless e == TopRule or
|
929
|
+
e.lower_priority_elements.index(TopRule)
|
930
|
+
end
|
931
|
+
elsif elem == BottomRule then
|
932
|
+
@rules.each_value do |e|
|
933
|
+
yield e if e.higher_priority_elements.index(BottomRule)
|
934
|
+
end
|
476
935
|
else
|
477
|
-
|
936
|
+
elem.lower_priority_elements.each do |e|
|
937
|
+
yield e if e != BottomRule
|
938
|
+
end
|
939
|
+
unless elem.higher_priority_elements.index(BottomRule)
|
940
|
+
yield BottomRule
|
941
|
+
end
|
478
942
|
end
|
943
|
+
end
|
479
944
|
|
480
|
-
|
481
|
-
|
945
|
+
# Returns current elements as an array
|
946
|
+
# whose order fulfills all elements' priorities.
|
947
|
+
def elements
|
948
|
+
unless @elements
|
949
|
+
ary = tsort
|
950
|
+
ary.reverse!
|
951
|
+
@elements = ary
|
952
|
+
end
|
953
|
+
@elements
|
954
|
+
end
|
955
|
+
|
956
|
+
# rebuilds the object and clears internal cache.
|
957
|
+
def rehash
|
958
|
+
@rules.rehash
|
959
|
+
@elements = nil
|
960
|
+
end
|
961
|
+
|
962
|
+
# visualizes the object (mainly for debug)
|
963
|
+
def inspect
|
964
|
+
"<#{self.class.to_s} " +
|
965
|
+
self.elements.collect { |e| e.name.inspect }.join(' ') +
|
966
|
+
">"
|
967
|
+
end
|
968
|
+
|
969
|
+
# Iterates over each element.
|
970
|
+
def each_rule(&x) #:yields: elem
|
971
|
+
elements.each(&x)
|
482
972
|
end
|
483
|
-
end
|
484
973
|
|
974
|
+
# Autodetect from the text.
|
975
|
+
# Returns a database class if succeeded.
|
976
|
+
# Returns nil if failed.
|
977
|
+
def autodetect(text, meta = {})
|
978
|
+
r = nil
|
979
|
+
elements.each do |e|
|
980
|
+
#$stderr.puts e.name
|
981
|
+
r = e.guess(text, meta)
|
982
|
+
break if r
|
983
|
+
end
|
984
|
+
r
|
985
|
+
end
|
986
|
+
|
987
|
+
# autodetect from the FlatFile object.
|
988
|
+
# Returns a database class if succeeded.
|
989
|
+
# Returns nil if failed.
|
990
|
+
def autodetect_flatfile(ff, lines = 31)
|
991
|
+
meta = {}
|
992
|
+
stream = ff.instance_eval { @stream }
|
993
|
+
begin
|
994
|
+
path = stream.path
|
995
|
+
rescue NameError
|
996
|
+
end
|
997
|
+
if path then
|
998
|
+
meta[:path] = path
|
999
|
+
# call autodetect once with meta and without any read action
|
1000
|
+
if r = self.autodetect(stream.prefetch_buffer, meta)
|
1001
|
+
return r
|
1002
|
+
end
|
1003
|
+
end
|
1004
|
+
# reading stream
|
1005
|
+
1.upto(lines) do |x|
|
1006
|
+
break unless line = stream.prefetch_gets
|
1007
|
+
if line.strip.size > 0 then
|
1008
|
+
if r = self.autodetect(stream.prefetch_buffer, meta)
|
1009
|
+
return r
|
1010
|
+
end
|
1011
|
+
end
|
1012
|
+
end
|
1013
|
+
return nil
|
1014
|
+
end
|
1015
|
+
|
1016
|
+
# default autodetect object for class method
|
1017
|
+
@default = nil
|
1018
|
+
|
1019
|
+
# returns the default autodetect object
|
1020
|
+
def self.default
|
1021
|
+
unless @default then
|
1022
|
+
@default = self.make_default
|
1023
|
+
end
|
1024
|
+
@default
|
1025
|
+
end
|
1026
|
+
|
1027
|
+
# sets the default autodetect object.
|
1028
|
+
def self.default=(ad)
|
1029
|
+
@default = ad
|
1030
|
+
end
|
1031
|
+
|
1032
|
+
# make a new autodetect object
|
1033
|
+
def self.[](*arg)
|
1034
|
+
a = self.new
|
1035
|
+
arg.each { |e| a.add(e) }
|
1036
|
+
a
|
1037
|
+
end
|
1038
|
+
|
1039
|
+
# builds the initial default autodetect object
|
1040
|
+
def self.make_default
|
1041
|
+
a = self[
|
1042
|
+
genbank = RuleRegexp[ Bio::GenBank,
|
1043
|
+
/^LOCUS .+ bp .*[a-z]*[DR]?NA/ ],
|
1044
|
+
genpept = RuleRegexp[ Bio::GenPept,
|
1045
|
+
/^LOCUS .+ aa .+/ ],
|
1046
|
+
medline = RuleRegexp[ Bio::MEDLINE,
|
1047
|
+
/^UI \- [0-9]+$/ ],
|
1048
|
+
embl = RuleRegexp[ Bio::EMBL,
|
1049
|
+
/^ID .+\; .*(DNA|RNA|XXX)\;/ ],
|
1050
|
+
sptr = RuleRegexp[ Bio::SPTR,
|
1051
|
+
/^ID .+\; *PRT\;/ ],
|
1052
|
+
prosite = RuleRegexp[ Bio::PROSITE,
|
1053
|
+
/^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
|
1054
|
+
transfac = RuleRegexp[ Bio::TRANSFAC,
|
1055
|
+
/^AC [-A-Za-z0-9_\.]+$/ ],
|
1056
|
+
|
1057
|
+
aaindex = RuleProc.new(Bio::AAindex1, Bio::AAindex2) do |text|
|
1058
|
+
if /^H [-A-Z0-9_\.]+$/ =~ text then
|
1059
|
+
if text =~ /^M [rc]/ then
|
1060
|
+
Bio::AAindex2
|
1061
|
+
elsif text =~ /^I A\/L/ then
|
1062
|
+
Bio::AAindex1
|
1063
|
+
else
|
1064
|
+
false #fail to determine
|
1065
|
+
end
|
1066
|
+
else
|
1067
|
+
nil
|
1068
|
+
end
|
1069
|
+
end,
|
1070
|
+
|
1071
|
+
litdb = RuleRegexp[ Bio::LITDB,
|
1072
|
+
/^CODE [0-9]+$/ ],
|
1073
|
+
brite = RuleRegexp[ Bio::KEGG::BRITE,
|
1074
|
+
/^Entry [A-Z0-9]+/ ],
|
1075
|
+
ko = RuleRegexp[ Bio::KEGG::KO,
|
1076
|
+
/^ENTRY .+ KO\s*/ ],
|
1077
|
+
glycan = RuleRegexp[ Bio::KEGG::GLYCAN,
|
1078
|
+
/^ENTRY .+ Glycan\s*/ ],
|
1079
|
+
enzyme = RuleRegexp2[ Bio::KEGG::ENZYME,
|
1080
|
+
/^ENTRY EC [0-9\.]+$/,
|
1081
|
+
/^ENTRY .+ Enzyme\s*/
|
1082
|
+
],
|
1083
|
+
compound = RuleRegexp2[ Bio::KEGG::COMPOUND,
|
1084
|
+
/^ENTRY C[A-Za-z0-9\._]+$/,
|
1085
|
+
/^ENTRY .+ Compound\s*/
|
1086
|
+
],
|
1087
|
+
reaction = RuleRegexp2[ Bio::KEGG::REACTION,
|
1088
|
+
/^ENTRY R[A-Za-z0-9\._]+$/,
|
1089
|
+
/^ENTRY .+ Reaction\s*/
|
1090
|
+
],
|
1091
|
+
genes = RuleRegexp[ Bio::KEGG::GENES,
|
1092
|
+
/^ENTRY .+ (CDS|gene|.*RNA) / ],
|
1093
|
+
genome = RuleRegexp[ Bio::KEGG::GENOME,
|
1094
|
+
/^ENTRY [a-z]+$/ ],
|
1095
|
+
|
1096
|
+
fantom = RuleProc.new(Bio::FANTOM::MaXML::Cluster,
|
1097
|
+
Bio::FANTOM::MaXML::Sequence) do |text|
|
1098
|
+
if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
|
1099
|
+
case $1
|
1100
|
+
when 'clusters'
|
1101
|
+
Bio::FANTOM::MaXML::Cluster
|
1102
|
+
when 'sequences'
|
1103
|
+
Bio::FANTOM::MaXML::Sequence
|
1104
|
+
else
|
1105
|
+
nil #unknown
|
1106
|
+
end
|
1107
|
+
else
|
1108
|
+
nil
|
1109
|
+
end
|
1110
|
+
end,
|
1111
|
+
|
1112
|
+
pdb = RuleRegexp[ Bio::PDB,
|
1113
|
+
/^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/ ],
|
1114
|
+
het = RuleRegexp[ Bio::PDB::ChemicalComponent,
|
1115
|
+
/^RESIDUE +.+ +\d+\s*$/ ],
|
1116
|
+
|
1117
|
+
clustal = RuleRegexp[ Bio::ClustalW::Report,
|
1118
|
+
/^CLUSTAL .*\(.*\).*sequence +alignment/ ],
|
1119
|
+
|
1120
|
+
blastxml = RuleRegexp[ Bio::Blast::Report,
|
1121
|
+
/\<\!DOCTYPE BlastOutput PUBLIC / ],
|
1122
|
+
wublast = RuleRegexp[ Bio::Blast::WU::Report,
|
1123
|
+
/^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
|
1124
|
+
wutblast = RuleRegexp[ Bio::Blast::WU::Report_TBlast,
|
1125
|
+
/^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
|
1126
|
+
blast = RuleRegexp[ Bio::Blast::Default::Report,
|
1127
|
+
/^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
|
1128
|
+
tblast = RuleRegexp[ Bio::Blast::Default::Report_TBlast,
|
1129
|
+
/^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
|
1130
|
+
|
1131
|
+
blat = RuleRegexp[ Bio::Blat::Report,
|
1132
|
+
/^psLayout version \d+\s*$/ ],
|
1133
|
+
spidey = RuleRegexp[ Bio::Spidey::Report,
|
1134
|
+
/^\-\-SPIDEY version .+\-\-$/ ],
|
1135
|
+
hmmer = RuleRegexp[ Bio::HMMER::Report,
|
1136
|
+
/^HMMER +\d+\./ ],
|
1137
|
+
sim4 = RuleRegexp[ Bio::Sim4::Report,
|
1138
|
+
/^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
|
1139
|
+
|
1140
|
+
fastaformat = RuleProc.new(Bio::FastaFormat,
|
1141
|
+
Bio::NBRF,
|
1142
|
+
Bio::FastaNumericFormat) do |text|
|
1143
|
+
if /^>.+$/ =~ text
|
1144
|
+
case text
|
1145
|
+
when /^>([PF]1|[DR][LC]|N[13]|XX)\;.+/
|
1146
|
+
Bio::NBRF
|
1147
|
+
when /^>.+$\s+(^\#.*$\s*)*^\s*\d*\s*[-a-zA-Z_\.\[\]\(\)\*\+\$]+/
|
1148
|
+
Bio::FastaFormat
|
1149
|
+
when /^>.+$\s+^\s*\d+(\s+\d+)*\s*$/
|
1150
|
+
Bio::FastaNumericFormat
|
1151
|
+
else
|
1152
|
+
false
|
1153
|
+
end
|
1154
|
+
else
|
1155
|
+
nil
|
1156
|
+
end
|
1157
|
+
end
|
1158
|
+
]
|
1159
|
+
|
1160
|
+
# dependencies
|
1161
|
+
# NCBI
|
1162
|
+
genbank.is_prior_to genpept
|
1163
|
+
# EMBL/UniProt
|
1164
|
+
embl.is_prior_to sptr
|
1165
|
+
sptr.is_prior_to prosite
|
1166
|
+
prosite.is_prior_to transfac
|
1167
|
+
# KEGG
|
1168
|
+
#aaindex.is_prior_to litdb
|
1169
|
+
#litdb.is_prior_to brite
|
1170
|
+
brite.is_prior_to ko
|
1171
|
+
ko.is_prior_to glycan
|
1172
|
+
glycan.is_prior_to enzyme
|
1173
|
+
enzyme.is_prior_to compound
|
1174
|
+
compound.is_prior_to reaction
|
1175
|
+
reaction.is_prior_to genes
|
1176
|
+
genes.is_prior_to genome
|
1177
|
+
# PDB
|
1178
|
+
pdb.is_prior_to het
|
1179
|
+
# BLAST
|
1180
|
+
wublast.is_prior_to wutblast
|
1181
|
+
wutblast.is_prior_to blast
|
1182
|
+
blast.is_prior_to tblast
|
1183
|
+
# FastaFormat
|
1184
|
+
BottomRule.is_prior_to(fastaformat)
|
1185
|
+
|
1186
|
+
# for debug
|
1187
|
+
#debug_first = RuleDebug.new('debug_first')
|
1188
|
+
#a.add(debug_first)
|
1189
|
+
#debug_first.is_prior_to(TopRule)
|
1190
|
+
|
1191
|
+
## for debug
|
1192
|
+
#debug_last = RuleDebug.new('debug_last')
|
1193
|
+
#a.add(debug_last)
|
1194
|
+
#BottomRule.is_prior_to(debug_last)
|
1195
|
+
#fastaformat.is_prior_to(debug_last)
|
1196
|
+
|
1197
|
+
a.rehash
|
1198
|
+
return a
|
1199
|
+
end
|
1200
|
+
|
1201
|
+
end #class AutoDetect
|
1202
|
+
|
485
1203
|
end #class FlatFile
|
486
1204
|
|
487
1205
|
end #module Bio
|
488
1206
|
|
489
|
-
|
490
1207
|
if __FILE__ == $0
|
491
1208
|
if ARGV.size == 2
|
492
1209
|
require 'bio'
|
493
1210
|
p Bio::FlatFile.open(eval(ARGV.shift), ARGV.shift).next_entry
|
494
1211
|
end
|
495
1212
|
end
|
496
|
-
|