bio 0.7.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +71 -27
- data/bin/br_biofetch.rb +5 -17
- data/bin/br_bioflat.rb +14 -26
- data/bin/br_biogetseq.rb +6 -18
- data/bin/br_pmfetch.rb +6 -16
- data/doc/Changes-0.7.rd +35 -0
- data/doc/KEGG_API.rd +287 -172
- data/doc/KEGG_API.rd.ja +273 -160
- data/doc/Tutorial.rd +18 -9
- data/doc/Tutorial.rd.ja +656 -138
- data/lib/bio.rb +6 -24
- data/lib/bio/alignment.rb +5 -5
- data/lib/bio/appl/blast.rb +132 -98
- data/lib/bio/appl/blast/format0.rb +9 -19
- data/lib/bio/appl/blast/wublast.rb +5 -18
- data/lib/bio/appl/emboss.rb +40 -47
- data/lib/bio/appl/hmmer.rb +116 -82
- data/lib/bio/appl/hmmer/report.rb +509 -364
- data/lib/bio/appl/spidey/report.rb +7 -18
- data/lib/bio/data/na.rb +3 -21
- data/lib/bio/db.rb +3 -21
- data/lib/bio/db/aaindex.rb +147 -52
- data/lib/bio/db/embl/common.rb +27 -6
- data/lib/bio/db/embl/embl.rb +18 -10
- data/lib/bio/db/embl/sptr.rb +87 -67
- data/lib/bio/db/embl/swissprot.rb +32 -3
- data/lib/bio/db/embl/trembl.rb +32 -3
- data/lib/bio/db/embl/uniprot.rb +32 -3
- data/lib/bio/db/fasta.rb +327 -289
- data/lib/bio/db/medline.rb +25 -4
- data/lib/bio/db/nbrf.rb +12 -20
- data/lib/bio/db/pdb.rb +4 -1
- data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
- data/lib/bio/db/pdb/pdb.rb +13 -8
- data/lib/bio/db/rebase.rb +93 -97
- data/lib/bio/feature.rb +2 -31
- data/lib/bio/io/ddbjxml.rb +167 -139
- data/lib/bio/io/fastacmd.rb +89 -56
- data/lib/bio/io/flatfile.rb +994 -278
- data/lib/bio/io/flatfile/index.rb +257 -194
- data/lib/bio/io/flatfile/indexer.rb +37 -29
- data/lib/bio/reference.rb +147 -64
- data/lib/bio/sequence.rb +57 -417
- data/lib/bio/sequence/aa.rb +64 -0
- data/lib/bio/sequence/common.rb +175 -0
- data/lib/bio/sequence/compat.rb +68 -0
- data/lib/bio/sequence/format.rb +134 -0
- data/lib/bio/sequence/generic.rb +24 -0
- data/lib/bio/sequence/na.rb +189 -0
- data/lib/bio/shell.rb +9 -23
- data/lib/bio/shell/core.rb +130 -125
- data/lib/bio/shell/demo.rb +143 -0
- data/lib/bio/shell/{session.rb → interface.rb} +42 -40
- data/lib/bio/shell/object.rb +52 -0
- data/lib/bio/shell/plugin/codon.rb +4 -22
- data/lib/bio/shell/plugin/emboss.rb +23 -0
- data/lib/bio/shell/plugin/entry.rb +34 -25
- data/lib/bio/shell/plugin/flatfile.rb +5 -23
- data/lib/bio/shell/plugin/keggapi.rb +11 -24
- data/lib/bio/shell/plugin/midi.rb +5 -23
- data/lib/bio/shell/plugin/obda.rb +4 -22
- data/lib/bio/shell/plugin/seq.rb +6 -24
- data/lib/bio/shell/rails/Rakefile +10 -0
- data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
- data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
- data/lib/bio/shell/rails/config/boot.rb +19 -0
- data/lib/bio/shell/rails/config/database.yml +85 -0
- data/lib/bio/shell/rails/config/environment.rb +53 -0
- data/lib/bio/shell/rails/config/environments/development.rb +19 -0
- data/lib/bio/shell/rails/config/environments/production.rb +19 -0
- data/lib/bio/shell/rails/config/environments/test.rb +19 -0
- data/lib/bio/shell/rails/config/routes.rb +19 -0
- data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
- data/lib/bio/shell/rails/public/404.html +8 -0
- data/lib/bio/shell/rails/public/500.html +8 -0
- data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
- data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
- data/lib/bio/shell/rails/public/dispatch.rb +10 -0
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/icon.png +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +277 -0
- data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
- data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
- data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
- data/lib/bio/shell/rails/public/robots.txt +1 -0
- data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
- data/lib/bio/shell/rails/script/about +3 -0
- data/lib/bio/shell/rails/script/breakpointer +3 -0
- data/lib/bio/shell/rails/script/console +3 -0
- data/lib/bio/shell/rails/script/destroy +3 -0
- data/lib/bio/shell/rails/script/generate +3 -0
- data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
- data/lib/bio/shell/rails/script/performance/profiler +3 -0
- data/lib/bio/shell/rails/script/plugin +3 -0
- data/lib/bio/shell/rails/script/process/reaper +3 -0
- data/lib/bio/shell/rails/script/process/spawner +3 -0
- data/lib/bio/shell/rails/script/process/spinner +3 -0
- data/lib/bio/shell/rails/script/runner +3 -0
- data/lib/bio/shell/rails/script/server +42 -0
- data/lib/bio/shell/rails/test/test_helper.rb +28 -0
- data/lib/bio/shell/web.rb +90 -0
- data/lib/bio/util/contingency_table.rb +231 -225
- data/sample/any2fasta.rb +59 -0
- data/test/data/HMMER/hmmpfam.out +64 -0
- data/test/data/HMMER/hmmsearch.out +88 -0
- data/test/data/aaindex/DAYM780301 +30 -0
- data/test/data/aaindex/PRAM900102 +20 -0
- data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
- data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
- data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
- data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
- data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
- data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
- data/test/unit/bio/appl/blast/test_report.rb +15 -12
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
- data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
- data/test/unit/bio/appl/test_blast.rb +5 -5
- data/test/unit/bio/data/test_na.rb +9 -18
- data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
- data/test/unit/bio/db/test_aaindex.rb +197 -0
- data/test/unit/bio/io/test_fastacmd.rb +55 -0
- data/test/unit/bio/sequence/test_aa.rb +102 -0
- data/test/unit/bio/sequence/test_common.rb +178 -0
- data/test/unit/bio/sequence/test_compat.rb +82 -0
- data/test/unit/bio/sequence/test_na.rb +242 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
- data/test/unit/bio/test_alignment.rb +15 -7
- data/test/unit/bio/test_reference.rb +198 -0
- data/test/unit/bio/test_sequence.rb +4 -49
- data/test/unit/bio/test_shell.rb +2 -2
- metadata +118 -15
- data/lib/bio/io/brdb.rb +0 -103
- data/lib/bioruby.rb +0 -34
data/lib/bio/io/fastacmd.rb
CHANGED
|
@@ -1,8 +1,47 @@
|
|
|
1
1
|
#
|
|
2
|
-
# bio/io/fastacmd.rb - NCBI fastacmd wrapper class
|
|
2
|
+
# = bio/io/fastacmd.rb - NCBI fastacmd wrapper class
|
|
3
3
|
#
|
|
4
|
-
#
|
|
5
|
-
#
|
|
4
|
+
# Copyright:: Copyright (C) 2005, 2006
|
|
5
|
+
# Shuji SHIGENOBU <shige@nibb.ac.jp>,
|
|
6
|
+
# Toshiaki Katayama <k@bioruby.org>,
|
|
7
|
+
# Mitsuteru C. Nakao <n@bioruby.org>
|
|
8
|
+
# License:: LGPL
|
|
9
|
+
#
|
|
10
|
+
# $Id: fastacmd.rb,v 1.10 2006/01/28 08:12:21 nakao Exp $
|
|
11
|
+
#
|
|
12
|
+
# == Description
|
|
13
|
+
#
|
|
14
|
+
# Retrieves FASTA formatted sequences from a blast database using
|
|
15
|
+
# NCBI fastacmd command.
|
|
16
|
+
#
|
|
17
|
+
# This class requires 'fastacmd' command and a blast database
|
|
18
|
+
# (formatted using the '-o' option of 'formatdb').
|
|
19
|
+
#
|
|
20
|
+
# == Examples
|
|
21
|
+
#
|
|
22
|
+
# database = ARGV.shift || "/db/myblastdb"
|
|
23
|
+
# entry_id = ARGV.shift || "sp:128U_DROME"
|
|
24
|
+
# ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
|
|
25
|
+
#
|
|
26
|
+
# fastacmd = Bio::Blast::Fastacmd.new(database)
|
|
27
|
+
#
|
|
28
|
+
# entry = fastacmd.get_by_id(entry_id)
|
|
29
|
+
# fastacmd.fetch(entry_id)
|
|
30
|
+
# fastacmd.fetch(ent_list)
|
|
31
|
+
#
|
|
32
|
+
# fastacmd.fetch(ent_list).each do |fasta|
|
|
33
|
+
# puts fasta
|
|
34
|
+
# end
|
|
35
|
+
#
|
|
36
|
+
# == References
|
|
37
|
+
#
|
|
38
|
+
# * NCBI tool
|
|
39
|
+
# ftp://ftp.ncbi.nih.gov/blast/executables/LATEST/ncbi.tar.gz
|
|
40
|
+
#
|
|
41
|
+
# * fastacmd.html
|
|
42
|
+
# http://biowulf.nih.gov/apps/blast/doc/fastacmd.html
|
|
43
|
+
#
|
|
44
|
+
#--
|
|
6
45
|
#
|
|
7
46
|
# This library is free software; you can redistribute it and/or
|
|
8
47
|
# modify it under the terms of the GNU Lesser General Public
|
|
@@ -18,7 +57,7 @@
|
|
|
18
57
|
# License along with this library; if not, write to the Free Software
|
|
19
58
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
20
59
|
#
|
|
21
|
-
|
|
60
|
+
#++
|
|
22
61
|
#
|
|
23
62
|
|
|
24
63
|
require 'bio/db/fasta'
|
|
@@ -28,23 +67,52 @@ require 'bio/command'
|
|
|
28
67
|
module Bio
|
|
29
68
|
class Blast
|
|
30
69
|
|
|
70
|
+
# NCBI fastacmd wrapper class
|
|
71
|
+
#
|
|
31
72
|
class Fastacmd
|
|
32
73
|
|
|
33
74
|
include Enumerable
|
|
34
75
|
include Bio::Command::Tools
|
|
35
76
|
|
|
36
|
-
|
|
37
|
-
|
|
77
|
+
# Database file path.
|
|
78
|
+
attr_accessor :database
|
|
79
|
+
|
|
80
|
+
# fastacmd command file path.
|
|
81
|
+
attr_accessor :fastacmd
|
|
82
|
+
|
|
83
|
+
#
|
|
84
|
+
attr_accessor :errorlog
|
|
85
|
+
|
|
86
|
+
# Initialize a fastacmd object.
|
|
87
|
+
#
|
|
88
|
+
# fastacmd = Bio::Blast::Fastacmd.new("/db/myblastdb")
|
|
89
|
+
def initialize(blast_database_file_path)
|
|
90
|
+
@database = blast_database_file_path
|
|
38
91
|
@fastacmd = 'fastacmd'
|
|
39
92
|
end
|
|
40
|
-
attr_accessor :database, :fastacmd, :errorlog
|
|
41
93
|
|
|
42
|
-
|
|
94
|
+
|
|
95
|
+
# get an entry_id and returns a Bio::FastaFormat object.
|
|
96
|
+
#
|
|
97
|
+
# entry_id = "sp:128U_DROME"
|
|
98
|
+
# entry = fastacmd.get_by_id(entry_id)
|
|
43
99
|
def get_by_id(entry_id)
|
|
44
100
|
fetch(entry_id).shift
|
|
45
101
|
end
|
|
46
102
|
|
|
47
|
-
# get one or more entry_id and returns an Array of Bio::FastaFormat objects
|
|
103
|
+
# get one or more entry_id and returns an Array of Bio::FastaFormat objects.
|
|
104
|
+
#
|
|
105
|
+
# Fastacmd#fetch(entry_id) returns an Array of a Bio::FastaFormat
|
|
106
|
+
# object even when the result is a single entry.
|
|
107
|
+
#
|
|
108
|
+
# p fastacmd.fetch(entry_id)
|
|
109
|
+
#
|
|
110
|
+
# Fastacmd#fetch method also accepts a list of entry_id and returns
|
|
111
|
+
# an Array of Bio::FastaFormat objects.
|
|
112
|
+
#
|
|
113
|
+
# ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
|
|
114
|
+
# p fastacmd.fetch(ent_list)
|
|
115
|
+
#
|
|
48
116
|
def fetch(list)
|
|
49
117
|
if list.respond_to?(:join)
|
|
50
118
|
entry_id = list.join(",")
|
|
@@ -59,13 +127,20 @@ class Fastacmd
|
|
|
59
127
|
end
|
|
60
128
|
end
|
|
61
129
|
|
|
130
|
+
# Iterates each entry.
|
|
131
|
+
#
|
|
132
|
+
# You can also iterate on all sequences in the database!
|
|
133
|
+
# fastacmd.each do |fasta|
|
|
134
|
+
# p [ fasta.definition[0..30], fasta.seq.size ]
|
|
135
|
+
# end
|
|
136
|
+
#
|
|
62
137
|
def each_entry
|
|
63
138
|
cmd = [ @fastacmd, '-d', @database, '-D', 'T' ]
|
|
64
139
|
call_command_local(cmd) do |inn, out|
|
|
65
140
|
inn.close_write
|
|
66
141
|
Bio::FlatFile.open(Bio::FastaFormat, out) do |f|
|
|
67
|
-
f.each_entry do |
|
|
68
|
-
yield
|
|
142
|
+
f.each_entry do |entry|
|
|
143
|
+
yield entry
|
|
69
144
|
end
|
|
70
145
|
end
|
|
71
146
|
end
|
|
@@ -73,51 +148,9 @@ class Fastacmd
|
|
|
73
148
|
end
|
|
74
149
|
alias each each_entry
|
|
75
150
|
|
|
76
|
-
end
|
|
151
|
+
end # class Fastacmd
|
|
77
152
|
|
|
78
|
-
end
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
if __FILE__ == $0
|
|
83
|
-
|
|
84
|
-
database = ARGV.shift || "/db/myblastdb"
|
|
85
|
-
entry_id = ARGV.shift || "sp:128U_DROME"
|
|
86
|
-
ent_list = ["sp:1433_SPIOL", "sp:1432_MAIZE"]
|
|
87
|
-
|
|
88
|
-
fastacmd = Bio::Blast::Fastacmd.new(database)
|
|
89
|
-
|
|
90
|
-
### Retrieve one sequence
|
|
91
|
-
entry = fastacmd.get_by_id(entry_id)
|
|
92
|
-
|
|
93
|
-
# Fastacmd#get_by_id(entry_id) returns a Bio::FastaFormat object.
|
|
94
|
-
p entry
|
|
95
|
-
|
|
96
|
-
# Bio::FastaFormat becomes a fasta format string when printed by puts.
|
|
97
|
-
puts entry
|
|
98
|
-
|
|
99
|
-
# Fastacmd#fetch(entry_id) returns an Array of a Bio::FastaFormat
|
|
100
|
-
# object even when the result is a single entry.
|
|
101
|
-
p fastacmd.fetch(entry_id)
|
|
102
|
-
|
|
103
|
-
### Retrieve more sequences
|
|
104
|
-
|
|
105
|
-
# Fastacmd#fetch method also accepts a list of entry_id and returns
|
|
106
|
-
# an Array of Bio::FastaFormat objects.
|
|
107
|
-
p fastacmd.fetch(ent_list)
|
|
108
|
-
|
|
109
|
-
# So, you can iterate on the results.
|
|
110
|
-
fastacmd.fetch(ent_list).each do |fasta|
|
|
111
|
-
puts fasta
|
|
112
|
-
end
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
### Iterates on all entries
|
|
116
|
-
|
|
117
|
-
# You can also iterate on all sequences in the database!
|
|
118
|
-
fastacmd.each do |fasta|
|
|
119
|
-
p [ fasta.definition[0..30], fasta.seq.size ]
|
|
120
|
-
end
|
|
153
|
+
end # class Blast
|
|
154
|
+
end # module Bio
|
|
121
155
|
|
|
122
|
-
end
|
|
123
156
|
|
data/lib/bio/io/flatfile.rb
CHANGED
|
@@ -1,32 +1,19 @@
|
|
|
1
1
|
#
|
|
2
2
|
# = bio/io/flatfile.rb - flatfile access wrapper class
|
|
3
3
|
#
|
|
4
|
-
#
|
|
5
|
-
# License:: LGPL
|
|
4
|
+
# Copyright (C) 2001-2006 Naohisa Goto <ng@bioruby.org>
|
|
6
5
|
#
|
|
7
|
-
|
|
8
|
-
# This library is free software; you can redistribute it and/or
|
|
9
|
-
# modify it under the terms of the GNU Lesser General Public
|
|
10
|
-
# License as published by the Free Software Foundation; either
|
|
11
|
-
# version 2 of the License, or (at your option) any later version.
|
|
6
|
+
# License:: Ruby's
|
|
12
7
|
#
|
|
13
|
-
#
|
|
14
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
16
|
-
# Lesser General Public License for more details.
|
|
8
|
+
# $Id: flatfile.rb,v 1.46 2006/02/22 10:01:27 ngoto Exp $
|
|
17
9
|
#
|
|
18
|
-
# You should have received a copy of the GNU Lesser General Public
|
|
19
|
-
# License along with this library; if not, write to the Free Software
|
|
20
|
-
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
21
|
-
#++
|
|
22
|
-
#
|
|
23
|
-
# $Id: flatfile.rb,v 1.41 2005/11/01 15:34:45 ngoto Exp $
|
|
24
10
|
#
|
|
25
11
|
# Bio::FlatFile is a helper and wrapper class to read a biological data file.
|
|
26
12
|
# It acts like a IO object.
|
|
27
13
|
# It can automatically detect data format, and users do not need to tell
|
|
28
14
|
# the class what the data is.
|
|
29
15
|
#
|
|
16
|
+
require 'tsort'
|
|
30
17
|
|
|
31
18
|
module Bio
|
|
32
19
|
|
|
@@ -38,60 +25,407 @@ module Bio
|
|
|
38
25
|
|
|
39
26
|
include Enumerable
|
|
40
27
|
|
|
28
|
+
# Wrapper for a IO (or IO-like) object.
|
|
29
|
+
# It can input with a buffer.
|
|
30
|
+
class BufferedInputStream
|
|
31
|
+
# Creates a new input stream wrapper
|
|
32
|
+
def initialize(io, path)
|
|
33
|
+
@io = io
|
|
34
|
+
@path = path
|
|
35
|
+
# initialize prefetch buffer
|
|
36
|
+
@buffer = ''
|
|
37
|
+
@path = path
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Creates a new input stream wrapper from the given IO object.
|
|
41
|
+
def self.for_io(io)
|
|
42
|
+
begin
|
|
43
|
+
path = io.path
|
|
44
|
+
rescue NameError
|
|
45
|
+
path = nil
|
|
46
|
+
end
|
|
47
|
+
self.new(io, path)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# Creates a new input stream wrapper to open file _filename_
|
|
51
|
+
# by using File.open.
|
|
52
|
+
# *arg is passed to File.open.
|
|
53
|
+
#
|
|
54
|
+
# Like File.open, a block can be accepted.
|
|
55
|
+
def self.open_file(filename, *arg)
|
|
56
|
+
if block_given? then
|
|
57
|
+
File.open(filename, *arg) do |fobj|
|
|
58
|
+
yield self.new(fobj, filename)
|
|
59
|
+
end
|
|
60
|
+
else
|
|
61
|
+
fobj = File.open(filename, *arg)
|
|
62
|
+
self.new(fobj, filename)
|
|
63
|
+
end
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
# Creates a new input stream wrapper from URI specified as _uri_.
|
|
67
|
+
# by using OpenURI.open_uri or URI#open.
|
|
68
|
+
# _uri_ must be a String or URI object.
|
|
69
|
+
# *arg is passed to OpenURI.open_uri or URI#open.
|
|
70
|
+
#
|
|
71
|
+
# Like OpenURI.open_uri, it can accept a block.
|
|
72
|
+
def self.open_uri(uri, *arg)
|
|
73
|
+
if uri.kind_of?(URI)
|
|
74
|
+
if block_given?
|
|
75
|
+
uri.open(*arg) do |fobj|
|
|
76
|
+
yield self.new(fobj, uri.to_s)
|
|
77
|
+
end
|
|
78
|
+
else
|
|
79
|
+
fobj = uri.open(*arg)
|
|
80
|
+
self.new(fobj, uri.to_s)
|
|
81
|
+
end
|
|
82
|
+
else
|
|
83
|
+
if block_given?
|
|
84
|
+
OpenURI.open_uri(uri, *arg) do |fobj|
|
|
85
|
+
yield self.new(fobj, uri)
|
|
86
|
+
end
|
|
87
|
+
else
|
|
88
|
+
fobj = OpenURI.open_uri(uri, *arg)
|
|
89
|
+
self.new(fobj, uri)
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
# Pathname, filename or URI to open the object.
|
|
95
|
+
# Like File#path, returned value isn't normalized.
|
|
96
|
+
attr_reader :path
|
|
97
|
+
|
|
98
|
+
# Converts to IO object if possible
|
|
99
|
+
def to_io
|
|
100
|
+
@io.to_io
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# Closes the IO object if possible
|
|
104
|
+
def close
|
|
105
|
+
@io.close
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# Rewinds the IO object if possible
|
|
109
|
+
# Internal buffer in this wrapper is cleared.
|
|
110
|
+
def rewind
|
|
111
|
+
r = @io.rewind
|
|
112
|
+
@buffer = ''
|
|
113
|
+
r
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
# Returns current file position
|
|
117
|
+
def pos
|
|
118
|
+
@io.pos - @buffer.size
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
# Sets current file position if possible
|
|
122
|
+
# Internal buffer in this wrapper is cleared.
|
|
123
|
+
def pos=(p)
|
|
124
|
+
r = (@io.pos = p)
|
|
125
|
+
@buffer = ''
|
|
126
|
+
r
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
# Returns true if end-of-file. Otherwise, returns false.
|
|
130
|
+
#
|
|
131
|
+
# Note that it returns false if the internal buffer of this wrapper
|
|
132
|
+
# is not empty.
|
|
133
|
+
def eof?
|
|
134
|
+
if @buffer.size > 0
|
|
135
|
+
false
|
|
136
|
+
else
|
|
137
|
+
@io.eof?
|
|
138
|
+
end
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
# Same as IO#gets.
|
|
142
|
+
def gets(io_rs = $/)
|
|
143
|
+
if @buffer.size > 0
|
|
144
|
+
if io_rs == nil then
|
|
145
|
+
r = @buffer + @io.gets(nil).to_s
|
|
146
|
+
@buffer = ''
|
|
147
|
+
else
|
|
148
|
+
if io_rs == '' then
|
|
149
|
+
sp_rs = /\n\n/n
|
|
150
|
+
sp_rs_orig = "\n\n"
|
|
151
|
+
else
|
|
152
|
+
sp_rs = Regexp.new(Regexp.escape(io_rs, 'n'), 0, 'n')
|
|
153
|
+
sp_rs_orig = io_rs
|
|
154
|
+
end
|
|
155
|
+
a = @buffer.split(sp_rs, 2)
|
|
156
|
+
if a.size > 1 then
|
|
157
|
+
r = a[0] + sp_rs_orig
|
|
158
|
+
@buffer = a[1]
|
|
159
|
+
else
|
|
160
|
+
@buffer << @io.gets(io_rs).to_s
|
|
161
|
+
a = @buffer.split(sp_rs, 2)
|
|
162
|
+
if a.size > 1 then
|
|
163
|
+
r = a[0] + sp_rs_orig
|
|
164
|
+
@buffer = a[1].to_s
|
|
165
|
+
else
|
|
166
|
+
r = @buffer
|
|
167
|
+
@buffer = ''
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
r
|
|
172
|
+
else
|
|
173
|
+
@io.gets(io_rs)
|
|
174
|
+
end
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
# Pushes back given str to the internal buffer.
|
|
178
|
+
# Returns nil.
|
|
179
|
+
# str must be read previously with the wrapper object.
|
|
180
|
+
#
|
|
181
|
+
# Note that in current implementation, the str can be everything,
|
|
182
|
+
# but please don't depend on it.
|
|
183
|
+
#
|
|
184
|
+
def ungets(str)
|
|
185
|
+
@buffer = str + @buffer
|
|
186
|
+
nil
|
|
187
|
+
end
|
|
188
|
+
|
|
189
|
+
# Same as IO#getc.
|
|
190
|
+
def getc
|
|
191
|
+
if @buffer.size > 0 then
|
|
192
|
+
r = @buffer[0]
|
|
193
|
+
@buffer = @buffer[1..-1]
|
|
194
|
+
else
|
|
195
|
+
r = @io.getc
|
|
196
|
+
end
|
|
197
|
+
r
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
# Pushes back one character into the internal buffer.
|
|
201
|
+
# Unlike IO#ungetc, it can be called more than one time.
|
|
202
|
+
def ungetc(c)
|
|
203
|
+
@buffer = sprintf("%c", c) + @buffer
|
|
204
|
+
nil
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
# Gets current prefetch buffer
|
|
208
|
+
def prefetch_buffer
|
|
209
|
+
@buffer
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
# It does @io.gets, and adds the returned string
|
|
213
|
+
# to the internal buffer, and returns the string.
|
|
214
|
+
def prefetch_gets(*arg)
|
|
215
|
+
r = @io.gets(*arg)
|
|
216
|
+
@buffer << r if r
|
|
217
|
+
r
|
|
218
|
+
end
|
|
219
|
+
|
|
220
|
+
# It does @io.readpartial, and adds the returned string
|
|
221
|
+
# to the internal buffer, and returns the string.
|
|
222
|
+
def prefetch_readpartial(*arg)
|
|
223
|
+
r = @io.readpartial(*arg)
|
|
224
|
+
@buffer << r if r
|
|
225
|
+
r
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
# Skips space characters in the stream.
|
|
229
|
+
# returns nil.
|
|
230
|
+
def skip_spaces
|
|
231
|
+
ws = { ?\s => true, ?\n => true, ?\r => true, ?\t => true }
|
|
232
|
+
while r = self.getc
|
|
233
|
+
unless ws[r] then
|
|
234
|
+
self.ungetc(r)
|
|
235
|
+
break
|
|
236
|
+
end
|
|
237
|
+
end
|
|
238
|
+
nil
|
|
239
|
+
end
|
|
240
|
+
end #class BufferedInputStream
|
|
241
|
+
|
|
242
|
+
# Splitter is a module containing classes to get entries from a buffered input stream.
|
|
243
|
+
module Splitter
|
|
244
|
+
# This is a template of splitter.
|
|
245
|
+
class Template
|
|
246
|
+
# Creates a new splitter.
|
|
247
|
+
def initialize(klass, bstream)
|
|
248
|
+
@stream = bstream
|
|
249
|
+
raise NotImplementedError
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
# skips leader of the entry.
|
|
253
|
+
def skip_leader
|
|
254
|
+
raise NotImplementedError
|
|
255
|
+
end
|
|
256
|
+
|
|
257
|
+
# Gets entry as a string
|
|
258
|
+
def get_entry
|
|
259
|
+
raise NotImplementedError
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
# the last entry read from the stream
|
|
263
|
+
attr_reader :entry
|
|
264
|
+
|
|
265
|
+
# start position of the entry
|
|
266
|
+
attr_reader :entry_start_pos
|
|
267
|
+
|
|
268
|
+
# (end position of the entry) + 1
|
|
269
|
+
attr_reader :entry_ended_pos
|
|
270
|
+
end
|
|
271
|
+
|
|
272
|
+
# Default splitter.
|
|
273
|
+
# It sees following constants in the given class.
|
|
274
|
+
# DELIMITER:: (String) delimiter indicates the end of a entry.
|
|
275
|
+
# FLATFILE_HEADER:: (String) start of a entry, located on head of a line.
|
|
276
|
+
# DELIMITER_OVERRUN:: (Integer) excess read size included in DELIMITER.
|
|
277
|
+
#
|
|
278
|
+
class Default < Template
|
|
279
|
+
# Creates a new splitter.
|
|
280
|
+
# klass:: database class
|
|
281
|
+
# bstream:: input stream. It must be a BufferedInputStream object.
|
|
282
|
+
def initialize(klass, bstream)
|
|
283
|
+
@stream = bstream
|
|
284
|
+
@delimiter = klass::DELIMITER rescue nil
|
|
285
|
+
@header = klass::FLATFILE_HEADER rescue nil
|
|
286
|
+
# for specific classes' benefit
|
|
287
|
+
unless header
|
|
288
|
+
if klass == Bio::GenBank or klass == Bio::GenPept
|
|
289
|
+
@header = 'LOCUS '
|
|
290
|
+
end
|
|
291
|
+
end
|
|
292
|
+
@delimiter_overrun = klass::DELIMITER_OVERRUN rescue nil
|
|
293
|
+
end
|
|
294
|
+
|
|
295
|
+
# (String) delimiter indicates the end of a entry.
|
|
296
|
+
attr_accessor :delimiter
|
|
297
|
+
|
|
298
|
+
# (String) start of a entry, located on head of a line.
|
|
299
|
+
attr_accessor :header
|
|
300
|
+
|
|
301
|
+
# (Integer) excess read data size included in delimiter.
|
|
302
|
+
attr_accessor :delimiter_overrun
|
|
303
|
+
|
|
304
|
+
# Skips leader of the entry.
|
|
305
|
+
#
|
|
306
|
+
# If @header is not nil, it reads till the contents of @header
|
|
307
|
+
# comes at the head of a line.
|
|
308
|
+
# If correct FLATFILE_HEADER is found, returns true.
|
|
309
|
+
# Otherwise, returns nil.
|
|
310
|
+
def skip_leader
|
|
311
|
+
if @header then
|
|
312
|
+
data = ''
|
|
313
|
+
while s = @stream.gets(@header)
|
|
314
|
+
data << s
|
|
315
|
+
if data.split(/[\r\n]+/)[-1] == @header then
|
|
316
|
+
@stream.ungets(@header)
|
|
317
|
+
return true
|
|
318
|
+
end
|
|
319
|
+
end
|
|
320
|
+
# @header was not found. For safety,
|
|
321
|
+
# pushes back data with removing white spaces in the head.
|
|
322
|
+
data.sub(/\A\s+/, '')
|
|
323
|
+
@stream.ungets(data)
|
|
324
|
+
return nil
|
|
325
|
+
else
|
|
326
|
+
@stream.skip_spaces
|
|
327
|
+
return nil
|
|
328
|
+
end
|
|
329
|
+
end
|
|
330
|
+
|
|
331
|
+
# gets a entry
|
|
332
|
+
def get_entry
|
|
333
|
+
p0 = @stream.pos
|
|
334
|
+
e = @stream.gets(@delimiter)
|
|
335
|
+
if e and @delimiter_overrun then
|
|
336
|
+
if e[-@delimiter.size, @delimiter.size ] == @delimiter then
|
|
337
|
+
overrun = e[-@delimiter_overrun, @delimiter_overrun]
|
|
338
|
+
e[-@delimiter_overrun, @delimiter_overrun] = ''
|
|
339
|
+
@stream.ungets(overrun)
|
|
340
|
+
end
|
|
341
|
+
end
|
|
342
|
+
p1 = @stream.pos
|
|
343
|
+
@entry_start_pos = p0
|
|
344
|
+
@entry = e
|
|
345
|
+
@entry_ended_pos = p1
|
|
346
|
+
@entry
|
|
347
|
+
end
|
|
348
|
+
end #class Defalult
|
|
349
|
+
end #module Splitter
|
|
350
|
+
|
|
351
|
+
#
|
|
352
|
+
# Bio::FlatFile.open(file, *arg)
|
|
353
|
+
# Bio::FlatFile.open(dbclass, file, *arg)
|
|
354
|
+
#
|
|
41
355
|
# Creates a new Bio::FlatFile object to read a file or a stream
|
|
42
|
-
# which contains
|
|
356
|
+
# which contains _dbclass_ data.
|
|
43
357
|
#
|
|
44
|
-
#
|
|
358
|
+
# _dbclass_ should be a class (or module) or nil.
|
|
45
359
|
# e.g. Bio::GenBank, Bio::FastaFormat.
|
|
46
360
|
#
|
|
47
|
-
# If
|
|
48
|
-
# the method opens a local file named
|
|
49
|
-
# with
|
|
361
|
+
# If _file_ is a filename (which doesn't have gets method),
|
|
362
|
+
# the method opens a local file named _file_
|
|
363
|
+
# with <code>File.open(filename, *arg)</code>.
|
|
50
364
|
#
|
|
51
|
-
# When nil is given to
|
|
52
|
-
#
|
|
53
|
-
#
|
|
54
|
-
#
|
|
55
|
-
# FlatFile#
|
|
365
|
+
# When _dbclass_ is omitted or nil is given to _dbclass_,
|
|
366
|
+
# the method tries to determine database class
|
|
367
|
+
# (file format) automatically.
|
|
368
|
+
# When it fails to determine, dbclass is set to nil
|
|
369
|
+
# and FlatFile#next_entry would fail.
|
|
370
|
+
# You can still set dbclass using FlatFile#dbclass= method.
|
|
56
371
|
#
|
|
57
372
|
# * Example 1
|
|
58
373
|
# Bio::FlatFile.open(Bio::GenBank, "genbank/gbest40.seq")
|
|
59
374
|
# * Example 2
|
|
60
375
|
# Bio::FlatFile.open(nil, "embl/est_hum17.dat")
|
|
61
376
|
# * Example 3
|
|
377
|
+
# Bio::FlatFile.open("genbank/gbest40.seq")
|
|
378
|
+
#
|
|
379
|
+
# * Example 4
|
|
62
380
|
# Bio::FlatFile.open(Bio::GenBank, $stdin)
|
|
63
381
|
#
|
|
64
|
-
# If it is called with block, the block will be executed with
|
|
65
|
-
# a
|
|
66
|
-
#
|
|
382
|
+
# If it is called with a block, the block will be executed with
|
|
383
|
+
# a new Bio::FlatFile object. If filename is given,
|
|
384
|
+
# the file is automatically closed when leaving the block.
|
|
67
385
|
#
|
|
68
|
-
# * Example
|
|
386
|
+
# * Example 5
|
|
69
387
|
# Bio::FlatFile.open(nil, 'test4.fst') do |ff|
|
|
70
388
|
# ff.each { |e| print e.definition, "\n" }
|
|
71
389
|
# end
|
|
72
390
|
#
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
391
|
+
# * Example 6
|
|
392
|
+
# Bio::FlatFile.open('test4.fst') do |ff|
|
|
393
|
+
# ff.each { |e| print e.definition, "\n" }
|
|
394
|
+
# end
|
|
395
|
+
#
|
|
396
|
+
# Compatibility Note:
|
|
397
|
+
# <em>*arg</em> is completely passed to the <code>File.open</code>
|
|
398
|
+
# and you cannot specify ":raw => true" or ":raw => false".
|
|
399
|
+
#
|
|
400
|
+
def self.open(*arg, &block)
|
|
401
|
+
# FlatFile.open(dbclass, file, mode, perm)
|
|
402
|
+
# FlatFile.open(file, mode, perm)
|
|
403
|
+
if arg.size <= 0
|
|
404
|
+
raise ArgumentError, 'wrong number of arguments (0 for 1)'
|
|
405
|
+
end
|
|
406
|
+
x = arg.shift
|
|
407
|
+
if x.is_a?(Module) then
|
|
408
|
+
# FlatFile.open(dbclass, filename_or_io, ...)
|
|
409
|
+
dbclass = x
|
|
410
|
+
elsif x.nil? then
|
|
411
|
+
# FlatFile.open(nil, filename_or_io, ...)
|
|
412
|
+
dbclass = nil
|
|
413
|
+
else
|
|
414
|
+
# FlatFile.open(filename, ...)
|
|
415
|
+
dbclass = nil
|
|
416
|
+
arg.unshift(x)
|
|
417
|
+
end
|
|
418
|
+
if arg.size <= 0
|
|
419
|
+
raise ArgumentError, 'wrong number of arguments (1 for 2)'
|
|
420
|
+
end
|
|
421
|
+
file = arg.shift
|
|
422
|
+
# check if file is filename or IO object
|
|
81
423
|
unless file.respond_to?(:gets)
|
|
82
424
|
# 'file' is a filename
|
|
83
|
-
|
|
84
|
-
File.open(file, *openmode) do |fobj|
|
|
85
|
-
ff = self.new(dbclass, fobj, *arg)
|
|
86
|
-
yield ff
|
|
87
|
-
end
|
|
88
|
-
else
|
|
89
|
-
fobj = File.open(file, *openmode)
|
|
90
|
-
self.new(dbclass, fobj, *arg)
|
|
91
|
-
end
|
|
425
|
+
self.open_file(file, *arg, &block)
|
|
92
426
|
else
|
|
93
427
|
# 'file' is a IO object
|
|
94
|
-
ff = self.new(dbclass, file
|
|
428
|
+
ff = self.new(dbclass, file)
|
|
95
429
|
block_given? ? (yield ff) : ff
|
|
96
430
|
end
|
|
97
431
|
end
|
|
@@ -110,7 +444,8 @@ module Bio
|
|
|
110
444
|
end
|
|
111
445
|
|
|
112
446
|
# Same as FlatFile.auto(filename_or_stream, *arg).to_a
|
|
113
|
-
#
|
|
447
|
+
#
|
|
448
|
+
# (This method might be OBSOLETED in the future.)
|
|
114
449
|
def self.to_a(*arg)
|
|
115
450
|
self.auto(*arg) do |ff|
|
|
116
451
|
raise 'cannot determine file format' unless ff.dbclass
|
|
@@ -118,6 +453,46 @@ module Bio
|
|
|
118
453
|
end
|
|
119
454
|
end
|
|
120
455
|
|
|
456
|
+
# Same as FlatFile.auto(filename, *arg),
|
|
457
|
+
# except that it only accepts a filename and doesn't accept an IO object.
|
|
458
|
+
# File format is automatically determined.
|
|
459
|
+
#
|
|
460
|
+
# It can accept a block.
|
|
461
|
+
# If a block is given, it returns the block's return value.
|
|
462
|
+
# Otherwise, it returns a new FlatFile object.
|
|
463
|
+
#
|
|
464
|
+
def self.open_file(filename, *arg)
|
|
465
|
+
if block_given? then
|
|
466
|
+
BufferedInputStream.open_file(filename, *arg) do |stream|
|
|
467
|
+
yield self.new(nil, stream)
|
|
468
|
+
end
|
|
469
|
+
else
|
|
470
|
+
stream = BufferedInputStream.open_file(filename, *arg)
|
|
471
|
+
self.new(nil, stream)
|
|
472
|
+
end
|
|
473
|
+
end
|
|
474
|
+
|
|
475
|
+
# Opens URI specified as _uri_.
|
|
476
|
+
# _uri_ must be a String or URI object.
|
|
477
|
+
# *arg is passed to OpenURI.open_uri or URI#open.
|
|
478
|
+
#
|
|
479
|
+
# Like FlatFile.open, it can accept a block.
|
|
480
|
+
#
|
|
481
|
+
# Note that you MUST explicitly require 'open-uri'.
|
|
482
|
+
# Because open-uri.rb modifies existing class,
|
|
483
|
+
# it isn't required by default.
|
|
484
|
+
#
|
|
485
|
+
def self.open_uri(uri, *arg)
|
|
486
|
+
if block_given? then
|
|
487
|
+
BufferedInputStream.open_uri(uri, *arg) do |stream|
|
|
488
|
+
yield self.new(nil, stream)
|
|
489
|
+
end
|
|
490
|
+
else
|
|
491
|
+
stream = BufferedInputStream.open_uri(uri, *arg)
|
|
492
|
+
self.new(nil, stream)
|
|
493
|
+
end
|
|
494
|
+
end
|
|
495
|
+
|
|
121
496
|
# Same as FlatFile.open, except that 'stream' should be a opened
|
|
122
497
|
# stream object (IO, File, ..., who have the 'gets' method).
|
|
123
498
|
#
|
|
@@ -126,62 +501,101 @@ module Bio
|
|
|
126
501
|
# * Example 2
|
|
127
502
|
# Bio::FlatFile.new(Bio::GenBank, IO.popen("gzip -dc nc1101.flat.gz"))
|
|
128
503
|
#
|
|
129
|
-
#
|
|
130
|
-
#
|
|
131
|
-
#
|
|
132
|
-
# default: false (not "raw mode").
|
|
504
|
+
# Compatibility Note:
|
|
505
|
+
# Now, you cannot specify ":raw => true" or ":raw => false".
|
|
506
|
+
# Below styles are DEPRECATED.
|
|
133
507
|
#
|
|
134
|
-
# * Example 3
|
|
135
|
-
# Bio::FlatFile.new(nil, $stdin, :raw=>true)
|
|
508
|
+
# * Example 3 (deprecated)
|
|
509
|
+
# # Bio::FlatFile.new(nil, $stdin, :raw=>true) # => ERROR
|
|
510
|
+
# # Please rewrite as below.
|
|
511
|
+
# ff = Bio::FlatFile.new(nil, $stdin)
|
|
512
|
+
# ff.raw = true
|
|
136
513
|
# * Example 3 in old style (deprecated)
|
|
137
|
-
# Bio::FlatFile.new(nil, $stdin, true)
|
|
514
|
+
# # Bio::FlatFile.new(nil, $stdin, true) # => ERROR
|
|
515
|
+
# # Please rewrite as below.
|
|
516
|
+
# ff = Bio::FlatFile.new(nil, $stdin)
|
|
517
|
+
# ff.raw = true
|
|
138
518
|
#
|
|
139
|
-
def initialize(dbclass, stream
|
|
519
|
+
def initialize(dbclass, stream)
|
|
140
520
|
# 2nd arg: IO object
|
|
141
|
-
@
|
|
142
|
-
|
|
143
|
-
self.raw = false
|
|
144
|
-
if options.is_a?(Hash) then
|
|
145
|
-
self.raw = options[:raw] if options.has_key?(:raw)
|
|
521
|
+
if @stream.kind_of?(BufferedInputStream)
|
|
522
|
+
@stream = stream
|
|
146
523
|
else
|
|
147
|
-
|
|
524
|
+
@stream = BufferedInputStream.for_io(stream)
|
|
148
525
|
end
|
|
149
|
-
#
|
|
150
|
-
|
|
526
|
+
# default is not raw mode
|
|
527
|
+
self.raw = false
|
|
151
528
|
# 1st arg: database class (or file format autodetection)
|
|
152
529
|
if dbclass then
|
|
153
|
-
|
|
530
|
+
self.dbclass = dbclass
|
|
154
531
|
else
|
|
155
|
-
|
|
532
|
+
autodetect
|
|
156
533
|
end
|
|
534
|
+
#
|
|
535
|
+
@skip_leader_mode = :firsttime
|
|
536
|
+
@firsttime_flag = true
|
|
537
|
+
end
|
|
538
|
+
|
|
539
|
+
# The mode that controls how the leader of the data is skipped.
|
|
540
|
+
# :firsttime :: (DEFAULT) only head of file (= first time to read)
|
|
541
|
+
# :everytime :: everytime to read entry
|
|
542
|
+
# nil :: never skip
|
|
543
|
+
attr_accessor :skip_leader_mode
|
|
544
|
+
|
|
545
|
+
# (DEPRECATED) IO object in the flatfile object.
|
|
546
|
+
#
|
|
547
|
+
# Compatibility Note: Bio::FlatFile#io is deprecated.
|
|
548
|
+
# Please use Bio::FlatFile#to_io instead.
|
|
549
|
+
def io
|
|
550
|
+
warn "Bio::FlatFile#io is deprecated."
|
|
551
|
+
@stream.to_io
|
|
157
552
|
end
|
|
158
553
|
|
|
159
554
|
# IO object in the flatfile object.
|
|
160
|
-
|
|
555
|
+
#
|
|
556
|
+
# Compatibility Note: Bio::FlatFile#io is deprecated.
|
|
557
|
+
def to_io
|
|
558
|
+
@stream.to_io
|
|
559
|
+
end
|
|
560
|
+
|
|
561
|
+
# Pathname, filename or URI (or nil).
|
|
562
|
+
def path
|
|
563
|
+
@stream.path
|
|
564
|
+
end
|
|
161
565
|
|
|
162
566
|
# Get next entry.
|
|
163
567
|
def next_entry
|
|
164
|
-
@
|
|
165
|
-
|
|
568
|
+
if @skip_leader_mode and
|
|
569
|
+
((@firsttime_flag and @skip_leader_mode == :firsttime) or
|
|
570
|
+
@skip_leader_mode == :everytime)
|
|
571
|
+
@splitter.skip_leader
|
|
572
|
+
end
|
|
573
|
+
r = @splitter.get_entry
|
|
574
|
+
@firsttime_flag = false
|
|
575
|
+
return nil unless r
|
|
166
576
|
if raw then
|
|
167
|
-
|
|
577
|
+
r
|
|
168
578
|
else
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
s = e.entry_overrun
|
|
172
|
-
rescue NameError
|
|
173
|
-
s = nil
|
|
174
|
-
end
|
|
175
|
-
if s then
|
|
176
|
-
@entry_raw[-(s.length), s.length] = ''
|
|
177
|
-
ungets(s)
|
|
178
|
-
end
|
|
179
|
-
e
|
|
579
|
+
@entry = @dbclass.new(r)
|
|
580
|
+
@entry
|
|
180
581
|
end
|
|
181
582
|
end
|
|
583
|
+
attr_reader :entry
|
|
182
584
|
|
|
183
585
|
# Returns the last raw entry as a string.
|
|
184
|
-
|
|
586
|
+
def entry_raw
|
|
587
|
+
@splitter.entry
|
|
588
|
+
end
|
|
589
|
+
|
|
590
|
+
# start position of the last entry
|
|
591
|
+
def entry_start_pos
|
|
592
|
+
@splitter.entry_start_pos
|
|
593
|
+
end
|
|
594
|
+
|
|
595
|
+
# (end position of the last entry) + 1
|
|
596
|
+
def entry_ended_pos
|
|
597
|
+
@splitter.entry_ended_pos
|
|
598
|
+
end
|
|
185
599
|
|
|
186
600
|
# Iterates over each entry in the flatfile.
|
|
187
601
|
#
|
|
@@ -193,23 +607,23 @@ module Bio
|
|
|
193
607
|
# end
|
|
194
608
|
def each_entry
|
|
195
609
|
while e = self.next_entry
|
|
196
|
-
|
|
610
|
+
yield e
|
|
197
611
|
end
|
|
198
612
|
end
|
|
199
|
-
alias each each_entry
|
|
613
|
+
alias :each :each_entry
|
|
200
614
|
|
|
201
615
|
# Resets file pointer to the start of the flatfile.
|
|
202
616
|
# (similar to IO#rewind)
|
|
203
617
|
def rewind
|
|
204
|
-
r = @
|
|
205
|
-
@
|
|
618
|
+
r = @stream.rewind
|
|
619
|
+
@firsttime_flag = true
|
|
206
620
|
r
|
|
207
621
|
end
|
|
208
622
|
|
|
209
623
|
# Closes input stream.
|
|
210
624
|
# (similar to IO#close)
|
|
211
625
|
def close
|
|
212
|
-
@
|
|
626
|
+
@stream.close
|
|
213
627
|
end
|
|
214
628
|
|
|
215
629
|
# Returns current position of input stream.
|
|
@@ -217,9 +631,9 @@ module Bio
|
|
|
217
631
|
# the result is not guaranteed.
|
|
218
632
|
# It is similar to IO#pos.
|
|
219
633
|
# Note that it will not be equal to io.pos,
|
|
220
|
-
# because FlatFile
|
|
634
|
+
# because FlatFile has its own internal buffer.
|
|
221
635
|
def pos
|
|
222
|
-
@
|
|
636
|
+
@stream.pos
|
|
223
637
|
end
|
|
224
638
|
|
|
225
639
|
# (Not recommended to use it.)
|
|
@@ -228,86 +642,17 @@ module Bio
|
|
|
228
642
|
# the result is not guaranteed.
|
|
229
643
|
# It is similar to IO#pos=.
|
|
230
644
|
# Note that it will not be equal to io.pos=,
|
|
231
|
-
# because FlatFile
|
|
645
|
+
# because FlatFile has its own internal buffer.
|
|
232
646
|
def pos=(p)
|
|
233
|
-
|
|
234
|
-
@prefetch = ''
|
|
235
|
-
r
|
|
647
|
+
@stream.pos=(p)
|
|
236
648
|
end
|
|
237
649
|
|
|
238
650
|
# Returns true if input stream is end-of-file.
|
|
239
651
|
# Otherwise, returns false.
|
|
240
652
|
# (Similar to IO#eof?, but may not be equal to io.eof?,
|
|
241
|
-
# because FlatFile
|
|
653
|
+
# because FlatFile has its own internal buffer.)
|
|
242
654
|
def eof?
|
|
243
|
-
|
|
244
|
-
false
|
|
245
|
-
else
|
|
246
|
-
@io.eof?
|
|
247
|
-
end
|
|
248
|
-
end
|
|
249
|
-
|
|
250
|
-
# Similar to IO#gets.
|
|
251
|
-
# Internal use only. Users should not call it directly.
|
|
252
|
-
def gets(io_rs = $/)
|
|
253
|
-
if @prefetch.size > 0
|
|
254
|
-
if io_rs == nil then
|
|
255
|
-
r = @prefetch + @io.gets(nil).to_s
|
|
256
|
-
@prefetch = ''
|
|
257
|
-
else
|
|
258
|
-
if io_rs == '' then
|
|
259
|
-
sp_rs = /\n\n/n
|
|
260
|
-
sp_rs_orig = "\n\n"
|
|
261
|
-
else
|
|
262
|
-
sp_rs = Regexp.new(Regexp.escape(io_rs, 'n'), 0, 'n')
|
|
263
|
-
sp_rs_orig = io_rs
|
|
264
|
-
end
|
|
265
|
-
a = @prefetch.split(sp_rs, 2)
|
|
266
|
-
if a.size > 1 then
|
|
267
|
-
r = a[0] + sp_rs_orig
|
|
268
|
-
@prefetch = a[1]
|
|
269
|
-
else
|
|
270
|
-
@prefetch << @io.gets(io_rs).to_s
|
|
271
|
-
a = @prefetch.split(sp_rs, 2)
|
|
272
|
-
if a.size > 1 then
|
|
273
|
-
r = a[0] + sp_rs_orig
|
|
274
|
-
@prefetch = a[1].to_s
|
|
275
|
-
else
|
|
276
|
-
r = @prefetch
|
|
277
|
-
@prefetch = ''
|
|
278
|
-
end
|
|
279
|
-
end
|
|
280
|
-
end
|
|
281
|
-
r
|
|
282
|
-
else
|
|
283
|
-
@io.gets(io_rs)
|
|
284
|
-
end
|
|
285
|
-
end
|
|
286
|
-
|
|
287
|
-
# Unread read data.
|
|
288
|
-
# Internal use only. Users must not call it.
|
|
289
|
-
def ungets(str)
|
|
290
|
-
@prefetch = str + @prefetch
|
|
291
|
-
nil
|
|
292
|
-
end
|
|
293
|
-
|
|
294
|
-
# Similar to IO#getc.
|
|
295
|
-
# Internal use only. Users should not call it directly.
|
|
296
|
-
def getc
|
|
297
|
-
if @prefetch.size > 0 then
|
|
298
|
-
r = @prefetch[0]
|
|
299
|
-
@prefetch = @prefetch[1..-1]
|
|
300
|
-
else
|
|
301
|
-
r = @io.getc
|
|
302
|
-
end
|
|
303
|
-
r
|
|
304
|
-
end
|
|
305
|
-
|
|
306
|
-
# Similar to IO#ungetc.
|
|
307
|
-
# Internal use only. Users should not call it.
|
|
308
|
-
def ungetc(c)
|
|
309
|
-
@prefetch = sprintf("%c", c) + @prefetch
|
|
310
|
-
nil
|
|
655
|
+
@stream.eof?
|
|
311
656
|
end
|
|
312
657
|
|
|
313
658
|
# If true is given, the next_entry method returns
|
|
@@ -319,14 +664,24 @@ module Bio
|
|
|
319
664
|
# If true, raw mode.
|
|
320
665
|
attr_reader :raw
|
|
321
666
|
|
|
667
|
+
# Similar to IO#gets.
|
|
668
|
+
# Internal use only. Users should not call it directly.
|
|
669
|
+
def gets(*arg)
|
|
670
|
+
@stream.gets(*arg)
|
|
671
|
+
end
|
|
672
|
+
|
|
322
673
|
# Sets database class. Please use only if autodetect fails.
|
|
323
|
-
def dbclass=(
|
|
324
|
-
if
|
|
325
|
-
|
|
326
|
-
|
|
674
|
+
def dbclass=(klass)
|
|
675
|
+
if klass then
|
|
676
|
+
@dbclass = klass
|
|
677
|
+
begin
|
|
678
|
+
@splitter = @dbclass.flatfile_splitter(@dbclass, @stream)
|
|
679
|
+
rescue NameError, NoMethodError
|
|
680
|
+
@splitter = Splitter::Default.new(klass, @stream)
|
|
681
|
+
end
|
|
327
682
|
else
|
|
328
|
-
|
|
329
|
-
|
|
683
|
+
@dbclass = nil
|
|
684
|
+
@splitter = nil
|
|
330
685
|
end
|
|
331
686
|
end
|
|
332
687
|
|
|
@@ -340,157 +695,518 @@ module Bio
|
|
|
340
695
|
#
|
|
341
696
|
# The method can be called anytime if you want (but not recommended).
|
|
342
697
|
# This might be useful if the input file is a mixture of multiple data formats.
|
|
343
|
-
def autodetect(lines = 31)
|
|
344
|
-
r =
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
if line and line.strip.size > 0 then
|
|
349
|
-
r = self.class.autodetect(@prefetch)
|
|
350
|
-
if r then
|
|
351
|
-
self.dbclass = r
|
|
352
|
-
return r
|
|
353
|
-
end
|
|
354
|
-
end
|
|
355
|
-
end
|
|
698
|
+
def autodetect(lines = 31, ad = AutoDetect.default)
|
|
699
|
+
if r = ad.autodetect_flatfile(self, lines)
|
|
700
|
+
self.dbclass = r
|
|
701
|
+
else
|
|
702
|
+
self.dbclass = nil unless self.dbclass
|
|
356
703
|
end
|
|
357
|
-
self.dbclass = nil unless dbclass
|
|
358
704
|
r
|
|
359
705
|
end
|
|
360
706
|
|
|
361
707
|
# Detects database class (== file format) of given file.
|
|
362
708
|
# If fails to determine, returns nil.
|
|
363
709
|
def self.autodetect_file(filename)
|
|
364
|
-
|
|
365
|
-
r = ff.dbclass
|
|
366
|
-
ff.close
|
|
367
|
-
r
|
|
710
|
+
self.open_file(filename).dbclass
|
|
368
711
|
end
|
|
369
712
|
|
|
370
713
|
# Detects database class (== file format) of given input stream.
|
|
371
714
|
# If fails to determine, returns nil.
|
|
372
715
|
# Caution: the method reads some data from the input stream,
|
|
373
716
|
# and the data will be lost.
|
|
717
|
+
def self.autodetect_io(io)
|
|
718
|
+
self.new(nil, io).dbclass
|
|
719
|
+
end
|
|
720
|
+
|
|
721
|
+
# This is OBSOLETED. Please use autodetect_io(io) instead.
|
|
374
722
|
def self.autodetect_stream(io)
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
r
|
|
723
|
+
$stderr.print "Bio::FlatFile.autodetect_stream will be deprecated." if $VERBOSE
|
|
724
|
+
self.autodetect_io(io)
|
|
378
725
|
end
|
|
379
726
|
|
|
380
727
|
# Detects database class (== file format) of given string.
|
|
381
728
|
# If fails to determine, returns false or nil.
|
|
382
729
|
def self.autodetect(text)
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
730
|
+
AutoDetect.default.autodetect(text)
|
|
731
|
+
end
|
|
732
|
+
|
|
733
|
+
|
|
734
|
+
# AutoDetect automatically determines database class of given data.
|
|
735
|
+
class AutoDetect
|
|
736
|
+
|
|
737
|
+
include TSort
|
|
738
|
+
|
|
739
|
+
# Template of a single rule of autodetection
|
|
740
|
+
class RuleTemplate
|
|
741
|
+
# Creates a new element.
|
|
742
|
+
def self.[](*arg)
|
|
743
|
+
self.new(*arg)
|
|
744
|
+
end
|
|
745
|
+
|
|
746
|
+
# Creates a new element.
|
|
747
|
+
def initialize
|
|
748
|
+
a = Array.new
|
|
749
|
+
def a.inspect
|
|
750
|
+
"[#{self.collect { |e| e.name.inspect }.join(' ')}]"
|
|
751
|
+
end
|
|
752
|
+
@higher_priority_elements = a.clone
|
|
753
|
+
@lower_priority_elements = a.clone
|
|
754
|
+
@name = nil
|
|
408
755
|
end
|
|
409
756
|
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
Bio::KEGG::KO
|
|
417
|
-
when /^ENTRY .+ Glycan\s*$/
|
|
418
|
-
Bio::KEGG::GLYCAN
|
|
419
|
-
when /^ENTRY .+ (CDS|gene|.*RNA) /
|
|
420
|
-
Bio::KEGG::GENES
|
|
421
|
-
when /^ENTRY EC [0-9\.]+$/
|
|
422
|
-
Bio::KEGG::ENZYME
|
|
423
|
-
when /^ENTRY C[A-Za-z0-9\._]+$/
|
|
424
|
-
Bio::KEGG::COMPOUND
|
|
425
|
-
when /^ENTRY R[A-Za-z0-9\._]+$/
|
|
426
|
-
Bio::KEGG::REACTION
|
|
427
|
-
when /^ENTRY [a-z]+$/
|
|
428
|
-
Bio::KEGG::GENOME
|
|
429
|
-
|
|
430
|
-
when /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/
|
|
431
|
-
if $1 == 'clusters'
|
|
432
|
-
Bio::FANTOM::MaXML::Cluster
|
|
433
|
-
elsif $1 == 'sequences'
|
|
434
|
-
Bio::FANTOM::MaXML::Sequence
|
|
435
|
-
else
|
|
436
|
-
nil #unknown
|
|
757
|
+
# self is prior to the _elem_.
|
|
758
|
+
def is_prior_to(elem)
|
|
759
|
+
return nil if self == elem
|
|
760
|
+
elem.higher_priority_elements << self
|
|
761
|
+
self.lower_priority_elements << elem
|
|
762
|
+
true
|
|
437
763
|
end
|
|
438
764
|
|
|
439
|
-
|
|
440
|
-
|
|
765
|
+
# higher priority elements
|
|
766
|
+
attr_reader :higher_priority_elements
|
|
767
|
+
# lower priority elements
|
|
768
|
+
attr_reader :lower_priority_elements
|
|
769
|
+
|
|
770
|
+
# database classes
|
|
771
|
+
attr_reader :dbclasses
|
|
441
772
|
|
|
442
|
-
|
|
443
|
-
|
|
773
|
+
# unique name of the element
|
|
774
|
+
attr_accessor :name
|
|
775
|
+
|
|
776
|
+
# If given text (and/or meta information) is known, returns
|
|
777
|
+
# the database class.
|
|
778
|
+
# Otherwise, returns nil or false.
|
|
779
|
+
#
|
|
780
|
+
# _text_ will be a String.
|
|
781
|
+
# _meta_ will be a Hash.
|
|
782
|
+
# _meta_ may contain following keys.
|
|
783
|
+
# :path => pathname, filename or uri.
|
|
784
|
+
def guess(text, meta)
|
|
785
|
+
nil
|
|
786
|
+
end
|
|
787
|
+
end #class Rule_Template
|
|
788
|
+
|
|
789
|
+
# RuleDebug is a class for debugging autodetect classes/methods
|
|
790
|
+
class RuleDebug < RuleTemplate
|
|
791
|
+
# Creates a new instance.
|
|
792
|
+
def initialize(name)
|
|
793
|
+
super()
|
|
794
|
+
@name = name
|
|
795
|
+
end
|
|
444
796
|
|
|
445
|
-
|
|
446
|
-
|
|
797
|
+
# prints information to the $stderr.
|
|
798
|
+
def guess(text, meta)
|
|
799
|
+
$stderr.puts @name
|
|
800
|
+
$stderr.puts text.inspect
|
|
801
|
+
$stderr.puts meta.inspect
|
|
802
|
+
nil
|
|
803
|
+
end
|
|
804
|
+
end #class RuleDebug
|
|
447
805
|
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
806
|
+
# Special element that is always top or bottom priority.
|
|
807
|
+
class RuleSpecial < RuleTemplate
|
|
808
|
+
def initialize(name)
|
|
809
|
+
#super()
|
|
810
|
+
@name = name
|
|
811
|
+
end
|
|
812
|
+
# modification of @name is inhibited.
|
|
813
|
+
def name=(x)
|
|
814
|
+
raise 'cannot modify name'
|
|
815
|
+
end
|
|
452
816
|
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
817
|
+
# always returns an empty array
|
|
818
|
+
def higher_priority_elements
|
|
819
|
+
[]
|
|
820
|
+
end
|
|
821
|
+
# always returns an empty array
|
|
822
|
+
def lower_priority_elements
|
|
823
|
+
[]
|
|
824
|
+
end
|
|
825
|
+
end #class RuleSpecial
|
|
457
826
|
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
827
|
+
# Special element that is always top priority.
|
|
828
|
+
TopRule = RuleSpecial.new('top')
|
|
829
|
+
# Special element that is always bottom priority.
|
|
830
|
+
BottomRule = RuleSpecial.new('bottom')
|
|
462
831
|
|
|
463
|
-
|
|
464
|
-
|
|
832
|
+
# An autodetection rule that uses a regular expression
|
|
833
|
+
class RuleRegexp < RuleTemplate
|
|
834
|
+
# Creates a new instance.
|
|
835
|
+
def initialize(dbclass, re)
|
|
836
|
+
super()
|
|
837
|
+
@re = re
|
|
838
|
+
@dbclass = dbclass
|
|
839
|
+
@dbclasses = [ dbclass ]
|
|
840
|
+
@name = dbclass.to_s
|
|
841
|
+
end
|
|
465
842
|
|
|
466
|
-
|
|
467
|
-
|
|
843
|
+
# If given text matches the regexp, returns the database class.
|
|
844
|
+
# Otherwise, returns nil or false.
|
|
845
|
+
# _meta_ is ignored.
|
|
846
|
+
def guess(text, meta)
|
|
847
|
+
@re =~ text ? @dbclass : nil
|
|
848
|
+
end
|
|
849
|
+
end #class RuleRegexp
|
|
468
850
|
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
851
|
+
# An autodetection rule that uses multiple regular expressions (matches if any one of them matches).
|
|
852
|
+
class RuleRegexp2 < RuleTemplate
|
|
853
|
+
# Creates a new instance.
|
|
854
|
+
def initialize(dbclass, *regexps)
|
|
855
|
+
super()
|
|
856
|
+
@regexps = regexps
|
|
857
|
+
@dbclass = dbclass
|
|
858
|
+
@dbclasses = [ dbclass ]
|
|
859
|
+
if name
|
|
860
|
+
@name = name
|
|
861
|
+
else
|
|
862
|
+
@name = @dbclass.to_s
|
|
863
|
+
end
|
|
864
|
+
end
|
|
865
|
+
|
|
866
|
+
# If given text matches the regexp, returns the database class.
|
|
867
|
+
# Otherwise, returns nil or false.
|
|
868
|
+
# _meta_ is ignored.
|
|
869
|
+
def guess(text, meta)
|
|
870
|
+
@regexps.each do |re|
|
|
871
|
+
return @dbclass if re =~ text
|
|
872
|
+
end
|
|
873
|
+
nil
|
|
874
|
+
end
|
|
875
|
+
end #class RuleRegexp
|
|
876
|
+
|
|
877
|
+
# An autodetection rule that passes data to the proc object.
|
|
878
|
+
class RuleProc < RuleTemplate
|
|
879
|
+
# Creates a new instance.
|
|
880
|
+
def initialize(*dbclasses, &proc)
|
|
881
|
+
super()
|
|
882
|
+
@proc = proc
|
|
883
|
+
@dbclasses = dbclasses
|
|
884
|
+
@name = dbclasses.collect { |x| x.to_s }.join('|')
|
|
885
|
+
end
|
|
886
|
+
|
|
887
|
+
# If given text (and/or meta information) is known, returns
|
|
888
|
+
# the database class.
|
|
889
|
+
# Otherwise, returns nil or false.
|
|
890
|
+
#
|
|
891
|
+
# Refer RuleTemplate#guess for _meta_.
|
|
892
|
+
def guess(text, meta)
|
|
893
|
+
@proc.call(text)
|
|
894
|
+
end
|
|
895
|
+
end #class RuleProc
|
|
896
|
+
|
|
897
|
+
# Creates a new Autodetect object
|
|
898
|
+
def initialize
|
|
899
|
+
# stores autodetection rules.
|
|
900
|
+
@rules = Hash.new
|
|
901
|
+
# stores elements (cache)
|
|
902
|
+
@elements = nil
|
|
903
|
+
self.add(TopRule)
|
|
904
|
+
self.add(BottomRule)
|
|
905
|
+
end
|
|
906
|
+
|
|
907
|
+
# Adds a new element.
|
|
908
|
+
# Returns _elem_.
|
|
909
|
+
def add(elem)
|
|
910
|
+
raise 'element name conflicts' if @rules[elem.name]
|
|
911
|
+
@elements = nil
|
|
912
|
+
@rules[elem.name] = elem
|
|
913
|
+
elem
|
|
914
|
+
end
|
|
915
|
+
|
|
916
|
+
# (required by TSort.)
|
|
917
|
+
# For all elements, yields each element.
|
|
918
|
+
def tsort_each_node(&x)
|
|
919
|
+
@rules.each_value(&x)
|
|
920
|
+
end
|
|
921
|
+
|
|
922
|
+
# (required by TSort.)
|
|
923
|
+
# For a given element, yields each child
|
|
924
|
+
# (= lower priority elements) of the element.
|
|
925
|
+
def tsort_each_child(elem)
|
|
926
|
+
if elem == TopRule then
|
|
927
|
+
@rules.each_value do |e|
|
|
928
|
+
yield e unless e == TopRule or
|
|
929
|
+
e.lower_priority_elements.index(TopRule)
|
|
930
|
+
end
|
|
931
|
+
elsif elem == BottomRule then
|
|
932
|
+
@rules.each_value do |e|
|
|
933
|
+
yield e if e.higher_priority_elements.index(BottomRule)
|
|
934
|
+
end
|
|
476
935
|
else
|
|
477
|
-
|
|
936
|
+
elem.lower_priority_elements.each do |e|
|
|
937
|
+
yield e if e != BottomRule
|
|
938
|
+
end
|
|
939
|
+
unless elem.higher_priority_elements.index(BottomRule)
|
|
940
|
+
yield BottomRule
|
|
941
|
+
end
|
|
478
942
|
end
|
|
943
|
+
end
|
|
479
944
|
|
|
480
|
-
|
|
481
|
-
|
|
945
|
+
# Returns current elements as an array
|
|
946
|
+
# whose order fulfills all elements' priorities.
|
|
947
|
+
def elements
|
|
948
|
+
unless @elements
|
|
949
|
+
ary = tsort
|
|
950
|
+
ary.reverse!
|
|
951
|
+
@elements = ary
|
|
952
|
+
end
|
|
953
|
+
@elements
|
|
954
|
+
end
|
|
955
|
+
|
|
956
|
+
# rebuilds the object and clears internal cache.
|
|
957
|
+
def rehash
|
|
958
|
+
@rules.rehash
|
|
959
|
+
@elements = nil
|
|
960
|
+
end
|
|
961
|
+
|
|
962
|
+
# visualizes the object (mainly for debug)
|
|
963
|
+
def inspect
|
|
964
|
+
"<#{self.class.to_s} " +
|
|
965
|
+
self.elements.collect { |e| e.name.inspect }.join(' ') +
|
|
966
|
+
">"
|
|
967
|
+
end
|
|
968
|
+
|
|
969
|
+
# Iterates over each element.
|
|
970
|
+
def each_rule(&x) #:yields: elem
|
|
971
|
+
elements.each(&x)
|
|
482
972
|
end
|
|
483
|
-
end
|
|
484
973
|
|
|
974
|
+
# Autodetect from the text.
|
|
975
|
+
# Returns a database class if succeeded.
|
|
976
|
+
# Returns nil if failed.
|
|
977
|
+
def autodetect(text, meta = {})
|
|
978
|
+
r = nil
|
|
979
|
+
elements.each do |e|
|
|
980
|
+
#$stderr.puts e.name
|
|
981
|
+
r = e.guess(text, meta)
|
|
982
|
+
break if r
|
|
983
|
+
end
|
|
984
|
+
r
|
|
985
|
+
end
|
|
986
|
+
|
|
987
|
+
# autodetect from the FlatFile object.
|
|
988
|
+
# Returns a database class if succeeded.
|
|
989
|
+
# Returns nil if failed.
|
|
990
|
+
def autodetect_flatfile(ff, lines = 31)
|
|
991
|
+
meta = {}
|
|
992
|
+
stream = ff.instance_eval { @stream }
|
|
993
|
+
begin
|
|
994
|
+
path = stream.path
|
|
995
|
+
rescue NameError
|
|
996
|
+
end
|
|
997
|
+
if path then
|
|
998
|
+
meta[:path] = path
|
|
999
|
+
# call autodetect once with meta and without any read action
|
|
1000
|
+
if r = self.autodetect(stream.prefetch_buffer, meta)
|
|
1001
|
+
return r
|
|
1002
|
+
end
|
|
1003
|
+
end
|
|
1004
|
+
# reading stream
|
|
1005
|
+
1.upto(lines) do |x|
|
|
1006
|
+
break unless line = stream.prefetch_gets
|
|
1007
|
+
if line.strip.size > 0 then
|
|
1008
|
+
if r = self.autodetect(stream.prefetch_buffer, meta)
|
|
1009
|
+
return r
|
|
1010
|
+
end
|
|
1011
|
+
end
|
|
1012
|
+
end
|
|
1013
|
+
return nil
|
|
1014
|
+
end
|
|
1015
|
+
|
|
1016
|
+
# default autodetect object for class method
|
|
1017
|
+
@default = nil
|
|
1018
|
+
|
|
1019
|
+
# returns the default autodetect object
|
|
1020
|
+
def self.default
|
|
1021
|
+
unless @default then
|
|
1022
|
+
@default = self.make_default
|
|
1023
|
+
end
|
|
1024
|
+
@default
|
|
1025
|
+
end
|
|
1026
|
+
|
|
1027
|
+
# sets the default autodetect object.
|
|
1028
|
+
def self.default=(ad)
|
|
1029
|
+
@default = ad
|
|
1030
|
+
end
|
|
1031
|
+
|
|
1032
|
+
# make a new autodetect object
|
|
1033
|
+
def self.[](*arg)
|
|
1034
|
+
a = self.new
|
|
1035
|
+
arg.each { |e| a.add(e) }
|
|
1036
|
+
a
|
|
1037
|
+
end
|
|
1038
|
+
|
|
1039
|
+
# make a default of default autodetect object
|
|
1040
|
+
def self.make_default
|
|
1041
|
+
a = self[
|
|
1042
|
+
genbank = RuleRegexp[ Bio::GenBank,
|
|
1043
|
+
/^LOCUS .+ bp .*[a-z]*[DR]?NA/ ],
|
|
1044
|
+
genpept = RuleRegexp[ Bio::GenPept,
|
|
1045
|
+
/^LOCUS .+ aa .+/ ],
|
|
1046
|
+
medline = RuleRegexp[ Bio::MEDLINE,
|
|
1047
|
+
/^UI \- [0-9]+$/ ],
|
|
1048
|
+
embl = RuleRegexp[ Bio::EMBL,
|
|
1049
|
+
/^ID .+\; .*(DNA|RNA|XXX)\;/ ],
|
|
1050
|
+
sptr = RuleRegexp[ Bio::SPTR,
|
|
1051
|
+
/^ID .+\; *PRT\;/ ],
|
|
1052
|
+
prosite = RuleRegexp[ Bio::PROSITE,
|
|
1053
|
+
/^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/ ],
|
|
1054
|
+
transfac = RuleRegexp[ Bio::TRANSFAC,
|
|
1055
|
+
/^AC [-A-Za-z0-9_\.]+$/ ],
|
|
1056
|
+
|
|
1057
|
+
aaindex = RuleProc.new(Bio::AAindex1, Bio::AAindex2) do |text|
|
|
1058
|
+
if /^H [-A-Z0-9_\.]+$/ =~ text then
|
|
1059
|
+
if text =~ /^M [rc]/ then
|
|
1060
|
+
Bio::AAindex2
|
|
1061
|
+
elsif text =~ /^I A\/L/ then
|
|
1062
|
+
Bio::AAindex1
|
|
1063
|
+
else
|
|
1064
|
+
false #fail to determine
|
|
1065
|
+
end
|
|
1066
|
+
else
|
|
1067
|
+
nil
|
|
1068
|
+
end
|
|
1069
|
+
end,
|
|
1070
|
+
|
|
1071
|
+
litdb = RuleRegexp[ Bio::LITDB,
|
|
1072
|
+
/^CODE [0-9]+$/ ],
|
|
1073
|
+
brite = RuleRegexp[ Bio::KEGG::BRITE,
|
|
1074
|
+
/^Entry [A-Z0-9]+/ ],
|
|
1075
|
+
ko = RuleRegexp[ Bio::KEGG::KO,
|
|
1076
|
+
/^ENTRY .+ KO\s*/ ],
|
|
1077
|
+
glycan = RuleRegexp[ Bio::KEGG::GLYCAN,
|
|
1078
|
+
/^ENTRY .+ Glycan\s*/ ],
|
|
1079
|
+
enzyme = RuleRegexp2[ Bio::KEGG::ENZYME,
|
|
1080
|
+
/^ENTRY EC [0-9\.]+$/,
|
|
1081
|
+
/^ENTRY .+ Enzyme\s*/
|
|
1082
|
+
],
|
|
1083
|
+
compound = RuleRegexp2[ Bio::KEGG::COMPOUND,
|
|
1084
|
+
/^ENTRY C[A-Za-z0-9\._]+$/,
|
|
1085
|
+
/^ENTRY .+ Compound\s*/
|
|
1086
|
+
],
|
|
1087
|
+
reaction = RuleRegexp2[ Bio::KEGG::REACTION,
|
|
1088
|
+
/^ENTRY R[A-Za-z0-9\._]+$/,
|
|
1089
|
+
/^ENTRY .+ Reaction\s*/
|
|
1090
|
+
],
|
|
1091
|
+
genes = RuleRegexp[ Bio::KEGG::GENES,
|
|
1092
|
+
/^ENTRY .+ (CDS|gene|.*RNA) / ],
|
|
1093
|
+
genome = RuleRegexp[ Bio::KEGG::GENOME,
|
|
1094
|
+
/^ENTRY [a-z]+$/ ],
|
|
1095
|
+
|
|
1096
|
+
fantom = RuleProc.new(Bio::FANTOM::MaXML::Cluster,
|
|
1097
|
+
Bio::FANTOM::MaXML::Sequence) do |text|
|
|
1098
|
+
if /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/ =~ text
|
|
1099
|
+
case $1
|
|
1100
|
+
when 'clusters'
|
|
1101
|
+
Bio::FANTOM::MaXML::Cluster
|
|
1102
|
+
when 'sequences'
|
|
1103
|
+
Bio::FANTOM::MaXML::Sequence
|
|
1104
|
+
else
|
|
1105
|
+
nil #unknown
|
|
1106
|
+
end
|
|
1107
|
+
else
|
|
1108
|
+
nil
|
|
1109
|
+
end
|
|
1110
|
+
end,
|
|
1111
|
+
|
|
1112
|
+
pdb = RuleRegexp[ Bio::PDB,
|
|
1113
|
+
/^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/ ],
|
|
1114
|
+
het = RuleRegexp[ Bio::PDB::ChemicalComponent,
|
|
1115
|
+
/^RESIDUE +.+ +\d+\s*$/ ],
|
|
1116
|
+
|
|
1117
|
+
clustal = RuleRegexp[ Bio::ClustalW::Report,
|
|
1118
|
+
/^CLUSTAL .*\(.*\).*sequence +alignment/ ],
|
|
1119
|
+
|
|
1120
|
+
blastxml = RuleRegexp[ Bio::Blast::Report,
|
|
1121
|
+
/\<\!DOCTYPE BlastOutput PUBLIC / ],
|
|
1122
|
+
wublast = RuleRegexp[ Bio::Blast::WU::Report,
|
|
1123
|
+
/^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
|
|
1124
|
+
wutblast = RuleRegexp[ Bio::Blast::WU::Report_TBlast,
|
|
1125
|
+
/^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/ ],
|
|
1126
|
+
blast = RuleRegexp[ Bio::Blast::Default::Report,
|
|
1127
|
+
/^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
|
|
1128
|
+
tblast = RuleRegexp[ Bio::Blast::Default::Report_TBlast,
|
|
1129
|
+
/^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/ ],
|
|
1130
|
+
|
|
1131
|
+
blat = RuleRegexp[ Bio::Blat::Report,
|
|
1132
|
+
/^psLayout version \d+\s*$/ ],
|
|
1133
|
+
spidey = RuleRegexp[ Bio::Spidey::Report,
|
|
1134
|
+
/^\-\-SPIDEY version .+\-\-$/ ],
|
|
1135
|
+
hmmer = RuleRegexp[ Bio::HMMER::Report,
|
|
1136
|
+
/^HMMER +\d+\./ ],
|
|
1137
|
+
sim4 = RuleRegexp[ Bio::Sim4::Report,
|
|
1138
|
+
/^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
|
|
1139
|
+
|
|
1140
|
+
fastaformat = RuleProc.new(Bio::FastaFormat,
|
|
1141
|
+
Bio::NBRF,
|
|
1142
|
+
Bio::FastaNumericFormat) do |text|
|
|
1143
|
+
if /^>.+$/ =~ text
|
|
1144
|
+
case text
|
|
1145
|
+
when /^>([PF]1|[DR][LC]|N[13]|XX)\;.+/
|
|
1146
|
+
Bio::NBRF
|
|
1147
|
+
when /^>.+$\s+(^\#.*$\s*)*^\s*\d*\s*[-a-zA-Z_\.\[\]\(\)\*\+\$]+/
|
|
1148
|
+
Bio::FastaFormat
|
|
1149
|
+
when /^>.+$\s+^\s*\d+(\s+\d+)*\s*$/
|
|
1150
|
+
Bio::FastaNumericFormat
|
|
1151
|
+
else
|
|
1152
|
+
false
|
|
1153
|
+
end
|
|
1154
|
+
else
|
|
1155
|
+
nil
|
|
1156
|
+
end
|
|
1157
|
+
end
|
|
1158
|
+
]
|
|
1159
|
+
|
|
1160
|
+
# dependencies
|
|
1161
|
+
# NCBI
|
|
1162
|
+
genbank.is_prior_to genpept
|
|
1163
|
+
# EMBL/UniProt
|
|
1164
|
+
embl.is_prior_to sptr
|
|
1165
|
+
sptr.is_prior_to prosite
|
|
1166
|
+
prosite.is_prior_to transfac
|
|
1167
|
+
# KEGG
|
|
1168
|
+
#aaindex.is_prior_to litdb
|
|
1169
|
+
#litdb.is_prior_to brite
|
|
1170
|
+
brite.is_prior_to ko
|
|
1171
|
+
ko.is_prior_to glycan
|
|
1172
|
+
glycan.is_prior_to enzyme
|
|
1173
|
+
enzyme.is_prior_to compound
|
|
1174
|
+
compound.is_prior_to reaction
|
|
1175
|
+
reaction.is_prior_to genes
|
|
1176
|
+
genes.is_prior_to genome
|
|
1177
|
+
# PDB
|
|
1178
|
+
pdb.is_prior_to het
|
|
1179
|
+
# BLAST
|
|
1180
|
+
wublast.is_prior_to wutblast
|
|
1181
|
+
wutblast.is_prior_to blast
|
|
1182
|
+
blast.is_prior_to tblast
|
|
1183
|
+
# FastaFormat
|
|
1184
|
+
BottomRule.is_prior_to(fastaformat)
|
|
1185
|
+
|
|
1186
|
+
# for debug
|
|
1187
|
+
#debug_first = RuleDebug.new('debug_first')
|
|
1188
|
+
#a.add(debug_first)
|
|
1189
|
+
#debug_first.is_prior_to(TopRule)
|
|
1190
|
+
|
|
1191
|
+
## for debug
|
|
1192
|
+
#debug_last = RuleDebug.new('debug_last')
|
|
1193
|
+
#a.add(debug_last)
|
|
1194
|
+
#BottomRule.is_prior_to(debug_last)
|
|
1195
|
+
#fastaformat.is_prior_to(debug_last)
|
|
1196
|
+
|
|
1197
|
+
a.rehash
|
|
1198
|
+
return a
|
|
1199
|
+
end
|
|
1200
|
+
|
|
1201
|
+
end #class AutoDetect
|
|
1202
|
+
|
|
485
1203
|
end #class FlatFile
|
|
486
1204
|
|
|
487
1205
|
end #module Bio
|
|
488
1206
|
|
|
489
|
-
|
|
490
1207
|
if __FILE__ == $0
|
|
491
1208
|
if ARGV.size == 2
|
|
492
1209
|
require 'bio'
|
|
493
1210
|
p Bio::FlatFile.open(eval(ARGV.shift), ARGV.shift).next_entry
|
|
494
1211
|
end
|
|
495
1212
|
end
|
|
496
|
-
|