bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/io/fastacmd.rb - NCBI fastacmd wrapper class
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2005 Shuji SHIGENOBU <shige@nibb.ac.jp>
|
|
5
|
+
# Copyright (C) 2005 Toshiaki Katayama <k@bioruby.org>
|
|
6
|
+
#
|
|
7
|
+
# This library is free software; you can redistribute it and/or
|
|
8
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
9
|
+
# License as published by the Free Software Foundation; either
|
|
10
|
+
# version 2 of the License, or (at your option) any later version.
|
|
11
|
+
#
|
|
12
|
+
# This library is distributed in the hope that it will be useful,
|
|
13
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
15
|
+
# Lesser General Public License for more details.
|
|
16
|
+
#
|
|
17
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
18
|
+
# License along with this library; if not, write to the Free Software
|
|
19
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
20
|
+
#
|
|
21
|
+
# $Id: fastacmd.rb,v 1.8 2005/09/26 13:00:08 k Exp $
|
|
22
|
+
#
|
|
23
|
+
|
|
24
|
+
require 'bio/db/fasta'
|
|
25
|
+
require 'bio/io/flatfile'
|
|
26
|
+
require 'bio/command'
|
|
27
|
+
|
|
28
|
+
module Bio
  class Blast

    # Runs the external +fastacmd+ program against a database and parses
    # what it prints as Bio::FastaFormat entries (through Bio::FlatFile).
    # The command is launched with call_command_local, which comes from
    # Bio::Command::Tools.
    class Fastacmd

      include Enumerable
      include Bio::Command::Tools

      # db:: value passed to fastacmd's -d option.
      def initialize(db)
        @fastacmd = 'fastacmd'
        @database = db
      end

      # database:: value passed to -d
      # fastacmd:: name (or path) of the program to run; defaults to 'fastacmd'
      # errorlog:: not assigned by any method defined in this class
      #            (presumably filled in by Bio::Command::Tools -- confirm)
      attr_accessor :database, :fastacmd, :errorlog

      # Takes a single entry_id and returns the first object of the
      # Array produced by #fetch (a Bio::FastaFormat object).
      def get_by_id(entry_id)
        fetch(entry_id).shift
      end

      # Takes one entry_id, or a list of them, and returns an Array of
      # Bio::FastaFormat objects.  Anything that responds to +join+ is
      # collapsed into a single comma-separated -s argument.
      def fetch(list)
        entry_id = list.respond_to?(:join) ? list.join(",") : list

        call_command_local([ @fastacmd, '-d', @database, '-s', entry_id ]) do |inn, out|
          # Nothing is written to the command, so close its input first.
          inn.close_write
          Bio::FlatFile.new(Bio::FastaFormat, out).to_a
        end
      end

      # Runs fastacmd with "-D T" and yields every parsed entry of its
      # output to the given block.  Returns self.
      def each_entry
        call_command_local([ @fastacmd, '-d', @database, '-D', 'T' ]) do |inn, out|
          inn.close_write
          Bio::FlatFile.open(Bio::FastaFormat, out) do |f|
            f.each_entry { |e| yield e }
          end
        end
        self
      end
      alias each each_entry

    end

  end
end
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
if __FILE__ == $0

  # Usage demo, executed only when this file is run directly.
  # Optional arguments: database name, then a single entry id.
  db_name   = ARGV.shift || "/db/myblastdb"
  single_id = ARGV.shift || "sp:128U_DROME"
  id_list   = ["sp:1433_SPIOL", "sp:1432_MAIZE"]

  client = Bio::Blast::Fastacmd.new(db_name)

  ### Retrieve one sequence

  # Fastacmd#get_by_id(entry_id) returns a Bio::FastaFormat object.
  one = client.get_by_id(single_id)
  p one

  # A Bio::FastaFormat object prints as a fasta format string with puts.
  puts one

  # Fastacmd#fetch(entry_id) returns an Array of Bio::FastaFormat
  # objects even when the result is a single entry.
  p client.fetch(single_id)

  ### Retrieve more sequences

  # Fastacmd#fetch also accepts a list of entry ids and returns
  # an Array of Bio::FastaFormat objects.
  p client.fetch(id_list)

  # So the results can be iterated over.
  client.fetch(id_list).each { |fasta| puts fasta }

  ### Iterate over all entries

  # Every sequence in the database can be visited as well.
  client.each do |fasta|
    p [ fasta.definition[0..30], fasta.seq.size ]
  end

end
|
|
123
|
+
|
data/lib/bio/io/fetch.rb
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/io/fetch.rb - BioFetch access module
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2002, 2005
|
|
5
|
+
# Toshiaki Katayama <k@bioruby.org>
|
|
6
|
+
# License:: LGPL
|
|
7
|
+
#
|
|
8
|
+
# $Id: fetch.rb,v 1.4 2005/12/18 15:58:42 k Exp $
|
|
9
|
+
#
|
|
10
|
+
#--
|
|
11
|
+
#
|
|
12
|
+
# This library is free software; you can redistribute it and/or
|
|
13
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
14
|
+
# License as published by the Free Software Foundation; either
|
|
15
|
+
# version 2 of the License, or (at your option) any later version.
|
|
16
|
+
#
|
|
17
|
+
# This library is distributed in the hope that it will be useful,
|
|
18
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
19
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
20
|
+
# Lesser General Public License for more details.
|
|
21
|
+
#
|
|
22
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
23
|
+
# License along with this library; if not, write to the Free Software
|
|
24
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
25
|
+
#
|
|
26
|
+
#++
|
|
27
|
+
#
|
|
28
|
+
|
|
29
|
+
require 'uri'
|
|
30
|
+
require 'net/http'
|
|
31
|
+
|
|
32
|
+
module Bio

  # Minimal client for a BioFetch server.  Every request is an HTTP POST
  # of a small query string to the server URL given to Bio::Fetch.new;
  # each public query method returns the body of the response.
  class Fetch

    # Create a new Bio::Fetch server object.
    # Use Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch') to connect
    # to EBI BioFetch server.
    #
    # Only the host, port and path components of +url+ are kept.
    def initialize(url = 'http://bioruby.org/cgi-bin/biofetch.rb')
      _scheme, _userinfo, @host, @port, _registry, @path, = URI.split(url)
    end

    # Default database name used by #get_by_id and #formats.
    attr_accessor :database

    # Get raw database entry by id (mainly used by Bio::Registry).
    # Uses the database stored in #database.
    def get_by_id(id)
      fetch(@database, id)
    end

    # Fetch a database entry as specified by database (db), entry id (id),
    # 'raw' text or 'html' (style), and format.  When using BioRuby's
    # BioFetch server, value for the format should not be set.
    #
    # Returns the response body.  The values are interpolated into the
    # query string as given (no URL escaping is applied).
    def fetch(db, id, style = 'raw', format = nil)
      data = [ "db=#{db}", "id=#{id}", "style=#{style}" ]
      data.push("format=#{format}") if format
      post_query(data.join('&'))
    end

    # Short cut for using BioRuby's BioFetch server.  You can fetch an entry
    # without creating instance of BioFetch server.
    def self.query(*args)
      self.new.fetch(*args)
    end

    # What databases are available?
    def databases
      post_query("info=dbs")
    end

    # What formats does the database X have?
    # Returns nil without contacting the server when no database is
    # given and no default database has been set.
    def formats(database = @database)
      if database
        post_query("info=formats;db=#{database}")
      end
    end

    # How many entries can be retrieved simultaneously?
    def maxids
      post_query("info=maxids")
    end

    private

    # POSTs +query+ to the server and returns the response body.
    #
    # Net::HTTP#post returns a single response object; the previous
    # "response, body = post(...)" destructuring only worked through a
    # compatibility shim of old net/http and yields a nil body otherwise,
    # so the body is read from the response explicitly.
    def post_query(query)
      Net::HTTP.new(@host, @port).post(@path, query).body
    end

  end

end # module Bio
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
if __FILE__ == $0

  # Usage demo, executed only when this file is run directly.
  # Each check is printed under a numbered "# test N" heading.

  # ebi = Bio::Fetch.new('http://www.ebi.ac.uk:80/cgi-bin/dbfetch')
  ebi = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')

  checks = [
    lambda { ebi.fetch('embl', 'J00231', 'raw') },
    lambda { ebi.fetch('embl', 'J00231', 'html') },
    lambda { Bio::Fetch.query('genbank', 'J00231') },
    lambda { Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta') },
  ]

  checks.each_with_index do |check, i|
    puts "# test #{i + 1}"
    puts check.call
  end

end
|
|
113
|
+
|
|
114
|
+
|
|
@@ -0,0 +1,496 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/io/flatfile.rb - flatfile access wrapper class
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2001, 2002 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
#
|
|
7
|
+
#--
|
|
8
|
+
# This library is free software; you can redistribute it and/or
|
|
9
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
10
|
+
# License as published by the Free Software Foundation; either
|
|
11
|
+
# version 2 of the License, or (at your option) any later version.
|
|
12
|
+
#
|
|
13
|
+
# This library is distributed in the hope that it will be useful,
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
16
|
+
# Lesser General Public License for more details.
|
|
17
|
+
#
|
|
18
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
19
|
+
# License along with this library; if not, write to the Free Software
|
|
20
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
21
|
+
#++
|
|
22
|
+
#
|
|
23
|
+
# $Id: flatfile.rb,v 1.41 2005/11/01 15:34:45 ngoto Exp $
|
|
24
|
+
#
|
|
25
|
+
# Bio::FlatFile is a helper and wrapper class to read a biological data file.
|
|
26
|
+
# It acts like an IO object.
|
|
27
|
+
# It can automatically detect data format, and users do not need to tell
|
|
28
|
+
# the class what the data is.
|
|
29
|
+
#
|
|
30
|
+
|
|
31
|
+
module Bio
|
|
32
|
+
|
|
33
|
+
# Bio::FlatFile is a helper and wrapper class to read a biological data file.
|
|
34
|
+
# It acts like an IO object.
|
|
35
|
+
# It can automatically detect data format, and users do not need to tell
|
|
36
|
+
# the class what the data is.
|
|
37
|
+
class FlatFile
|
|
38
|
+
|
|
39
|
+
include Enumerable
|
|
40
|
+
|
|
41
|
+
# Creates a new Bio::FlatFile object to read a file or a stream
|
|
42
|
+
# which contains +dbclass+ data.
|
|
43
|
+
#
|
|
44
|
+
# +dbclass+ should be a class (or module) or nil.
|
|
45
|
+
# e.g. Bio::GenBank, Bio::FastaFormat.
|
|
46
|
+
#
|
|
47
|
+
# If +file+ is a filename (which doesn't have gets method),
|
|
48
|
+
# the method opens a local file named +file+
|
|
49
|
+
# with 'File.open(filename, mode, perm)'.
|
|
50
|
+
#
|
|
51
|
+
# When nil is given to dbclass, trying to determine database class
|
|
52
|
+
# (file format) automatically. If fails to determine, dbclass is
|
|
53
|
+
# set to nil and FlatFile#next_entry works same as IO#gets when
|
|
54
|
+
# raw = true. It is recommended to set dbclass using
|
|
55
|
+
# FlatFile#dbclass= method if fails to determine automatically.
|
|
56
|
+
#
|
|
57
|
+
# * Example 1
|
|
58
|
+
# Bio::FlatFile.open(Bio::GenBank, "genbank/gbest40.seq")
|
|
59
|
+
# * Example 2
|
|
60
|
+
# Bio::FlatFile.open(nil, "embl/est_hum17.dat")
|
|
61
|
+
# * Example 3
|
|
62
|
+
# Bio::FlatFile.open(Bio::GenBank, $stdin)
|
|
63
|
+
#
|
|
64
|
+
# If it is called with block, the block will be executed with
|
|
65
|
+
# a newly opened Bio::FlatFile instance object. If filename
|
|
66
|
+
# is given, the file is automatically closed when leaving the block.
|
|
67
|
+
#
|
|
68
|
+
# * Example 4
|
|
69
|
+
# Bio::FlatFile.open(nil, 'test4.fst') do |ff|
|
|
70
|
+
# ff.each { |e| print e.definition, "\n" }
|
|
71
|
+
# end
|
|
72
|
+
#
|
|
73
|
+
# Opens +file+ (a filename, or any object that responds to +gets+) as a
# flatfile of +dbclass+ entries.
#
# Leading non-Hash values in +arg+ are taken as the mode and perm for
# File.open; what remains is handed to FlatFile.new.  With a block, the
# new object is yielded and the block's value is returned; a file opened
# here from a filename is closed when the block returns.  An object that
# already responds to +gets+ is never closed by this method.
def self.open(dbclass, file, *arg)
  # 3rd and 4th arg: mode, perm (passed to File.open)
  openmode = []
  openmode << arg.shift while arg[0] && !arg[0].is_a?(Hash)
  # rest of arg: passed to FlatFile.new

  if file.respond_to?(:gets)
    # 'file' is an IO object
    ff = self.new(dbclass, file, *arg)
    return block_given? ? (yield ff) : ff
  end

  # 'file' is a filename
  unless block_given?
    fobj = File.open(file, *openmode)
    return self.new(dbclass, fobj, *arg)
  end

  File.open(file, *openmode) do |fobj|
    yield self.new(dbclass, fobj, *arg)
  end
end
|
|
98
|
+
|
|
99
|
+
# Same as Bio::FlatFile.open(nil, filename_or_stream, mode, perm, options).
|
|
100
|
+
#
|
|
101
|
+
# * Example 1
|
|
102
|
+
# Bio::FlatFile.auto(ARGF)
|
|
103
|
+
# * Example 2
|
|
104
|
+
# Bio::FlatFile.auto("embl/est_hum17.dat")
|
|
105
|
+
# * Example 3
|
|
106
|
+
# Bio::FlatFile.auto(IO.popen("gzip -dc nc1101.flat.gz"))
|
|
107
|
+
#
|
|
108
|
+
# Shorthand for FlatFile.open with +dbclass+ set to nil; all arguments
# and the optional block are forwarded unchanged.
def self.auto(*arg, &block)
  open(nil, *arg, &block)
end
|
|
111
|
+
|
|
112
|
+
# Same as FlatFile.auto(filename_or_stream, *arg).to_a
|
|
113
|
+
# (It might be OBSOLETED in the future.)
|
|
114
|
+
# Opens the input through FlatFile.auto and returns all of its entries
# as an Array.  Raises RuntimeError ('cannot determine file format')
# when no database class is set on the opened object.
def self.to_a(*arg)
  self.auto(*arg) do |flatfile|
    unless flatfile.dbclass
      raise 'cannot determine file format'
    end
    flatfile.to_a
  end
end
|
|
120
|
+
|
|
121
|
+
# Same as FlatFile.open, except that 'stream' should be a opened
|
|
122
|
+
# stream object (IO, File, ..., who have the 'gets' method).
|
|
123
|
+
#
|
|
124
|
+
# * Example 1
|
|
125
|
+
# Bio::FlatFile.new(Bio::GenBank, ARGF)
|
|
126
|
+
# * Example 2
|
|
127
|
+
# Bio::FlatFile.new(Bio::GenBank, IO.popen("gzip -dc nc1101.flat.gz"))
|
|
128
|
+
#
|
|
129
|
+
# +options+ should be a hash (or nil). It will be OBSOLETED!!
|
|
130
|
+
# Available options are below:
|
|
131
|
+
# [<tt>:raw</tt>] if true, "raw mode" (same as #raw=true).
|
|
132
|
+
# default: false (not "raw mode").
|
|
133
|
+
#
|
|
134
|
+
# * Example 3
|
|
135
|
+
# Bio::FlatFile.new(nil, $stdin, :raw=>true)
|
|
136
|
+
# * Example 3 in old style (deprecated)
|
|
137
|
+
# Bio::FlatFile.new(nil, $stdin, true)
|
|
138
|
+
#
|
|
139
|
+
# dbclass:: database class of the entries, or nil to autodetect it
# stream::  IO-like object to read from
# options:: nil or a Hash; only the :raw key is looked at.  A non-Hash
#           value is used directly as the raw flag (old calling style).
def initialize(dbclass, stream, options = nil)
  # 2nd arg: IO object
  @io = stream
  # prefetch buffer starts out empty
  @prefetch = ''
  # 3rd arg: options (nil or a Hash)
  self.raw = false
  case options
  when Hash
    self.raw = options[:raw] if options.has_key?(:raw)
  else
    self.raw = options
  end
  # 1st arg: database class (or file format autodetection)
  dbclass ? (self.dbclass = dbclass) : autodetect
end
|
|
158
|
+
|
|
159
|
+
# IO object in the flatfile object.
|
|
160
|
+
attr_reader :io
|
|
161
|
+
|
|
162
|
+
# Get next entry.
|
|
163
|
+
# Reads the next entry from the stream.
#
# Returns nil when nothing more can be read.  In raw mode the entry text
# is returned as is; otherwise it is wrapped in a new @dbclass object.
#
# When the entry object responds to +entry_overrun+ and that call returns
# a string, the string is treated as data read past the end of the entry:
# it is cut from the tail of the raw text and pushed back for the next
# read.  The capability is probed with respond_to? rather than by
# rescuing NameError, so an error raised inside +entry_overrun+ itself is
# no longer silently discarded.
def next_entry
  @entry_raw = gets(@rs)
  return nil unless @entry_raw
  return @entry_raw if raw

  e = @dbclass.new(@entry_raw)
  s = e.respond_to?(:entry_overrun) ? e.entry_overrun : nil
  if s then
    # drop the overrun from the end of the raw text, then un-read it
    @entry_raw[-(s.length), s.length] = ''
    ungets(s)
  end
  e
end
|
|
182
|
+
|
|
183
|
+
# Returns the last raw entry as a string.
|
|
184
|
+
attr_reader :entry_raw
|
|
185
|
+
|
|
186
|
+
# Iterates over each entry in the flatfile.
|
|
187
|
+
#
|
|
188
|
+
# * Example
|
|
189
|
+
# include Bio
|
|
190
|
+
# ff = FlatFile.open(GenBank, "genbank/gbhtg14.seq")
|
|
191
|
+
# ff.each_entry do |x|
|
|
192
|
+
# puts x.definition
|
|
193
|
+
# end
|
|
194
|
+
# Yields each entry returned by next_entry until it returns nil or false.
def each_entry
  entry = next_entry
  while entry
    yield entry
    entry = next_entry
  end
end
alias each each_entry
|
|
200
|
+
|
|
201
|
+
# Resets file pointer to the start of the flatfile.
|
|
202
|
+
# (similar to IO#rewind)
|
|
203
|
+
# Rewinds the underlying stream and discards the prefetch buffer.
# Returns whatever the stream's own rewind returned.
def rewind
  result = @io.rewind
  @prefetch = ''
  result
end
|
|
208
|
+
|
|
209
|
+
# Closes input stream.
|
|
210
|
+
# (similar to IO#close)
|
|
211
|
+
# Closes the underlying stream; returns what the stream's close returns.
def close
  @io.close
end
|
|
214
|
+
|
|
215
|
+
# Returns current position of input stream.
|
|
216
|
+
# If the input stream is not a normal file,
|
|
217
|
+
# the result is not guaranteed.
|
|
218
|
+
# It is similar to IO#pos.
|
|
219
|
+
# Note that it will not be equal to io.pos,
|
|
220
|
+
# because FlatFile#autodetect may pre-read some lines.
|
|
221
|
+
# Position of the next unread data: the stream position minus the
# amount still waiting in the prefetch buffer.
# NOTE(review): String#size counts characters on Ruby >= 1.9, so this may
# differ from a byte offset for multibyte data -- confirm.
def pos
  buffered = @prefetch.size
  @io.pos - buffered
end
|
|
224
|
+
|
|
225
|
+
# (Not recommended to use it.)
|
|
226
|
+
# Sets position of input stream.
|
|
227
|
+
# If the input stream is not a normal file,
|
|
228
|
+
# the result is not guaranteed.
|
|
229
|
+
# It is similar to IO#pos=.
|
|
230
|
+
# Note that it will not be equal to io.pos=,
|
|
231
|
+
# because FlatFile#autodetect may pre-read some lines.
|
|
232
|
+
# Moves the underlying stream to position +p+ and discards the prefetch
# buffer.  Returns the value of the stream's own position assignment.
def pos=(p)
  result = (@io.pos = p)
  @prefetch = ''
  result
end
|
|
237
|
+
|
|
238
|
+
# Returns true if input stream is end-of-file.
|
|
239
|
+
# Otherwise, returns false.
|
|
240
|
+
# (Similar to IO#eof?, but may not be equal to io.eof?,
|
|
241
|
+
# because FlatFile#autodetect may pre-read some lines.)
|
|
242
|
+
# False while prefetched data is still pending; otherwise the
# underlying stream's own eof? answer.
def eof?
  @prefetch.empty? && @io.eof?
end
|
|
249
|
+
|
|
250
|
+
# Similar to IO#gets.
|
|
251
|
+
# Internal use only. Users should not call it directly.
|
|
252
|
+
# Similar to IO#gets, but serves data from the prefetch buffer first.
# Internal use only.  Users should not call it directly.
#
# io_rs:: record separator.  nil reads everything that is left; ''
#         splits the buffered data at the first blank line ("\n\n").
#
# With an empty buffer the call goes straight to the stream.  Otherwise
# the separator is looked for in the buffer; if it is not there, one more
# record is read from the stream, appended, and the search is repeated.
# If the separator is still missing, the whole buffer is returned.
#
# The separator is located with String#index.  The earlier
# Regexp.escape(str, 'n') / Regexp.new(src, 0, 'n') calls use argument
# forms that current Ruby rejects with ArgumentError; a plain substring
# search finds the same first occurrence.
def gets(io_rs = $/)
  return @io.gets(io_rs) unless @prefetch.size > 0

  if io_rs == nil then
    rest = @prefetch + @io.gets(nil).to_s
    @prefetch = ''
    return rest
  end

  sep = (io_rs == '' ? "\n\n" : io_rs)
  cut = @prefetch.index(sep)
  unless cut
    @prefetch << @io.gets(io_rs).to_s
    cut = @prefetch.index(sep)
  end

  if cut then
    stop = cut + sep.length
    r = @prefetch[0, stop]
    @prefetch = @prefetch[stop..-1].to_s
  else
    r = @prefetch
    @prefetch = ''
  end
  r
end
|
|
286
|
+
|
|
287
|
+
# Unread read data.
|
|
288
|
+
# Internal use only. Users must not call it.
|
|
289
|
+
# Pushes +str+ back in front of the prefetch buffer so that it is
# the next data to be read.  Always returns nil.
def ungets(str)
  pushed_back = str + @prefetch
  @prefetch = pushed_back
  nil
end
|
|
293
|
+
|
|
294
|
+
# Similar to IO#getc.
|
|
295
|
+
# Internal use only. Users should not call it directly.
|
|
296
|
+
# Returns the next character: taken from the front of the prefetch
# buffer when it holds data, otherwise read from the stream with getc.
def getc
  return @io.getc unless @prefetch.size > 0

  head = @prefetch[0]
  @prefetch = @prefetch[1..-1]
  head
end
|
|
305
|
+
|
|
306
|
+
# Similar to IO#ungetc.
|
|
307
|
+
# Internal use only. Users should not call it.
|
|
308
|
+
# Pushes the character +c+ (formatted with "%c") back in front of the
# prefetch buffer.  Always returns nil.
def ungetc(c)
  char = sprintf("%c", c)
  @prefetch = char + @prefetch
  nil
end
|
|
312
|
+
|
|
313
|
+
# If true is given, the next_entry method returns
|
|
314
|
+
# an entry as text, whereas if false, returns it as a parsed object.
|
|
315
|
+
# Stores the raw-mode flag, normalised to exactly true or false.
def raw=(bool)
  @raw = !!bool
end
|
|
318
|
+
|
|
319
|
+
# If true, raw mode.
|
|
320
|
+
attr_reader :raw
|
|
321
|
+
|
|
322
|
+
# Sets database class. Please use only if autodetect fails.
|
|
323
|
+
# Stores the database class and the matching record separator:
# k::DELIMITER when a class is given, the global $/ when +k+ is
# nil or false (in which case the stored class is nil).
def dbclass=(k)
  @dbclass = (k ? k : nil)
  @rs = (k ? @dbclass::DELIMITER : $/)
end
|
|
332
|
+
|
|
333
|
+
# Returns database class which is automatically detected or
|
|
334
|
+
# given in FlatFile#initialize.
|
|
335
|
+
attr_reader :dbclass
|
|
336
|
+
|
|
337
|
+
# Performs determination of database class (file format).
|
|
338
|
+
# Pre-reads +lines+ lines for format determination (default 31 lines).
|
|
339
|
+
# If fails, returns nil or false. Otherwise, returns database class.
|
|
340
|
+
#
|
|
341
|
+
# The method can be called anytime if you want (but not recommended).
|
|
342
|
+
# This might be useful if the input file is a mixture of multiple formats.
|
|
343
|
+
# Reads up to +lines+ lines from the stream into the prefetch buffer
# and, after every non-blank line, asks the class-level autodetect
# to identify the text buffered so far.
#
# On the first success the detected class is stored through dbclass=
# and returned.  Otherwise the result of the last detection attempt
# (nil or false) is returned, and dbclass= is called with nil if no
# database class is set.
def autodetect(lines = 31)
  detected = nil
  1.upto(lines) do
    line = @io.gets
    next unless line
    @prefetch << line
    next unless line.strip.size > 0   # blank lines carry no signal
    detected = self.class.autodetect(@prefetch)
    next unless detected
    self.dbclass = detected
    return detected
  end
  self.dbclass = nil unless dbclass
  detected
end
|
|
360
|
+
|
|
361
|
+
# Detects database class (== file format) of given file.
|
|
362
|
+
# If fails to determine, returns nil.
|
|
363
|
+
# Detects the database class (== file format) of the file +filename+.
# Returns the dbclass of a flatfile opened with autodetection, which is
# nil when the format could not be determined.
#
# The file is closed in an +ensure+ clause so that the handle is released
# even when reading the detected class raises.
def self.autodetect_file(filename)
  ff = self.open(nil, filename)
  begin
    ff.dbclass
  ensure
    ff.close
  end
end
|
|
369
|
+
|
|
370
|
+
# Detects database class (== file format) of given input stream.
|
|
371
|
+
# If fails to determine, returns nil.
|
|
372
|
+
# Caution: the method reads some data from the input stream,
|
|
373
|
+
# and the data will be lost.
|
|
374
|
+
# Detects the database class (== file format) of the input stream +io+
# by wrapping it in a new flatfile object with autodetection.
# Returns that object's dbclass (nil if undetermined).  The stream is
# not closed, and data read during detection is not given back to it.
def self.autodetect_stream(io)
  self.new(nil, io).dbclass
end
|
|
379
|
+
|
|
380
|
+
# Detects database class (== file format) of given string.
|
|
381
|
+
# If fails to determine, returns false or nil.
|
|
382
|
+
# Detects the database class (== file format) of the string +text+.
#
# +text+ is tested against a fixed sequence of patterns and the class
# paired with the first matching pattern is returned.  Order matters:
# for example the WashU BLAST patterns are tried before the generic BLAST
# patterns, which would otherwise match the same header line.
#
# Returns nil when no pattern matches, and false when a family of formats
# is recognised but the exact member cannot be decided.
#
# 'bio' is required on first use so that the returned constants exist.
#
# NOTE(review): runs of spaces inside these patterns may have been
# collapsed in this copy of the file -- verify the literals against the
# upstream source before relying on them.
def self.autodetect(text)
  require 'bio'
  case text
  # GenBank-style flat files and MEDLINE
  when /^LOCUS .+ bp .*[a-z]*[DR]?NA/
    Bio::GenBank
  when /^LOCUS .+ aa .+/
    Bio::GenPept
  when /^UI \- [0-9]+$/
    Bio::MEDLINE

  # formats whose entries start with an ID or AC line
  when /^ID .+\; .*(DNA|RNA|XXX)\;/
    Bio::EMBL
  when /^ID .+\; *PRT\;/
    Bio::SPTR
  when /^ID [-A-Za-z0-9_\.]+\; (PATTERN|RULE|MATRIX)\.$/
    Bio::PROSITE
  when /^AC [-A-Za-z0-9_\.]+$/
    Bio::TRANSFAC

  # AAindex: the H line alone is ambiguous, a second line decides
  when /^H [-A-Z0-9_\.]+$/
    if text =~ /^M [rc]/ then
      Bio::AAindex2
    elsif text =~ /^I A\/L/ then
      Bio::AAindex1
    else
      false # family recognised, member undecided
    end

  when /^CODE [0-9]+$/
    Bio::LITDB
  when /^Entry [A-Z0-9]+/
    Bio::KEGG::BRITE

  # KEGG entries, told apart by what follows ENTRY
  when /^ENTRY .+ KO\s*$/
    Bio::KEGG::KO
  when /^ENTRY .+ Glycan\s*$/
    Bio::KEGG::GLYCAN
  when /^ENTRY .+ (CDS|gene|.*RNA) /
    Bio::KEGG::GENES
  when /^ENTRY EC [0-9\.]+$/
    Bio::KEGG::ENZYME
  when /^ENTRY C[A-Za-z0-9\._]+$/
    Bio::KEGG::COMPOUND
  when /^ENTRY R[A-Za-z0-9\._]+$/
    Bio::KEGG::REACTION
  when /^ENTRY [a-z]+$/
    Bio::KEGG::GENOME

  # MaXML: $1 is the document kind captured by the pattern above
  when /\<\!DOCTYPE\s+maxml\-(sequences|clusters)\s+SYSTEM/
    if $1 == 'clusters'
      Bio::FANTOM::MaXML::Cluster
    elsif $1 == 'sequences'
      Bio::FANTOM::MaXML::Sequence
    else
      nil # unknown
    end

  when /^HEADER .{40}\d\d\-[A-Z]{3}\-\d\d [0-9A-Z]{4}/
    Bio::PDB

  when /^CLUSTAL .*\(.*\).*sequence +alignment/
    Bio::ClustalW::Report

  when /\<\!DOCTYPE BlastOutput PUBLIC /
    Bio::Blast::Report

  # WashU BLAST must be tested before the generic BLAST patterns below
  when /^BLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/
    Bio::Blast::WU::Report
  when /^TBLAST.? +[\-\.\w]+\-WashU +\[[\-\.\w ]+\]/
    Bio::Blast::WU::Report_TBlast

  when /^BLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/
    Bio::Blast::Default::Report
  when /^TBLAST.? +[\-\.\w]+ +\[[\-\.\w ]+\]/
    Bio::Blast::Default::Report_TBlast

  when /^psLayout version \d+\s*$/
    Bio::Blat::Report
  when /^\-\-SPIDEY version .+\-\-$/
    Bio::Spidey::Report

  when /^HMMER +\d+\./
    Bio::HMMER::Report

  when /^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/
    Bio::Sim4::Report

  # '>' header lines: NBRF is checked first, then sequence-letter
  # FASTA, then numeric FASTA
  when /^>.+$/
    if text =~ /^>([PF]1|[DR][LC]|N[13]|XX)\;.+/ then
      Bio::NBRF
    elsif text =~ /^>.+$\s+(^\#.*$\s*)*^\s*\d*\s*[-a-zA-Z_\.\[\]\(\)\*\+\$]+/ then
      Bio::FastaFormat
    elsif text =~ /^>.+$\s+^\s*\d+(\s+\d+)*\s*$/ then
      Bio::FastaNumericFormat
    else
      false # family recognised, member undecided
    end

  else
    nil # not found
  end
end
|
|
484
|
+
|
|
485
|
+
end #class FlatFile
|
|
486
|
+
|
|
487
|
+
end #module Bio
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
# Manual check, executed only when this file is run directly with exactly
# two arguments: a Ruby expression naming the database class, and a
# filename.  Prints the first entry of the file.
if __FILE__ == $0 && ARGV.size == 2
  require 'bio'
  # NOTE(review): the first argument is passed to eval -- only run this
  # with trusted command-line input.
  dbclass = eval(ARGV.shift)
  filename = ARGV.shift
  p Bio::FlatFile.open(dbclass, filename).next_entry
end
|
|
496
|
+
|