bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/embl/swissprot.rb - SwissProt database class
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: swissprot.rb,v 1.3 2004/08/23 23:40:35 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'bio/db/embl/sptr'
|
|
24
|
+
|
|
25
|
+
module Bio

  # Bio::SwissProt is a plain subclass of Bio::SPTR.  It defines no methods
  # or constants of its own: the SwissProt format is abstracted in SPTR.
  class SwissProt < SPTR
  end

end
|
|
32
|
+
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/embl/trembl.rb - TrEMBL database class
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2001, 2002 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: trembl.rb,v 1.3 2004/08/23 23:40:35 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'bio/db/embl/sptr'
|
|
24
|
+
|
|
25
|
+
module Bio

  # Bio::TrEMBL is a plain subclass of Bio::SPTR.  It defines no methods
  # or constants of its own: the TrEMBL format is abstracted in SPTR.
  class TrEMBL < SPTR
  end

end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/embl/uniprot.rb - UniProt database class
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2005 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: uniprot.rb,v 1.1 2005/09/10 23:43:35 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'bio/db/embl/sptr'
|
|
24
|
+
|
|
25
|
+
module Bio

  # Bio::UniProt is a plain subclass of Bio::SPTR.  It defines no methods
  # or constants of its own: the UniProt format is abstracted in SPTR.
  class UniProt < SPTR
  end

end
|
|
32
|
+
|
|
@@ -0,0 +1,604 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/fantom.rb - RIKEN FANTOM2 database classes
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: fantom.rb,v 1.11 2005/09/26 13:00:06 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
begin
|
|
24
|
+
require 'rexml/document'
|
|
25
|
+
rescue LoadError
|
|
26
|
+
end
|
|
27
|
+
require 'uri'
|
|
28
|
+
require 'net/http'
|
|
29
|
+
|
|
30
|
+
require 'bio/db'
|
|
31
|
+
#require 'bio/sequence'
|
|
32
|
+
|
|
33
|
+
module Bio
|
|
34
|
+
|
|
35
|
+
module FANTOM
|
|
36
|
+
|
|
37
|
+
# Fetches the MaXML document for +idstr+ via get_by_id and returns the
# first sequence found in it.
#
# idstr      :: ID handed unchanged to get_by_id.
# http_proxy :: optional proxy URL, handed unchanged to get_by_id.
#
# Returns element 0 of a MaXML::Sequences built from the response text
# (nil when that collection is empty).
def query(idstr, http_proxy = nil)
  document = get_by_id(idstr, http_proxy).to_s
  MaXML::Sequences.new(document)[0]
end
|
|
42
|
+
module_function :query
|
|
43
|
+
|
|
44
|
+
# Downloads the MaXML document for the given ID from the FANTOM web
# service and returns the HTTP response body.
#
# idstr      :: ID to look up; converted with to_s and sent as the
#               'masterid' query parameter.
# http_proxy :: optional proxy URL.  When nil, the parsed proxy host and
#               port are both nil, so no proxy is handed to Net::HTTP.
#
# Returns the body of the HTTP response.
# Network and HTTP errors raised by Net::HTTP are not rescued here.
def get_by_id(idstr, http_proxy = nil)
  addr = 'fantom.gsc.riken.go.jp'
  port = 80
  # URI.escape no longer exists in Ruby 3.0+, and it left reserved
  # characters such as '&' and '=' untouched, so an ID containing them
  # could alter the query string.  encode_www_form_component escapes the
  # value as a single query component.
  masterid = URI.encode_www_form_component(idstr.to_s)
  path = "/db/maxml/maxmlseq.cgi?masterid=#{masterid}&style=xml"
  proxy = URI.parse(http_proxy.to_s)
  xml = ''
  Net::HTTP.start(addr, port, proxy.host, proxy.port) do |http|
    xml = http.get(path).body
  end
  xml
end
|
|
56
|
+
module_function :get_by_id
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class MaXML < DB
|
|
60
|
+
# DTD of MaXML(Mouse annotation XML)
|
|
61
|
+
# http://fantom.gsc.riken.go.jp/maxml/maxml.dtd
|
|
62
|
+
|
|
63
|
+
DELIMITER = RS = "\n--EOF--\n"
|
|
64
|
+
# This class is for {allseq|repseq|allclust}.sep.xml,
|
|
65
|
+
# not for {allseq|repseq|allclust}.xml.
|
|
66
|
+
|
|
67
|
+
Data_XPath = ''
|
|
68
|
+
|
|
69
|
+
# Wraps an XML element, or parses XML text and picks the element that the
# concrete class names in its Data_XPath constant.
#
# x :: a REXML::Element (stored as is), or a source accepted by
#      REXML::Document.new.  For a String, a trailing DELIMITER is
#      replaced by a newline before parsing.
#
# When the XPath lookup finds nothing, an empty REXML::Document is stored
# instead of nil.
def initialize(x)
  if x.is_a?(REXML::Element)
    @elem = x
    return
  end

  source = x
  if source.is_a?(String)
    source = source.sub(/#{Regexp.escape(DELIMITER)}\z/om, "\n")
  end
  parsed = REXML::Document.new(source)
  #raise 'element is null' unless @elem
  @elem = parsed.elements[self.class::Data_XPath] || REXML::Document.new('')
end
|
|
82
|
+
attr_reader :elem
|
|
83
|
+
|
|
84
|
+
def to_s
|
|
85
|
+
@elem.to_s
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Replaces decimal character references of one to three digits
# (e.g. "&#38;") in +str+ with the character of that code.
# (The original author marked this as a workaround for a suspected bug.)
#
# str :: text to convert; a nil/false value is returned unchanged.
#
# Returns a new string with the references expanded.
def gsub_entities(str)
  return str unless str
  str.gsub(/\&\#(\d{1,3})\;/) { sprintf("%c", $1.to_i) }
end
|
|
96
|
+
|
|
97
|
+
# Returns the 'id' attribute of the wrapped element.  The value is read
# once and cached in @entry_id, even when it is nil.
def entry_id
  @entry_id = @elem.attributes['id'] unless defined?(@entry_id)
  @entry_id
end
|
|
103
|
+
# Class-level helper: for every tag name in +array+, defines an instance
# method of the same name that returns the text of that child element of
# @elem, passed through gsub_entities and cached in an instance variable
# of the same name.
#
# array :: list of tag names; each must also be usable as a Ruby method
#          and instance-variable name, because it is interpolated into
#          evaluated source code.
def self.define_element_text_method(array)
  array.each do |tagstr|
    module_eval <<-RUBY
      def #{tagstr}
        unless defined?(@#{tagstr})
          @#{tagstr} = gsub_entities(@elem.text('#{tagstr}'))
        end
        @#{tagstr}
      end
    RUBY
  end
end
|
|
115
|
+
private_class_method :define_element_text_method
|
|
116
|
+
|
|
117
|
+
# One cluster of a MaXML cluster document.
# Data file: ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allclust.sep.xml.gz
class Cluster < MaXML

  # XPath of the element wrapped when an instance is built from XML text.
  Data_XPath = 'maxml-clusters/cluster'

  # Text of the 'representative-seqid' child element, with character
  # references expanded.  Cached after the first call.
  def representative_seqid
    unless defined?(@representative_seqid)
      raw = @elem.text('representative-seqid')
      @representative_seqid = gsub_entities(raw)
    end
    @representative_seqid
  end

  # MaXML::Sequences collection built over this cluster's element.
  # Cached after the first call.
  def sequences
    @sequences = MaXML::Sequences.new(@elem) unless defined?(@sequences)
    @sequences
  end

  # With an ID, looks that ID up in #sequences; without one, returns the
  # representative sequence.
  def sequence(idstr = nil)
    return representative_sequence unless idstr
    sequences[idstr]
  end

  # The member of #sequences selected by #representative_seqid, or nil
  # when the cluster has no representative seqid.  Cached.
  def representative_sequence
    unless defined?(@representative_sequence)
      rid = representative_seqid
      @representative_sequence =
        if rid
          sequences[rid]
        else
          nil
        end
    end
    @representative_sequence
  end
  alias representative_clone representative_sequence

  # Annotations of the representative sequence, or nil when there is none.
  def representative_annotations
    rep = representative_sequence
    return nil unless rep
    rep.annotations
  end

  # cloneid of the representative sequence, or nil when there is none.
  def representative_cloneid
    rep = representative_sequence
    return nil unless rep
    rep.cloneid
  end

  # Generates the #fantomid reader.
  define_element_text_method(%w(fantomid))
end #class MaXML::Cluster
|
|
164
|
+
|
|
165
|
+
# Enumerable collection of the 'sequence' child elements of the wrapped
# element, each exposed as a MaXML::Sequence.
class Sequences < MaXML

  # XPath of the element wrapped when an instance is built from XML text.
  Data_XPath = 'maxml-sequences'

  include Enumerable

  # Yields every MaXML::Sequence in document order.
  def each
    to_a.each { |seq| yield seq }
  end

  # Array of MaXML::Sequence objects, one per 'sequence' child element.
  # Built on first use and cached.
  def to_a
    unless defined?(@sequences)
      nodes = @elem.get_elements('sequence')
      nodes.collect! { |node| MaXML::Sequence.new(node) }
      @sequences = nodes
    end
    @sequences
  end

  # Returns the first sequence having +idstr+ among its altid values, or
  # nil.  Results (including misses) are cached per ID.
  def get(idstr)
    @hash = {} unless defined?(@hash)
    unless @hash.member?(idstr)
      @hash[idstr] = find { |seq| seq.altid.values.index(idstr) }
    end
    @hash[idstr]
  end

  # A single String argument is treated as an ID and passed to #get;
  # any other argument list is forwarded to Array#[] on #to_a.
  def [](*arg)
    lookup_by_id = arg[0].is_a?(String) && arg.size == 1
    lookup_by_id ? get(arg[0]) : to_a[*arg]
  end

  # cloneid of every sequence, in order.  Cached.
  def cloneids
    @cloneids = to_a.collect { |seq| seq.cloneid } unless defined?(@cloneids)
    @cloneids
  end

  # Sorted, duplicate-free list of the id_strings of all sequences.
  # Cached.
  def id_strings
    unless defined?(@id_strings)
      @id_strings = to_a.collect { |seq| seq.id_strings }.flatten.sort.uniq
    end
    @id_strings
  end
end #class MaXML::Sequences
|
|
218
|
+
|
|
219
|
+
# One sequence entry of a MaXML sequence document.
# Data files:
#   ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allseq.sep.xml.gz
#   ftp://fantom2.gsc.riken.go.jp/fantom/2.1/repseq.sep.xml.gz
class Sequence < MaXML

  # XPath of the element wrapped when an instance is built from XML text.
  Data_XPath = 'maxml-sequences/sequence'

  # Without an argument, returns a Hash mapping the 'type' attribute of
  # each 'altid' child element to its text.  With +t+, returns only the
  # value stored under that type.  The Hash is built once and cached.
  def altid(t = nil)
    unless defined?(@altid)
      @altid = {}
      @elem.each_element('altid') do |node|
        @altid[node.attributes['type']] = gsub_entities(node.text)
      end
    end
    return @altid unless t
    @altid[t]
  end

  # Sorted, duplicate-free list of all altid values.
  def id_strings
    ids = altid.values
    ids.sort.uniq
  end

  # First two characters of #entry_id.
  def library_id
    entry_id[0, 2]
  end

  # MaXML::Annotations built from the 'annotations' child element.
  # Cached after the first call.
  def annotations
    unless defined?(@annotations)
      node = @elem.elements['annotations']
      @annotations = MaXML::Annotations.new(node)
    end
    @annotations
  end

  # Generates plain child-element text readers.
  define_element_text_method(%w(annotator version modified_time comment))

  # Class-level helper: for every name in +array+, defines a reader that
  # returns the text of the child element of that name and, when that is
  # nil/false, falls back to the altid of the same type.  The result is
  # cached in an instance variable of the same name.
  def self.define_id_method(array)
    array.each do |tagstr|
      module_eval <<-RUBY
        def #{tagstr}
          unless defined?(@#{tagstr})
            @#{tagstr} = gsub_entities(@elem.text('#{tagstr}'))
            @#{tagstr} = altid('#{tagstr}') unless @#{tagstr}
          end
          @#{tagstr}
        end
      RUBY
    end
  end
  private_class_method :define_id_method

  # Generates the ID readers.  This runs after the text readers above.
  define_id_method(%w(seqid fantomid cloneid rearrayid accession))
end #class MaXML::Sequence
|
|
275
|
+
|
|
276
|
+
# Enumerable collection of the 'annotation' child elements of the wrapped
# element, each exposed as a MaXML::Annotation.
class Annotations < MaXML

  Data_XPath = nil

  include Enumerable

  # Yields every MaXML::Annotation in document order.
  def each
    to_a.each { |ann| yield ann }
  end

  # Array of MaXML::Annotation objects, one per 'annotation' child
  # element.  Built on first use and cached.
  def to_a
    unless defined?(@a)
      nodes = @elem.get_elements('annotation')
      nodes.collect! { |node| MaXML::Annotation.new(node) }
      @a = nodes
    end
    @a
  end

  # All annotations whose qualifier equals +qstr+.  Cached per qualifier.
  def get_all_by_qualifier(qstr)
    @hash = {} unless defined?(@hash)
    unless @hash.member?(qstr)
      @hash[qstr] = find_all { |ann| ann.qualifier == qstr }
    end
    @hash[qstr]
  end

  # First annotation whose qualifier equals +qstr+, or nil.
  def get_by_qualifier(qstr)
    matches = get_all_by_qualifier(qstr)
    return nil unless matches
    matches[0]
  end

  # A single String argument is treated as a qualifier and passed to
  # #get_by_qualifier; any other argument list is forwarded to Array#[]
  # on #to_a.
  def [](*arg)
    lookup_by_qualifier = arg[0].is_a?(String) && arg.size == 1
    lookup_by_qualifier ? get_by_qualifier(arg[0]) : to_a[*arg]
  end

  # anntext of the 'cds_start' annotation converted with to_i, or nil
  # when there is no such annotation.  Cached.
  def cds_start
    unless defined?(@cds_start)
      ann = get_by_qualifier('cds_start')
      @cds_start = ann ? ann.anntext.to_i : nil
    end
    @cds_start
  end

  # anntext of the 'cds_stop' annotation converted with to_i, or nil
  # when there is no such annotation.  Cached.
  def cds_stop
    unless defined?(@cds_stop)
      ann = get_by_qualifier('cds_stop')
      @cds_stop = ann ? ann.anntext.to_i : nil
    end
    @cds_stop
  end

  # anntext of the 'gene_name' annotation, or nil.  Cached.
  def gene_name
    unless defined?(@gene_name)
      ann = get_by_qualifier('gene_name')
      @gene_name = ann ? ann.anntext : nil
    end
    @gene_name
  end

  # First datasrc entry of the 'gene_name' annotation, or nil when there
  # is no such annotation.  Cached.
  def data_source
    unless defined?(@data_source)
      ann = get_by_qualifier('gene_name')
      @data_source = ann ? ann.datasrc[0] : nil
    end
    @data_source
  end

  # evidence of the 'gene_name' annotation, or nil when there is no such
  # annotation.  Cached.
  def evidence
    unless defined?(@evidence)
      ann = get_by_qualifier('gene_name')
      @evidence = ann ? ann.evidence : nil
    end
    @evidence
  end
end #class MaXML::Annotations
|
|
357
|
+
|
|
358
|
+
# A single 'annotation' element.
class Annotation < MaXML

  # Always nil: this class does not expose an entry ID.
  def entry_id
    nil
  end

  # A String holding the text of a 'datasrc' element, extended with the
  # value of its 'href' attribute.
  class DataSrc < String
    attr_reader :href

    # text :: string content
    # href :: link stored alongside the text
    def initialize(text, href)
      super(text)
      @href = href
    end
  end

  # Array of DataSrc objects, one per 'datasrc' child element, with
  # character references expanded in both text and href.  Cached.
  def datasrc
    unless defined?(@datasrc)
      @datasrc = []
      @elem.each_element('datasrc') do |node|
        label = gsub_entities(node.text)
        link = gsub_entities(node.attributes['href'])
        @datasrc << DataSrc.new(label, link)
      end
    end
    @datasrc
  end

  # Generates plain child-element text readers.
  define_element_text_method(%w(qualifier srckey anntext evidence))
end #class MaXML::Annotation
|
|
385
|
+
|
|
386
|
+
end #class MaXML
|
|
387
|
+
|
|
388
|
+
end #module FANTOM
|
|
389
|
+
|
|
390
|
+
end #module Bio
|
|
391
|
+
|
|
392
|
+
=begin
|
|
393
|
+
|
|
394
|
+
Bio::FANTOM are database classes (and modules) treating RIKEN FANTOM2 data.
|
|
395
|
+
FANTOM2 is available at ((<URL:http://fantom2.gsc.riken.go.jp/>)).
|
|
396
|
+
|
|
397
|
+
= Bio::FANTOM
|
|
398
|
+
|
|
399
|
+
This module contains useful methods to access databases.
|
|
400
|
+
|
|
401
|
+
--- Bio::FANTOM.query(idstr, http_proxy=nil)
|
|
402
|
+
|
|
403
|
+
Get MaXML sequence data corresponding to given ID through the internet
|
|
404
|
+
from ((<URL:http://fantom.gsc.riken.go.jp/db/maxml/>)).
|
|
405
|
+
Returns Bio::FANTOM::MaXML::Sequence object.
|
|
406
|
+
|
|
407
|
+
--- Bio::FANTOM.get_by_id(idstr, http_proxy=nil)
|
|
408
|
+
|
|
409
|
+
Same as FANTOM.query, but returns XML document as a string.
|
|
410
|
+
(Reference: bio/io/registry.rb)
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
= Bio::FANTOM::MaXML::Cluster
|
|
414
|
+
|
|
415
|
+
This class is for 'allclust.sep.xml' found at
|
|
416
|
+
((<URL:ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allclust.sep.xml.gz>)).
|
|
417
|
+
Note that this class is not suitable for 'allclust.xml'.
|
|
418
|
+
|
|
419
|
+
--- Bio::FANTOM::MaXML::Cluster.new(str)
|
|
420
|
+
|
|
421
|
+
--- Bio::FANTOM::MaXML::Cluster#entry_id
|
|
422
|
+
|
|
423
|
+
--- Bio::FANTOM::MaXML::Cluster#fantomid
|
|
424
|
+
|
|
425
|
+
--- Bio::FANTOM::MaXML::Cluster#representative_seqid
|
|
426
|
+
|
|
427
|
+
--- Bio::FANTOM::MaXML::Cluster#sequences
|
|
428
|
+
|
|
429
|
+
Lists sequences in this cluster.
|
|
430
|
+
Returns Bio::FANTOM::MaXML::Sequences object.
|
|
431
|
+
|
|
432
|
+
--- Bio::FANTOM::MaXML::Cluster#sequence(id_str)
|
|
433
|
+
|
|
434
|
+
Shows a sequence information of given id.
|
|
435
|
+
Returns Bio::FANTOM::MaXML::Sequence object or nil.
|
|
436
|
+
|
|
437
|
+
--- Bio::FANTOM::MaXML::Cluster#representative_sequence
|
|
438
|
+
--- Bio::FANTOM::MaXML::Cluster#representative_clone
|
|
439
|
+
|
|
440
|
+
Shows the sequence identified by representative_seqid.
|
|
441
|
+
Returns Bio::FANTOM::MaXML::Sequence object (or nil).
|
|
442
|
+
|
|
443
|
+
--- Bio::FANTOM::MaXML::Cluster#representative_annotations
|
|
444
|
+
|
|
445
|
+
Shows annotations of the representative sequence.
|
|
446
|
+
Returns Bio::FANTOM::MaXML::Annotations object (or nil).
|
|
447
|
+
|
|
448
|
+
--- Bio::FANTOM::MaXML::Cluster#representative_cloneid
|
|
449
|
+
|
|
450
|
+
Shows the cloneid of the representative sequence.
|
|
451
|
+
Returns String (or nil).
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
= Bio::FANTOM::MaXML::Sequences
|
|
455
|
+
|
|
456
|
+
The instances of this class are automatically created
|
|
457
|
+
by Bio::FANTOM::MaXML::Cluster class.
|
|
458
|
+
|
|
459
|
+
This class can also be used for 'allseq.sep.xml' and 'repseq.sep.xml',
|
|
460
|
+
but you had better use the Bio::FANTOM::MaXML::Sequence class for them.
|
|
461
|
+
|
|
462
|
+
In addition, this class can be used for 'allseq.xml' and 'repseq.xml',
|
|
463
|
+
but you had better not do so, because it is very slow.
|
|
464
|
+
|
|
465
|
+
--- Bio::FANTOM::MaXML::Sequences#to_a
|
|
466
|
+
|
|
467
|
+
Returns an Array of Bio::FANTOM::MaXML::Sequence objects.
|
|
468
|
+
|
|
469
|
+
--- Bio::FANTOM::MaXML::Sequences#each
|
|
470
|
+
|
|
471
|
+
--- Bio::FANTOM::MaXML::Sequences#[](x)
|
|
472
|
+
|
|
473
|
+
Same as to_a[x] when x is an integer.
|
|
474
|
+
Same as get(x) when x is a string.
|
|
475
|
+
|
|
476
|
+
--- Bio::FANTOM::MaXML::Sequences#get(id_str)
|
|
477
|
+
|
|
478
|
+
Shows a sequence information of given id.
|
|
479
|
+
Returns Bio::FANTOM::MaXML::Sequence object or nil.
|
|
480
|
+
|
|
481
|
+
--- Bio::FANTOM::MaXML::Sequences#cloneids
|
|
482
|
+
|
|
483
|
+
Shows clone ID list.
|
|
484
|
+
Returns an array of strings.
|
|
485
|
+
|
|
486
|
+
--- Bio::FANTOM::MaXML::Sequences#id_strings
|
|
487
|
+
|
|
488
|
+
Shows ID list.
|
|
489
|
+
Returns an array of strings.
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
= Bio::FANTOM::MaXML::Sequence
|
|
493
|
+
|
|
494
|
+
This class is for 'allseq.sep.xml' and 'repseq.sep.xml' found at
|
|
495
|
+
((<URL:ftp://fantom2.gsc.riken.go.jp/fantom/2.1/allseq.sep.xml.gz>)) and
|
|
496
|
+
((<URL:ftp://fantom2.gsc.riken.go.jp/fantom/2.1/repseq.sep.xml.gz>)).
|
|
497
|
+
Note that this class is not suitable for 'allseq.xml' and 'repseq.xml'.
|
|
498
|
+
|
|
499
|
+
In addition, the instances of this class are automatically created
|
|
500
|
+
by Bio::FANTOM::MaXML::Sequences class.
|
|
501
|
+
|
|
502
|
+
--- Bio::FANTOM::MaXML::Sequence.new(str)
|
|
503
|
+
|
|
504
|
+
--- Bio::FANTOM::MaXML::Sequence#entry_id
|
|
505
|
+
|
|
506
|
+
--- Bio::FANTOM::MaXML::Sequence#altid(type_str = nil)
|
|
507
|
+
|
|
508
|
+
Returns hash of altid if no arguments are given.
|
|
509
|
+
Returns ID as a string if a type of ID (string) is given.
|
|
510
|
+
|
|
511
|
+
--- Bio::FANTOM::MaXML::Sequence#annotations
|
|
512
|
+
|
|
513
|
+
Gets lists of annotation data.
|
|
514
|
+
Returns a Bio::FANTOM::MaXML::Annotations object.
|
|
515
|
+
|
|
516
|
+
--- Bio::FANTOM::MaXML::Sequence#id_strings
|
|
517
|
+
|
|
518
|
+
Gets lists of ID. (same as altid.values)
|
|
519
|
+
Returns an array of strings.
|
|
520
|
+
|
|
521
|
+
--- Bio::FANTOM::MaXML::Sequence#library_id
|
|
522
|
+
|
|
523
|
+
Shows library ID. (same as cloneid[0,2])
|
|
524
|
+
Library IDs are listed at:
|
|
525
|
+
((<URL:http://fantom2.gsc.riken.go.jp/fantom2/SI/sup01_est_3r_libraryinfo.pdf>))
|
|
526
|
+
((<URL:http://fantom2.gsc.riken.go.jp/fantom2/SI/sup01_est_5f_libraryinfo.pdf>))
|
|
527
|
+
|
|
528
|
+
--- Bio::FANTOM::MaXML::Sequence#seqid
|
|
529
|
+
|
|
530
|
+
--- Bio::FANTOM::MaXML::Sequence#fantomid
|
|
531
|
+
|
|
532
|
+
--- Bio::FANTOM::MaXML::Sequence#cloneid
|
|
533
|
+
|
|
534
|
+
--- Bio::FANTOM::MaXML::Sequence#rearrayid
|
|
535
|
+
|
|
536
|
+
--- Bio::FANTOM::MaXML::Sequence#accession
|
|
537
|
+
|
|
538
|
+
--- Bio::FANTOM::MaXML::Sequence#annotator
|
|
539
|
+
|
|
540
|
+
--- Bio::FANTOM::MaXML::Sequence#version
|
|
541
|
+
|
|
542
|
+
--- Bio::FANTOM::MaXML::Sequence#modified_time
|
|
543
|
+
|
|
544
|
+
--- Bio::FANTOM::MaXML::Sequence#comment
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
= Bio::FANTOM::MaXML::Annotations
|
|
548
|
+
|
|
549
|
+
The instances of this class are automatically created
|
|
550
|
+
by Bio::FANTOM::MaXML::Sequence class.
|
|
551
|
+
|
|
552
|
+
--- Bio::FANTOM::MaXML::Annotations#to_a
|
|
553
|
+
|
|
554
|
+
Returns an Array of Bio::FANTOM::MaXML::Annotation objects.
|
|
555
|
+
|
|
556
|
+
--- Bio::FANTOM::MaXML::Annotations#each
|
|
557
|
+
|
|
558
|
+
--- Bio::FANTOM::MaXML::Annotations#get_all_by_qualifier(qstr)
|
|
559
|
+
|
|
560
|
+
--- Bio::FANTOM::MaXML::Annotations#get_by_qualifier(qstr)
|
|
561
|
+
|
|
562
|
+
--- Bio::FANTOM::MaXML::Annotations#[](x)
|
|
563
|
+
|
|
564
|
+
Same as to_a[x] when x is an integer.
|
|
565
|
+
Same as get_by_qualifier(x) when x is a string.
|
|
566
|
+
|
|
567
|
+
--- Bio::FANTOM::MaXML::Annotations#cds_start
|
|
568
|
+
--- Bio::FANTOM::MaXML::Annotations#cds_stop
|
|
569
|
+
--- Bio::FANTOM::MaXML::Annotations#gene_name
|
|
570
|
+
--- Bio::FANTOM::MaXML::Annotations#data_source
|
|
571
|
+
--- Bio::FANTOM::MaXML::Annotations#evidence
|
|
572
|
+
|
|
573
|
+
|
|
574
|
+
= Bio::FANTOM::MaXML::Annotation
|
|
575
|
+
|
|
576
|
+
The instances of this class are automatically created
|
|
577
|
+
by Bio::FANTOM::MaXML::Annotations class.
|
|
578
|
+
|
|
579
|
+
--- Bio::FANTOM::MaXML::Annotation#datasrc
|
|
580
|
+
|
|
581
|
+
Returns an Array of Bio::FANTOM::MaXML::Annotation::DataSrc objects.
|
|
582
|
+
|
|
583
|
+
--- Bio::FANTOM::MaXML::Annotation#qualifier
|
|
584
|
+
|
|
585
|
+
--- Bio::FANTOM::MaXML::Annotation#srckey
|
|
586
|
+
|
|
587
|
+
--- Bio::FANTOM::MaXML::Annotation#anntext
|
|
588
|
+
|
|
589
|
+
--- Bio::FANTOM::MaXML::Annotation#evidence
|
|
590
|
+
|
|
591
|
+
= Bio::FANTOM::MaXML::Annotation::DataSrc < String
|
|
592
|
+
|
|
593
|
+
The instances of this class are automatically created
|
|
594
|
+
by Bio::FANTOM::MaXML::Annotation class.
|
|
595
|
+
|
|
596
|
+
--- Bio::FANTOM::MaXML::Annotation::DataSrc#href
|
|
597
|
+
|
|
598
|
+
Shows a link URL to the database web page as a String.
|
|
599
|
+
|
|
600
|
+
= References
|
|
601
|
+
|
|
602
|
+
* ((<URL:http://fantom2.gsc.riken.go.jp/>))
|
|
603
|
+
|
|
604
|
+
=end
|