bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/genbank/genpept.rb - GenPept database class
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2002-2004 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: genpept.rb,v 1.10 2005/10/23 07:20:37 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'bio/db/genbank/common'
|
|
24
|
+
require 'bio/db/genbank/genbank'
|
|
25
|
+
|
|
26
|
+
module Bio
|
|
27
|
+
class GenPept < NCBIDB
|
|
28
|
+
|
|
29
|
+
include Bio::NCBIDB::Common
|
|
30
|
+
|
|
31
|
+
# LOCUS
|
|
32
|
+
# Holds the fields sliced out of a fixed-width GenPept LOCUS line.
class Locus
  attr_accessor :entry_id, :length, :circular, :division, :date

  # locus_line:: the raw LOCUS line. Every field is read from a fixed
  #              column window, written here as (start offset, width).
  def initialize(locus_line)
    @entry_id = locus_line[12, 16].strip
    @length   = locus_line[29, 11].to_i
    @circular = locus_line[55, 8].strip   # always linear
    @division = locus_line[63, 4].strip
    @date     = locus_line[68, 11].strip
  end
end
|
|
42
|
+
|
|
43
|
+
# Returns the Locus object of this entry. It is built from get('LOCUS') on
# first use and cached in @data['LOCUS'] afterwards.
def locus
  @data['LOCUS'] ||= Locus.new(get('LOCUS'))
end

# The readers below simply forward to the cached Locus object.

def entry_id
  locus.entry_id
end

def length
  locus.length
end

def circular
  locus.circular
end

def division
  locus.division
end

def date
  locus.date
end
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ORIGIN
|
|
54
|
+
# Returns the entry's sequence wrapped in a new Bio::Sequence::AA.
#
# When @data['SEQUENCE'] is not set yet, #origin is invoked first
# (presumably it fills @data['SEQUENCE']; it is defined outside this file).
def seq
  origin unless @data['SEQUENCE']
  Bio::Sequence::AA.new(@data['SEQUENCE'])
end
|
|
60
|
+
alias aaseq seq
|
|
61
|
+
alias aalen length
|
|
62
|
+
|
|
63
|
+
# Returns the length of the sequence object built by #seq.
def seq_len
  seq.length
end

# DBSOURCE
#
# Returns whatever get('DBSOURCE') yields for this entry.
def dbsource
  get('DBSOURCE')
end
|
|
71
|
+
|
|
72
|
+
end # GenPept
|
|
73
|
+
end # Bio
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/db/genbank/refseq.rb - RefSeq database class
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2000-2004 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: refseq.rb,v 1.6 2004/08/23 23:40:35 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'bio/db/genbank/genbank'
|
|
24
|
+
|
|
25
|
+
module Bio
|
|
26
|
+
|
|
27
|
+
class RefSeq < GenBank
|
|
28
|
+
# Nothing to do (the RefSeq database format is exactly the same as GenBank's)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
end # Bio
|
data/lib/bio/db/gff.rb
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/db/gff.rb - GFF format class
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2003, 2005
|
|
5
|
+
# Toshiaki Katayama <k@bioruby.org>
|
|
6
|
+
# License:: LGPL
|
|
7
|
+
#
|
|
8
|
+
# $Id: gff.rb,v 1.5 2005/12/18 15:58:41 k Exp $
|
|
9
|
+
#
|
|
10
|
+
# == Description
|
|
11
|
+
#
|
|
12
|
+
#
|
|
13
|
+
# == Example
|
|
14
|
+
#
|
|
15
|
+
#
|
|
16
|
+
# == References
|
|
17
|
+
#
|
|
18
|
+
# * http://www.sanger.ac.uk/Software/formats/GFF/
|
|
19
|
+
#
|
|
20
|
+
#--
|
|
21
|
+
#
|
|
22
|
+
# This library is free software; you can redistribute it and/or
|
|
23
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
24
|
+
# License as published by the Free Software Foundation; either
|
|
25
|
+
# version 2 of the License, or (at your option) any later version.
|
|
26
|
+
#
|
|
27
|
+
# This library is distributed in the hope that it will be useful,
|
|
28
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
29
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
30
|
+
# Lesser General Public License for more details.
|
|
31
|
+
#
|
|
32
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
33
|
+
# License along with this library; if not, write to the Free Software
|
|
34
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
35
|
+
#
|
|
36
|
+
#++
|
|
37
|
+
#
|
|
38
|
+
|
|
39
|
+
module Bio
|
|
40
|
+
|
|
41
|
+
# Container for the records of a GFF document.
class GFF

  # Array of Bio::GFF::Record objects, one per input line (comment lines
  # included).
  attr_accessor :records

  # Builds one Record for every line of +str+.
  def initialize(str = '')
    @records = []
    str.each_line do |line|
      @records << Record.new(line)
    end
  end

  # One line of a GFF document. The nine tab-separated columns are exposed
  # as strings; the ninth column is additionally parsed into a Hash.
  class Record

    attr_accessor :seqname, :source, :feature, :start, :end,
                  :score, :strand, :frame

    # Hash of attribute key => value parsed from the ninth column
    # (nil when the line has no ninth column).
    attr_accessor :attributes

    # Text from the first '#' to the end of the line (nil when absent).
    attr_accessor :comments

    # Parses a single line. Lines starting with '#' only set #comments;
    # all column accessors stay nil for them.
    def initialize(str)
      @comments = str.chomp[/#.*/]
      return if /^#/.match(str)
      @seqname, @source, @feature, @start, @end, @score, @strand, @frame,
        attributes, = str.chomp.split("\t")
      @attributes = parse_attributes(attributes) if attributes
    end

    private

    # Splits the attribute column on ';' (a backslash-escaped "\;" does not
    # split) and then splits every item into key and value on the first run
    # of whitespace.
    #
    # Items are collected with scan rather than split(/[^\\];/): that
    # pattern swallowed the character in front of each ';', so values lost
    # their last character. Empty or whitespace-only items are ignored.
    # NOTE: a ';' inside a quoted value is still treated as a separator.
    def parse_attributes(attributes)
      hash = {}
      attributes.scan(/(?:[^\\;]|\\.?)+/) do |atr|
        key, value = atr.split(' ', 2)
        hash[key] = value if key
      end
      hash
    end
  end

  class GFF2 < GFF
    VERSION = 2
  end

  class GFF3 < GFF
    VERSION = 3
  end

end # class GFF
|
|
94
|
+
|
|
95
|
+
end # module Bio
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# Self-test: run this file directly to parse GFF data from the files named
# on the command line (or from standard input) and dump the result.
if $0 == __FILE__
  begin
    require 'pp'
    alias p pp
  rescue LoadError
    # pp is optional; plain Kernel#p is used when it cannot be loaded.
  end

  gff_text = ARGF.read
  p Bio::GFF.new(gff_text)
end
|
data/lib/bio/db/go.rb
ADDED
|
@@ -0,0 +1,497 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/db/go.rb - Classes for Gene Ontology
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao <n@bioruby.org>
|
|
5
|
+
# License::
|
|
6
|
+
#
|
|
7
|
+
# $Id: go.rb,v 1.9 2005/10/31 18:32:36 nakao Exp $
|
|
8
|
+
#
|
|
9
|
+
# == Gene Ontology
|
|
10
|
+
#
|
|
11
|
+
# == Example
|
|
12
|
+
#
|
|
13
|
+
# == References
|
|
14
|
+
#--
|
|
15
|
+
#
|
|
16
|
+
# This library is free software; you can redistribute it and/or
|
|
17
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
18
|
+
# License as published by the Free Software Foundation; either
|
|
19
|
+
# version 2 of the License, or (at your option) any later version.
|
|
20
|
+
#
|
|
21
|
+
# This library is distributed in the hope that it will be useful,
|
|
22
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
23
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
24
|
+
# Lesser General Public License for more details.
|
|
25
|
+
#
|
|
26
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
27
|
+
# License along with this library; if not, write to the Free Software
|
|
28
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
29
|
+
#
|
|
30
|
+
#++
|
|
31
|
+
#
|
|
32
|
+
|
|
33
|
+
require 'bio/pathway'
|
|
34
|
+
|
|
35
|
+
module Bio
|
|
36
|
+
|
|
37
|
+
# = Bio::GO
|
|
38
|
+
# Classes for Gene Ontology http://www.geneontology.org
|
|
39
|
+
class GO
|
|
40
|
+
|
|
41
|
+
# = Bio::GO::Ontology
|
|
42
|
+
#
|
|
43
|
+
# Container class for ontologies in the DAG Edit format.
|
|
44
|
+
#
|
|
45
|
+
# == Example
|
|
46
|
+
#
|
|
47
|
+
# c_data = File.open('component.ontology').read
|
|
48
|
+
# go_c = Bio::GO::Ontology.new(c_data)
|
|
49
|
+
# p go_c.bfs_shortest_path('0003673','0005632')
|
|
50
|
+
class Ontology < Bio::Pathway
|
|
51
|
+
|
|
52
|
+
# Bio::GO::Ontology.parse_goids(line)
#
# Extracts the GO IDs (seven-digit strings without the "GO:" prefix) from
# the head of an entry line in the DAG Edit format:
#
#   <rel><term> ; GO:ID[, GO:ID...]
#
# Scanning stops at the first position that is neither a leading
# "<rel><term> ;" part nor a (comma separated) " GO:ID" item.
def self.parse_goids(line)
  collected = []
  rest = line
  loop do
    if rest =~ /^ *[$%<]\S.+?;/
      # Skip the term: drop everything up to and including the first ';'.
      rest = rest[(rest.index(';') + 1)..-1]
    elsif rest =~ /^,* GO:(\d{7}),*/
      collected << Regexp.last_match(1).clone
      # Continue right behind the seven digits just captured.
      rest = rest[Regexp.last_match.end(1)..-1]
    else
      break
    end
  end
  collected
end
|
|
72
|
+
|
|
73
|
+
# Returns a Hash instance of the header lines in ontology flatfile.
|
|
74
|
+
attr_reader :header_lines
|
|
75
|
+
|
|
76
|
+
#
|
|
77
|
+
attr_reader :id2term
|
|
78
|
+
|
|
79
|
+
#
|
|
80
|
+
attr_reader :id2id
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# Bio::GO::Ontology.new(str)
#
# Parses +str+ (ontology data in the DAG Edit format) and passes the
# resulting list of relations on to the superclass constructor. The lookup
# tables are created first because the parser fills them while it runs.
def initialize(str)
  @id2term, @header_lines, @id2id = {}, {}, {}
  super(dag_edit_format_parser(str))
end
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# Returns the GO term corresponding to the given GO ID.
#
# When the ID has no entry of its own in id2term, it is translated through
# id2id first and the lookup is repeated with the translated ID.
def goid2term(goid)
  found = id2term[goid]
  return found unless found.nil?
  id2term[id2id[goid]]
end
|
|
100
|
+
|
|
101
|
+
private
|
|
102
|
+
|
|
103
|
+
# Constructs the adjacency list for the given ontology.
#
# Walks +str+ (DAG Edit format) line by line and, as a side effect, fills
# @header_lines, @id2term and @id2id:
#
# * "!tag: value" lines are stored in @header_lines ('-' in the tag is
#   replaced by '_'; "type" headers are skipped).
# * "<indent><rel><term> ; GO:<id>..." lines register the term. For every
#   line indented by at least one space a Bio::Relation is added from the
#   latest term seen one indentation level up to each GO ID of the line.
#
# Returns the Array of Bio::Relation objects.
def dag_edit_format_parser(str)
  stack    = []   # stack[depth] = first GO ID of the latest line at that depth
  adj_list = []

  str.each_line do |line|
    if /^!(.+?):\s+(\S.+)$/ =~ line     # header line
      tag   = $1.tr('-', '_')
      value = $2
      # Plain assignment on purpose: the header text is data and must never
      # be spliced into a string that is handed to instance_eval.
      @header_lines[tag] = value unless tag == 'type'
      next
    end

    # GO Term ; GO:ID
    next unless /^( *)([$<%])(.+?) ; GO:(\d{7})(\n*)/ =~ line
    depth = $1.length
    rel   = $2
    term  = $3
    goid  = $4
    en    = $5
    goids = parse_goids(line)           # GO:ID[, GO:ID...]
    stack[depth]   = goids.first
    @id2term[goid] = term

    next if depth == 0

    goids.each do |id|
      @id2term[id] = term
      @id2id[id]   = goids.first
      adj_list << Bio::Relation.new(stack[depth - 1], id, rel)
    end

    # en is empty when the first GO ID is not directly followed by a
    # newline. This check is evaluated exactly once per line: retrying it
    # in a loop can never terminate, because +line+ does not change.
    # NOTE(review): the pattern is anchored at the line start, so it can
    # only re-match the line's own leading term -- confirm the intent.
    if en == "" && /^ *([<%]) (.+?) ; GO:(\d{7})/ =~ line   # <%GO Term ; GO:ID
      rel1  = $1
      term1 = $2
      goid1 = $3
      @id2term[goid1] = term1
      goids.each do |id|
        adj_list << Bio::Relation.new(goid1, id, rel1)
      end
    end
  end

  adj_list
end
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# Returns an Array of GO IDs parsed from an entry line in the DAG Edit
# format (delegates to Ontology.parse_goids).
def parse_goids(line)
  Ontology.parse_goids(line)
end

# Bio::GO::Ontology#parse_synonyms(line)
#
# Returns an Array of the synonyms (" ; synonym:<text>" items) found in an
# entry line, in order of appearance.
def parse_synonyms(line)
  # The look-ahead keeps the terminating ';', '<', '%' or newline out of the
  # match, so a directly following " ; synonym:" item is still found.
  # Scanning by match position (instead of searching the line for the
  # synonym text) keeps a synonym that also occurs earlier in the line,
  # e.g. as the term name, from being reported twice.
  line.scan(/ ; synonym:(\S.+?)(?= *[;<%\n])/).flatten
end
|
|
185
|
+
|
|
186
|
+
end # class Ontology
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
# = Bio::GO::GeneAssociation
|
|
191
|
+
# $CVSROOT/go/gene-associations/gene_association.*
|
|
192
|
+
#
|
|
193
|
+
# Data parser for the gene_association go annotation.
|
|
194
|
+
# See also the file format http://www.geneontology.org/doc/GO.annotation.html#file
|
|
195
|
+
#
|
|
196
|
+
# == Example
|
|
197
|
+
#
|
|
198
|
+
# mgi_data = File.open('gene_association.mgi').read
|
|
199
|
+
# mgi = Bio::GO::GeneAssociation.parser(mgi_data)
|
|
200
|
+
#
|
|
201
|
+
# Bio::GO::GeneAssociation.parser(mgi_data) do |entry|
|
|
202
|
+
# p [entry.entry_id, entry.evidence, entry.goid]
|
|
203
|
+
# end
|
|
204
|
+
#
|
|
205
|
+
class GeneAssociation # < Bio::DB

  # Delimiter
  DELIMITER = "\n"

  # Delimiter
  RS = DELIMITER

  # Parses a gene_association flatfile given as a String.
  #
  # Lines starting with '!' (comments) and blank lines are skipped. With a
  # block, every entry is yielded as soon as it is parsed and +str+ is
  # returned; without a block, an Array of all entries is returned.
  def self.parser(str)
    galist = []
    str.each_line(DELIMITER) do |line|
      next if /^!/ =~ line || line.strip.empty?
      entry = GeneAssociation.new(line)
      if block_given?
        yield entry
      else
        galist << entry
      end
    end
    block_given? ? str : galist
  end

  # Returns DB variable (column 1).
  attr_reader :db # -> aStr

  # Returns Db_Object_Id variable (column 2). Alias to entry_id.
  attr_reader :db_object_id # -> aStr

  # Returns Db_Object_Symbol variable (column 3).
  attr_reader :db_object_symbol

  # Returns Qualifier variable (column 4).
  attr_reader :qualifier

  # Returns Db_Reference variable (column 6), split on '|'.
  attr_reader :db_reference # -> []

  # Returns Evidence code variable (column 7).
  attr_reader :evidence

  # Returns the values this entry is associated with (column 8), split
  # on '|'.
  attr_reader :with # -> []

  # Returns Aspect variable (column 9).
  attr_reader :aspect

  # Returns Db_Object_Name variable (column 10).
  attr_reader :db_object_name

  # Returns Db_Object_Synonym variable (column 11), split on '|'.
  attr_reader :db_object_synonym # -> []

  # Returns Db_Object_Type variable (column 12).
  attr_reader :db_object_type

  # Returns Taxon variable (column 13).
  attr_reader :taxon

  # Returns Date variable (column 14).
  attr_reader :date

  # Returns Assigned_by variable (column 15).
  attr_reader :assigned_by

  alias entry_id db_object_id


  # Parsing an entry (in a line) in the gene_association flatfile.
  #
  # Columns are tab separated. Columns missing at the end of the line are
  # left nil (or an empty Array for the '|' separated ones).
  def initialize(entry)
    tmp = entry.chomp.split(/\t/)
    @db                = tmp[0]
    @db_object_id      = tmp[1]
    @db_object_symbol  = tmp[2]
    @qualifier         = tmp[3]
    @goid              = tmp[4]
    @db_reference      = split_multi(tmp[5])
    @evidence          = tmp[6]
    @with              = split_multi(tmp[7])
    @aspect            = tmp[8]
    @db_object_name    = tmp[9]
    @db_object_synonym = split_multi(tmp[10])
    @db_object_type    = tmp[11]
    @taxon             = tmp[12]   # taxon:4932
    @date              = tmp[13]   # 20010118
    @assigned_by       = tmp[14]
  end


  # Returns GO_ID in /\d{7}/ format. Giving not nil arg, returns
  # /GO:\d{7}/ style.
  #
  # * Bio::GO::GeneAssociation#goid       -> "0001234"
  # * Bio::GO::GeneAssociation#goid(true) -> "GO:0001234"
  def goid(org = nil)
    if org
      @goid
    else
      @goid.sub('GO:','')
    end
  end

  # Bio::GO::GeneAssociation#to_str -> a line of gene_association file.
  #
  # Columns are written in the order they are read by the constructor; the
  # multi-valued columns are joined with '|' again.
  def to_str
    return [@db, @db_object_id, @db_object_symbol, @qualifier, @goid,
            @db_reference.join("|"), @evidence, @with.join("|"), @aspect,
            @db_object_name, @db_object_synonym.join("|"), @db_object_type,
            @taxon, @date, @assigned_by].join("\t")
  end

  private

  # Splits a '|' separated column; a missing column yields an empty Array.
  def split_multi(field)
    field ? field.split(/\|/) : []
  end

end # class GeneAssociation
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
# = Container class for files in geneontology.org/go/external2go/*2go.
|
|
323
|
+
#
|
|
324
|
+
# The line syntax is:
|
|
325
|
+
#
|
|
326
|
+
# database:<identifier> > GO:<term> ; GO:<GO_id>
|
|
327
|
+
#
|
|
328
|
+
# == Example
|
|
329
|
+
#
|
|
330
|
+
# spkw2go = Bio::GO::External2go.new(File.read("spkw2go"))
|
|
331
|
+
# spkw2go.size
|
|
332
|
+
# spkw2go.each do |relation|
|
|
333
|
+
# relation # -> {:db => "", :db_id => "", :go_term => "", :go_id => ""}
|
|
334
|
+
# end
|
|
335
|
+
# spkw2go.dbs
|
|
336
|
+
#
|
|
337
|
+
# == SAMPLE
|
|
338
|
+
# !date: 2005/02/08 18:02:54
|
|
339
|
+
# !Mapping of SWISS-PROT KEYWORDS to GO terms.
|
|
340
|
+
# !Evelyn Camon, SWISS-PROT.
|
|
341
|
+
# !
|
|
342
|
+
# SP_KW:ATP synthesis > GO:ATP biosynthesis ; GO:0006754
|
|
343
|
+
# ...
|
|
344
|
+
#
|
|
345
|
+
class External2go < Array

  # Returns aHash of the external2go header information
  # ({:date => aStr, :desc => [aStr, ...]}).
  attr_reader :header

  # Constructor from parsing external2go file.
  def self.parser(str)
    new(str)
  end


  # Constructor.
  # relation := {:db => aStr, :db_id => aStr, :go_term => aStr, :go_id => aStr}
  #
  # Without an argument an empty container is created. When the contents of
  # an external2go file are given as +str+, they are parsed into the header
  # and one relation Hash per mapping line.
  def initialize(str = nil)
    @header = {:date => '', :desc => []}
    super()
    parse(str) if str
  end


  # Bio::GO::External2go#set_date(value)
  def set_date(value)
    @header[:date] = value
  end


  # Bio::GO::External2go#set_desc(ary)
  def set_desc(ary)
    @header[:desc] = ary
  end


  # Bio::GO::External2go#to_str
  # Returns the contents in the external2go format.
  def to_str
    ["!date: #{@header[:date]}",
     @header[:desc].map {|e| "!#{e}" },
     self.map { |e| [e[:db], ':', e[:db_id], ' > GO:', e[:go_term], ' ; ', e[:go_id]].join }
    ].join("\n")
  end


  # Returns ary of databases.
  def dbs
    self.map {|rel| rel[:db] }.uniq
  end


  # Returns ary of database IDs.
  def db_ids
    self.map {|rel| rel[:db_id] }.uniq
  end

  # Returns ary of GO Terms.
  def go_terms
    self.map {|rel| rel[:go_term] }.uniq
  end

  # Returns ary of GO IDs.
  def go_ids
    self.map {|rel| rel[:go_id] }.uniq
  end

  private

  # Reads +str+ line by line:
  #
  # * "!date: <text>" sets header[:date],
  # * any other "!<text>" line is appended to header[:desc],
  # * "<db>:<id> > GO:<term> ; GO:<7 digits>" appends a relation Hash,
  # * blank lines are skipped,
  # * anything else raises a RuntimeError.
  def parse(str)
    str.each_line do |raw|
      line = raw.chomp
      next if line.empty?
      if line =~ /^\!date: (.+)/
        @header[:date] = $1
      elsif line =~ /^\!(.*)/
        @header[:desc] << $1
      elsif line =~ /^(.+?):(.+) > GO:(.+) ; (GO:\d{7})/
        self << {:db_id => $2, :db => $1, :go_term => $3, :go_id => $4}
      else
        raise("Invalid Format Line: \n #{line.inspect}\n")
      end
    end
  end

end # class External2go
|
|
421
|
+
|
|
422
|
+
end # class GO
|
|
423
|
+
|
|
424
|
+
end # module Bio
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
# Self-test: run this file directly to download sample data from
# geneontology.org and exercise the three parser classes.
if __FILE__ == $0

  require 'net/http'

  # Fetches +url+ with a plain HTTP GET and returns the response body.
  # Only "http://host/path" URLs are accepted; anything else raises
  # ArgumentError.
  def wget(url)
    unless /http:\/\/(.+?)\// =~ url
      raise ArgumentError, "Invalid URL\n#{url}"
    end
    host = $1
    # Everything behind the host name (starting with '/') is the path.
    path = url[(url.index(host) + host.size)..url.size]

    Net::HTTP.new(host).get(path).body
  end


  go_c_url = 'http://www.geneontology.org/ontology/component.ontology'
  ga_url = 'http://www.geneontology.org/gene-associations/gene_association.sgd.gz'
  e2g_url = 'http://www.geneontology.org/external2go/spkw2go'


  puts "\n #==> Bio::GO::Ontology"
  p go_c_url
  component_ontology = wget(go_c_url)
  comp = Bio::GO::Ontology.new(component_ontology)

  [['0003673', '0005632'],
   ['0003673', '0005619'],
   ['0003673', '0004649']].each {|pair|
    puts
    p pair
    p [:pair, pair.map {|i| [comp.id2term[i], comp.goid2term(i)] }]
    puts "\n #==> comp.bfs_shortest_path(pair[0], pair[1])"
    p comp.bfs_shortest_path(pair[0], pair[1])
  }


  puts "\n #==> Bio::GO::External2go"
  p e2g_url
  # External2go.parser is the constructor that takes the file contents.
  spkw2go = Bio::GO::External2go.parser(wget(e2g_url))

  puts "\n #==> spkw2go.dbs"
  p spkw2go.dbs

  puts "\n #==> spkw2go[1]"
  p spkw2go[1]



  require 'zlib'
  require 'stringio'
  puts "\n #==> Bio::GO::GeneAssociation"
  p ga_url
  # The file is gzip-compressed (.gz); Zlib::Inflate.inflate only reads
  # zlib streams, so the data is read through Zlib::GzipReader instead.
  ga = Zlib::GzipReader.new(StringIO.new(wget(ga_url))).read
  ga = Bio::GO::GeneAssociation.parser(ga)

  puts "\n #==> ga.size"
  p ga.size

  puts "\n #==> ga[100]"
  p ga[100]

end
|