bio 1.4.3.0001 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.travis.yml +39 -33
- data/BSDL +22 -0
- data/COPYING +2 -2
- data/COPYING.ja +36 -36
- data/ChangeLog +2404 -1025
- data/KNOWN_ISSUES.rdoc +15 -55
- data/README.rdoc +17 -23
- data/RELEASE_NOTES.rdoc +246 -183
- data/Rakefile +3 -2
- data/bin/br_biofetch.rb +29 -5
- data/bioruby.gemspec +15 -32
- data/bioruby.gemspec.erb +10 -20
- data/doc/ChangeLog-1.4.3 +1478 -0
- data/doc/RELEASE_NOTES-1.4.3.rdoc +204 -0
- data/doc/Tutorial.rd +0 -6
- data/doc/Tutorial.rd.html +7 -12
- data/doc/Tutorial.rd.ja +960 -1064
- data/doc/Tutorial.rd.ja.html +977 -1067
- data/gemfiles/Gemfile.travis-jruby1.8 +2 -1
- data/gemfiles/Gemfile.travis-jruby1.9 +2 -4
- data/gemfiles/Gemfile.travis-rbx +13 -0
- data/gemfiles/Gemfile.travis-ruby1.8 +2 -1
- data/gemfiles/Gemfile.travis-ruby1.9 +2 -4
- data/gemfiles/Gemfile.travis-ruby2.2 +9 -0
- data/lib/bio.rb +10 -43
- data/lib/bio/alignment.rb +8 -14
- data/lib/bio/appl/blast.rb +1 -2
- data/lib/bio/appl/blast/format0.rb +18 -7
- data/lib/bio/appl/blast/remote.rb +0 -9
- data/lib/bio/appl/blast/report.rb +1 -1
- data/lib/bio/appl/clustalw/report.rb +3 -1
- data/lib/bio/appl/genscan/report.rb +1 -2
- data/lib/bio/appl/iprscan/report.rb +1 -2
- data/lib/bio/appl/meme/mast.rb +4 -4
- data/lib/bio/appl/meme/mast/report.rb +1 -1
- data/lib/bio/appl/paml/codeml.rb +2 -2
- data/lib/bio/appl/paml/codeml/report.rb +1 -0
- data/lib/bio/appl/paml/common.rb +1 -1
- data/lib/bio/appl/sosui/report.rb +1 -2
- data/lib/bio/command.rb +62 -2
- data/lib/bio/data/aa.rb +13 -31
- data/lib/bio/data/codontable.rb +1 -2
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +1 -0
- data/lib/bio/db/biosql/sequence.rb +1 -1
- data/lib/bio/db/embl/common.rb +1 -1
- data/lib/bio/db/embl/embl.rb +5 -4
- data/lib/bio/db/embl/format_embl.rb +3 -3
- data/lib/bio/db/embl/sptr.rb +9 -1444
- data/lib/bio/db/embl/swissprot.rb +12 -29
- data/lib/bio/db/embl/trembl.rb +13 -30
- data/lib/bio/db/embl/uniprot.rb +12 -29
- data/lib/bio/db/embl/uniprotkb.rb +1455 -0
- data/lib/bio/db/fasta.rb +17 -0
- data/lib/bio/db/fasta/defline.rb +1 -3
- data/lib/bio/db/fastq.rb +1 -1
- data/lib/bio/db/genbank/ddbj.rb +9 -5
- data/lib/bio/db/genbank/refseq.rb +11 -3
- data/lib/bio/db/gff.rb +3 -4
- data/lib/bio/db/go.rb +5 -6
- data/lib/bio/db/kegg/module.rb +4 -5
- data/lib/bio/db/kegg/pathway.rb +4 -5
- data/lib/bio/db/kegg/reaction.rb +1 -1
- data/lib/bio/db/nexus.rb +3 -2
- data/lib/bio/db/pdb/pdb.rb +2 -2
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +82 -59
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +2 -2
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +1 -2
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +1 -2
- data/lib/bio/db/transfac.rb +1 -1
- data/lib/bio/io/das.rb +40 -41
- data/lib/bio/io/fastacmd.rb +0 -16
- data/lib/bio/io/fetch.rb +111 -55
- data/lib/bio/io/flatfile/buffer.rb +4 -5
- data/lib/bio/io/hinv.rb +2 -3
- data/lib/bio/io/ncbirest.rb +43 -6
- data/lib/bio/io/pubmed.rb +76 -81
- data/lib/bio/io/togows.rb +33 -10
- data/lib/bio/map.rb +1 -1
- data/lib/bio/pathway.rb +1 -1
- data/lib/bio/sequence/compat.rb +1 -1
- data/lib/bio/sequence/na.rb +63 -12
- data/lib/bio/shell.rb +0 -2
- data/lib/bio/shell/core.rb +5 -6
- data/lib/bio/shell/interface.rb +3 -4
- data/lib/bio/shell/irb.rb +1 -2
- data/lib/bio/shell/plugin/entry.rb +2 -3
- data/lib/bio/shell/plugin/seq.rb +7 -6
- data/lib/bio/shell/setup.rb +1 -2
- data/lib/bio/tree.rb +2 -2
- data/lib/bio/util/contingency_table.rb +0 -2
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +2 -2
- data/lib/bio/util/sirna.rb +76 -16
- data/lib/bio/version.rb +8 -9
- data/sample/benchmark_clustalw_report.rb +47 -0
- data/sample/biofetch.rb +248 -151
- data/setup.rb +6 -7
- data/test/data/clustalw/example1-seqnos.aln +58 -0
- data/test/network/bio/appl/blast/test_remote.rb +1 -15
- data/test/network/bio/appl/test_blast.rb +0 -12
- data/test/network/bio/io/test_pubmed.rb +49 -0
- data/test/network/bio/io/test_togows.rb +0 -1
- data/test/network/bio/test_command.rb +65 -2
- data/test/unit/bio/appl/bl2seq/test_report.rb +0 -1
- data/test/unit/bio/appl/blast/test_report.rb +110 -48
- data/test/unit/bio/appl/clustalw/test_report.rb +67 -51
- data/test/unit/bio/appl/sim4/test_report.rb +46 -17
- data/test/unit/bio/appl/test_blast.rb +2 -2
- data/test/unit/bio/db/embl/test_embl.rb +0 -1
- data/test/unit/bio/db/embl/test_embl_rel89.rb +0 -1
- data/test/unit/bio/db/embl/{test_sptr.rb → test_uniprotkb.rb} +111 -115
- data/test/unit/bio/db/embl/{test_uniprot_new_part.rb → test_uniprotkb_new_part.rb} +11 -11
- data/test/unit/bio/db/genbank/test_genbank.rb +10 -4
- data/test/unit/bio/db/pdb/test_pdb.rb +14 -8
- data/test/unit/bio/db/test_fasta.rb +41 -1
- data/test/unit/bio/db/test_fastq.rb +14 -4
- data/test/unit/bio/db/test_gff.rb +2 -2
- data/test/unit/bio/db/test_phyloxml.rb +30 -30
- data/test/unit/bio/db/test_phyloxml_writer.rb +2 -2
- data/test/unit/bio/io/flatfile/test_autodetection.rb +1 -2
- data/test/unit/bio/io/flatfile/test_buffer.rb +7 -1
- data/test/unit/bio/io/flatfile/test_splitter.rb +1 -1
- data/test/unit/bio/io/test_togows.rb +3 -2
- data/test/unit/bio/sequence/test_dblink.rb +1 -1
- data/test/unit/bio/sequence/test_na.rb +3 -1
- data/test/unit/bio/test_alignment.rb +1 -2
- data/test/unit/bio/test_command.rb +5 -4
- data/test/unit/bio/test_db.rb +4 -2
- data/test/unit/bio/test_pathway.rb +25 -10
- data/test/unit/bio/util/test_sirna.rb +22 -22
- metadata +656 -1430
- data/doc/KEGG_API.rd +0 -1843
- data/doc/KEGG_API.rd.ja +0 -1834
- data/extconf.rb +0 -2
- data/lib/bio/appl/blast/ddbj.rb +0 -131
- data/lib/bio/db/kegg/taxonomy.rb +0 -280
- data/lib/bio/io/dbget.rb +0 -194
- data/lib/bio/io/ddbjrest.rb +0 -344
- data/lib/bio/io/ddbjxml.rb +0 -458
- data/lib/bio/io/ebisoap.rb +0 -158
- data/lib/bio/io/ensembl.rb +0 -229
- data/lib/bio/io/higet.rb +0 -73
- data/lib/bio/io/keggapi.rb +0 -363
- data/lib/bio/io/ncbisoap.rb +0 -156
- data/lib/bio/io/soapwsdl.rb +0 -119
- data/lib/bio/shell/plugin/keggapi.rb +0 -181
- data/lib/bio/shell/plugin/soap.rb +0 -87
- data/sample/dbget +0 -37
- data/sample/demo_ddbjxml.rb +0 -212
- data/sample/demo_kegg_taxonomy.rb +0 -92
- data/sample/demo_keggapi.rb +0 -502
- data/sample/psortplot_html.rb +0 -214
- data/test/network/bio/io/test_ddbjrest.rb +0 -47
- data/test/network/bio/io/test_ensembl.rb +0 -230
- data/test/network/bio/io/test_soapwsdl.rb +0 -53
- data/test/unit/bio/io/test_ddbjxml.rb +0 -81
- data/test/unit/bio/io/test_ensembl.rb +0 -111
- data/test/unit/bio/io/test_soapwsdl.rb +0 -33
data/extconf.rb
DELETED
data/lib/bio/appl/blast/ddbj.rb
DELETED
@@ -1,131 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# = bio/appl/blast/ddbj.rb - Remote BLAST wrapper using DDBJ web service
|
3
|
-
#
|
4
|
-
# Copyright:: Copyright (C) 2008 Naohisa Goto <ng@bioruby.org>
|
5
|
-
# License:: The Ruby License
|
6
|
-
#
|
7
|
-
|
8
|
-
require 'bio/io/ddbjrest'
|
9
|
-
|
10
|
-
module Bio::Blast::Remote
|
11
|
-
|
12
|
-
# Remote BLAST factory using DDBJ Web API for Biology
|
13
|
-
# (http://xml.nig.ac.jp/).
|
14
|
-
#
|
15
|
-
module DDBJ
|
16
|
-
|
17
|
-
# Creates a remote BLAST factory using DDBJ.
|
18
|
-
# Returns Bio::Blast object.
|
19
|
-
#
|
20
|
-
# Note for future improvement: In the future, it might return
|
21
|
-
# Bio::Blast::Remote::DDBJ or other object.
|
22
|
-
#
|
23
|
-
def self.new(program, db, options = [])
|
24
|
-
Bio::Blast.new(program, db, options, 'ddbj')
|
25
|
-
end
|
26
|
-
|
27
|
-
# Information about DDBJ BLAST.
|
28
|
-
module Information
|
29
|
-
|
30
|
-
include Bio::Blast::Remote::Information
|
31
|
-
|
32
|
-
# (private) parse database information
|
33
|
-
def _parse_databases
|
34
|
-
if defined? @parse_databases
|
35
|
-
return nil if @parse_databases
|
36
|
-
end
|
37
|
-
drv = Bio::DDBJ::REST::Blast.new
|
38
|
-
str = drv.getSupportDatabaseList
|
39
|
-
|
40
|
-
databases = {}
|
41
|
-
dbdescs = {}
|
42
|
-
keys = [ 'blastn', 'blastp' ]
|
43
|
-
keys.each do |key|
|
44
|
-
databases[key] ||= []
|
45
|
-
dbdescs[key] ||= {}
|
46
|
-
end
|
47
|
-
prefix = ''
|
48
|
-
prefix_count = 0
|
49
|
-
str.each_line do |line|
|
50
|
-
a = line.strip.split(/\s*\-\s*/, 2)
|
51
|
-
case a.size
|
52
|
-
when 1
|
53
|
-
prefix = a[0].to_s.strip
|
54
|
-
prefix += ': ' unless prefix.empty?
|
55
|
-
prefix_count = 0
|
56
|
-
next #each_line
|
57
|
-
when 0
|
58
|
-
prefix = '' if prefix_count > 0
|
59
|
-
next #each_line
|
60
|
-
end
|
61
|
-
name = a[0].to_s.strip.freeze
|
62
|
-
desc = a[1].to_s.strip
|
63
|
-
key = case desc
|
64
|
-
when /\(NT\)\s*$/
|
65
|
-
'blastn'
|
66
|
-
when /\(AA\)\s*$/
|
67
|
-
'blastp'
|
68
|
-
else
|
69
|
-
warn "DDBJ BLAST: could not determine the database is NT or AA: #{line.chomp}" if $VERBOSE
|
70
|
-
next #each_line
|
71
|
-
end
|
72
|
-
desc = (prefix + desc).freeze
|
73
|
-
prefix_count += 1
|
74
|
-
databases[key].push name
|
75
|
-
dbdescs[key][name] = desc
|
76
|
-
end
|
77
|
-
|
78
|
-
databases['blastp'] ||= []
|
79
|
-
dbdescs['blastp'] ||= []
|
80
|
-
|
81
|
-
databases['blastn'].freeze
|
82
|
-
databases['blastp'].freeze
|
83
|
-
|
84
|
-
databases['blastx'] = databases['blastp']
|
85
|
-
dbdescs['blastx'] = dbdescs['blastp']
|
86
|
-
databases['tblastn'] = databases['blastn']
|
87
|
-
dbdescs['tblastn'] = dbdescs['blastn']
|
88
|
-
databases['tblastx'] = databases['blastn']
|
89
|
-
dbdescs['tblastx'] = dbdescs['blastn']
|
90
|
-
|
91
|
-
@databases = databases
|
92
|
-
@database_descriptions = dbdescs
|
93
|
-
@parse_databases = true
|
94
|
-
true
|
95
|
-
end
|
96
|
-
private :_parse_databases
|
97
|
-
|
98
|
-
end #module Information
|
99
|
-
|
100
|
-
extend Information
|
101
|
-
|
102
|
-
# executes BLAST and returns result as a string
|
103
|
-
def exec_ddbj(query)
|
104
|
-
options = make_command_line_options
|
105
|
-
opt = Bio::Blast::NCBIOptions.new(options)
|
106
|
-
|
107
|
-
# REST objects are cached
|
108
|
-
@ddbj_remote_blast ||= Bio::DDBJ::REST::Blast.new
|
109
|
-
@ddbj_request_manager ||= Bio::DDBJ::REST::RequestManager.new
|
110
|
-
|
111
|
-
program = opt.delete('-p')
|
112
|
-
db = opt.delete('-d')
|
113
|
-
optstr = Bio::Command.make_command_line_unix(opt.options)
|
114
|
-
|
115
|
-
# using searchParamAsync
|
116
|
-
qid = @ddbj_remote_blast.searchParamAsync(program, db, query, optstr)
|
117
|
-
@output = qid
|
118
|
-
|
119
|
-
result = @ddbj_request_manager.wait_getAsyncResult(qid)
|
120
|
-
|
121
|
-
@output = result
|
122
|
-
return @output
|
123
|
-
end
|
124
|
-
|
125
|
-
end #module DDBJ
|
126
|
-
|
127
|
-
# for lazy load DDBJ module
|
128
|
-
Ddbj = DDBJ
|
129
|
-
|
130
|
-
end #module Bio::Blast::Remote
|
131
|
-
|
data/lib/bio/db/kegg/taxonomy.rb
DELETED
@@ -1,280 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# = bio/db/kegg/taxonomy.rb - KEGG taxonomy parser class
|
3
|
-
#
|
4
|
-
# Copyright:: Copyright (C) 2007 Toshiaki Katayama <k@bioruby.org>
|
5
|
-
# License:: The Ruby License
|
6
|
-
#
|
7
|
-
# $Id:$
|
8
|
-
#
|
9
|
-
|
10
|
-
module Bio
|
11
|
-
class KEGG
|
12
|
-
|
13
|
-
# == Description
|
14
|
-
#
|
15
|
-
# Parse the KEGG 'taxonomy' file which describes taxonomic classification
|
16
|
-
# of organisms.
|
17
|
-
#
|
18
|
-
# == References
|
19
|
-
#
|
20
|
-
# The KEGG 'taxonomy' file is available at
|
21
|
-
#
|
22
|
-
# * ftp://ftp.genome.jp/pub/kegg/genes/taxonomy
|
23
|
-
#
|
24
|
-
class Taxonomy
|
25
|
-
|
26
|
-
def initialize(filename, orgs = [])
|
27
|
-
# Stores the taxonomic tree as a linked list (implemented in Hash), so
|
28
|
-
# every node need to have unique name (key) to work correctly
|
29
|
-
@tree = Hash.new
|
30
|
-
|
31
|
-
# Also stores the taxonomic tree as a list of arrays (full path)
|
32
|
-
@path = Array.new
|
33
|
-
|
34
|
-
# Also stores all leaf nodes (organism codes) of every intermediate nodes
|
35
|
-
@leaves = Hash.new
|
36
|
-
|
37
|
-
# tentative name for the root node (use accessor to change)
|
38
|
-
@root = 'Genes'
|
39
|
-
|
40
|
-
hier = Array.new
|
41
|
-
level = 0
|
42
|
-
label = nil
|
43
|
-
|
44
|
-
File.open(filename).each do |line|
|
45
|
-
next if line.strip.empty?
|
46
|
-
|
47
|
-
# line for taxonomic hierarchy (indent according to the number of # marks)
|
48
|
-
if line[/^#/]
|
49
|
-
level = line[/^#+/].length
|
50
|
-
label = line[/[A-z].*/]
|
51
|
-
hier[level] = sanitize(label)
|
52
|
-
|
53
|
-
# line for organims name (unify different strains of a species)
|
54
|
-
else
|
55
|
-
tax, org, name, desc = line.chomp.split("\t")
|
56
|
-
if orgs.nil? or orgs.empty? or orgs.include?(org)
|
57
|
-
species, strain, = name.split('_')
|
58
|
-
# (0) Grouping of the strains of the same species.
|
59
|
-
# If the name of species is the same as the previous line,
|
60
|
-
# add the species to the same species group.
|
61
|
-
# ex. Gamma/enterobacteria has a large number of organisms,
|
62
|
-
# so sub grouping of strains is needed for E.coli strains etc.
|
63
|
-
#
|
64
|
-
# However, if the species name is already used, need to avoid
|
65
|
-
# collision of species name as the current implementation stores
|
66
|
-
# the tree as a Hash, which may cause the infinite loop.
|
67
|
-
#
|
68
|
-
# (1) If species name == the intermediate node of other lineage
|
69
|
-
# Add '_sp' to the species name to avoid the conflict (1-1), and if
|
70
|
-
# 'species_sp' is already taken, use 'species_strain' instead (1-2).
|
71
|
-
# ex. Bacteria/Proteobacteria/Beta/T.denitrificans/tbd
|
72
|
-
# Bacteria/Proteobacteria/Epsilon/T.denitrificans_ATCC33889/tdn
|
73
|
-
# -> Bacteria/Proteobacteria/Beta/T.denitrificans/tbd
|
74
|
-
# Bacteria/Proteobacteria/Epsilon/T.denitrificans_sp/tdn
|
75
|
-
#
|
76
|
-
# (2) If species name == the intermediate node of the same lineage
|
77
|
-
# Add '_sp' to the species name to avoid the conflict.
|
78
|
-
# ex. Bacteria/Cyanobacgteria/Cyanobacteria_CYA/cya
|
79
|
-
# Bacteria/Cyanobacgteria/Cyanobacteria_CYB/cya
|
80
|
-
# Bacteria/Proteobacteria/Magnetococcus/Magnetococcus_MC1/mgm
|
81
|
-
# -> Bacteria/Cyanobacgteria/Cyanobacteria_sp/cya
|
82
|
-
# Bacteria/Cyanobacgteria/Cyanobacteria_sp/cya
|
83
|
-
# Bacteria/Proteobacteria/Magnetococcus/Magnetococcus_sp/mgm
|
84
|
-
sp_group = "#{species}_sp"
|
85
|
-
if @tree[species]
|
86
|
-
if hier[level+1] == species
|
87
|
-
# case (0)
|
88
|
-
else
|
89
|
-
# case (1-1)
|
90
|
-
species = sp_group
|
91
|
-
# case (1-2)
|
92
|
-
if @tree[sp_group] and hier[level+1] != species
|
93
|
-
species = name
|
94
|
-
end
|
95
|
-
end
|
96
|
-
else
|
97
|
-
if hier[level] == species
|
98
|
-
# case (2)
|
99
|
-
species = sp_group
|
100
|
-
end
|
101
|
-
end
|
102
|
-
# 'hier' is an array of the taxonomic tree + species and strain name.
|
103
|
-
# ex. [nil, Eukaryotes, Fungi, Ascomycetes, Saccharomycetes] +
|
104
|
-
# [S_cerevisiae, sce]
|
105
|
-
hier[level+1] = species # sanitize(species)
|
106
|
-
hier[level+2] = org
|
107
|
-
ary = hier[1, level+2]
|
108
|
-
warn ary.inspect if $DEBUG
|
109
|
-
add_to_tree(ary)
|
110
|
-
add_to_leaves(ary)
|
111
|
-
add_to_path(ary)
|
112
|
-
end
|
113
|
-
end
|
114
|
-
end
|
115
|
-
return tree
|
116
|
-
end
|
117
|
-
|
118
|
-
attr_reader :tree
|
119
|
-
attr_reader :path
|
120
|
-
attr_reader :leaves
|
121
|
-
attr_accessor :root
|
122
|
-
|
123
|
-
def organisms(group)
|
124
|
-
@leaves[group]
|
125
|
-
end
|
126
|
-
|
127
|
-
# Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
|
128
|
-
# and every intermediate nodes stores their child nodes as a Hash.
|
129
|
-
def add_to_tree(ary)
|
130
|
-
parent = @root
|
131
|
-
ary.each do |node|
|
132
|
-
@tree[parent] ||= Hash.new
|
133
|
-
@tree[parent][node] = nil
|
134
|
-
parent = node
|
135
|
-
end
|
136
|
-
end
|
137
|
-
|
138
|
-
# Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
|
139
|
-
# and stores leaf nodes to the every intermediate nodes as an Array.
|
140
|
-
def add_to_leaves(ary)
|
141
|
-
leaf = ary.last
|
142
|
-
ary.each do |node|
|
143
|
-
@leaves[node] ||= Array.new
|
144
|
-
@leaves[node] << leaf
|
145
|
-
end
|
146
|
-
end
|
147
|
-
|
148
|
-
# Add a new path [node, subnode, subsubnode, ..., leaf] under the root node
|
149
|
-
# and stores the path itself in an Array.
|
150
|
-
def add_to_path(ary)
|
151
|
-
@path << ary
|
152
|
-
end
|
153
|
-
|
154
|
-
# Compaction of intermediate nodes of the resulted taxonomic tree.
|
155
|
-
# - If child node has only one child node (grandchild), make the child of
|
156
|
-
# grandchild as a grandchild.
|
157
|
-
# ex.
|
158
|
-
# Plants / Monocotyledons / grass family / osa
|
159
|
-
# --> Plants / Monocotyledons / osa
|
160
|
-
#
|
161
|
-
def compact(node = root)
|
162
|
-
# if the node has children
|
163
|
-
if subnodes = @tree[node]
|
164
|
-
# obtain grandchildren for each child
|
165
|
-
subnodes.keys.each do |subnode|
|
166
|
-
if subsubnodes = @tree[subnode]
|
167
|
-
# if the number of grandchild node is 1
|
168
|
-
if subsubnodes.keys.size == 1
|
169
|
-
# obtain the name of the grandchild node
|
170
|
-
subsubnode = subsubnodes.keys.first
|
171
|
-
# obtain the child of the grandchlid node
|
172
|
-
if subsubsubnodes = @tree[subsubnode]
|
173
|
-
# make the child of grandchild node as a chlid of child node
|
174
|
-
@tree[subnode] = subsubsubnodes
|
175
|
-
# delete grandchild node
|
176
|
-
@tree[subnode].delete(subsubnode)
|
177
|
-
warn "--- compact: #{subsubnode} is replaced by #{subsubsubnodes}" if $DEBUG
|
178
|
-
# retry until new grandchild also needed to be compacted.
|
179
|
-
retry
|
180
|
-
end
|
181
|
-
end
|
182
|
-
end
|
183
|
-
# repeat recurseively
|
184
|
-
compact(subnode)
|
185
|
-
end
|
186
|
-
end
|
187
|
-
end
|
188
|
-
|
189
|
-
# Reduction of the leaf node of the resulted taxonomic tree.
|
190
|
-
# - If the parent node have only one leaf node, replace parent node
|
191
|
-
# with the leaf node.
|
192
|
-
# ex.
|
193
|
-
# Plants / Monocotyledons / osa
|
194
|
-
# --> Plants / osa
|
195
|
-
#
|
196
|
-
def reduce(node = root)
|
197
|
-
# if the node has children
|
198
|
-
if subnodes = @tree[node]
|
199
|
-
# obtain grandchildren for each child
|
200
|
-
subnodes.keys.each do |subnode|
|
201
|
-
if subsubnodes = @tree[subnode]
|
202
|
-
# if the number of grandchild node is 1
|
203
|
-
if subsubnodes.keys.size == 1
|
204
|
-
# obtain the name of the grandchild node
|
205
|
-
subsubnode = subsubnodes.keys.first
|
206
|
-
# if the grandchild node is a leaf node
|
207
|
-
unless @tree[subsubnode]
|
208
|
-
# make the grandchild node as a child node
|
209
|
-
@tree[node].update(subsubnodes)
|
210
|
-
# delete child node
|
211
|
-
@tree[node].delete(subnode)
|
212
|
-
warn "--- reduce: #{subnode} is replaced by #{subsubnode}" if $DEBUG
|
213
|
-
end
|
214
|
-
end
|
215
|
-
end
|
216
|
-
# repeat recursively
|
217
|
-
reduce(subnode)
|
218
|
-
end
|
219
|
-
end
|
220
|
-
end
|
221
|
-
|
222
|
-
# Traverse the taxonomic tree by the depth first search method
|
223
|
-
# under the given (root or intermediate) node.
|
224
|
-
def dfs(parent, &block)
|
225
|
-
if children = @tree[parent]
|
226
|
-
yield parent, children
|
227
|
-
children.keys.each do |child|
|
228
|
-
dfs(child, &block)
|
229
|
-
end
|
230
|
-
end
|
231
|
-
end
|
232
|
-
|
233
|
-
# Similar to the dfs method but also passes the current level of the nest
|
234
|
-
# to the iterator.
|
235
|
-
def dfs_with_level(parent, &block)
|
236
|
-
@level ||= 0
|
237
|
-
if children = @tree[parent]
|
238
|
-
yield parent, children, @level
|
239
|
-
@level += 1
|
240
|
-
children.keys.each do |child|
|
241
|
-
dfs_with_level(child, &block)
|
242
|
-
end
|
243
|
-
@level -= 1
|
244
|
-
end
|
245
|
-
end
|
246
|
-
|
247
|
-
# Convert the taxonomic tree structure to a simple ascii art.
|
248
|
-
def to_s
|
249
|
-
result = "#{@root}\n"
|
250
|
-
@tree[@root].keys.each do |node|
|
251
|
-
result += ascii_tree(node, " ")
|
252
|
-
end
|
253
|
-
return result
|
254
|
-
end
|
255
|
-
|
256
|
-
private
|
257
|
-
|
258
|
-
# Helper method for the to_s method.
|
259
|
-
def ascii_tree(node, indent)
|
260
|
-
result = "#{indent}+- #{node}\n"
|
261
|
-
indent += " "
|
262
|
-
@tree[node].keys.each do |child|
|
263
|
-
if @tree[child]
|
264
|
-
result += ascii_tree(child, indent)
|
265
|
-
else
|
266
|
-
result += "#{indent}+- #{child}\n"
|
267
|
-
end
|
268
|
-
end
|
269
|
-
return result
|
270
|
-
end
|
271
|
-
|
272
|
-
def sanitize(str)
|
273
|
-
str.gsub(/[^A-z0-9]/, '_')
|
274
|
-
end
|
275
|
-
|
276
|
-
end # Taxonomy
|
277
|
-
|
278
|
-
end # KEGG
|
279
|
-
end # Bio
|
280
|
-
|
data/lib/bio/io/dbget.rb
DELETED
@@ -1,194 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# = bio/io/dbget.rb - GenomeNet/DBGET client module
|
3
|
-
#
|
4
|
-
# Copyright:: Copyright (C) 2000, 2001
|
5
|
-
# Mitsuteru C. Nakao <n@bioruby.org>,
|
6
|
-
# Toshiaki Katayama <k@bioruby.org>
|
7
|
-
# License:: The Ruby License
|
8
|
-
#
|
9
|
-
# $Id: dbget.rb,v 1.13 2007/04/05 23:35:41 trevor Exp $
|
10
|
-
#
|
11
|
-
# == DBGET
|
12
|
-
#
|
13
|
-
# Accessing the GenomeNet/DBGET data retrieval system
|
14
|
-
# http://www.genome.jp/dbget/ within the intranet.
|
15
|
-
#
|
16
|
-
|
17
|
-
require 'socket'
|
18
|
-
|
19
|
-
module Bio
|
20
|
-
|
21
|
-
class DBGET
|
22
|
-
|
23
|
-
# default DBGET server address
|
24
|
-
# SERV = "dbgetserv.genome.jp"
|
25
|
-
SERV = "dbget.genome.jp"
|
26
|
-
# default DBGET port number
|
27
|
-
PORT = "3266"
|
28
|
-
|
29
|
-
# Main class method to access DBGET server. Optionally, this method
|
30
|
-
# can be called with the alternative DBGET server address and the
|
31
|
-
# TCP/IP port number.
|
32
|
-
#
|
33
|
-
# 'com' should be one of the following DBGET commands:
|
34
|
-
#
|
35
|
-
# * alink, bfind, bget, binfo, blink, bman, bref, btab, btit
|
36
|
-
#
|
37
|
-
# These methods are shortcut for the dbget commands. Actually,
|
38
|
-
# Bio::DBGET.((|com|))(arg) internally calls Bio::DBGET.dbget(com, arg).
|
39
|
-
# Most of these methods accept the argument "-h" for help.
|
40
|
-
#
|
41
|
-
# 'arg' should be one of the following formats :
|
42
|
-
#
|
43
|
-
# * [options] db
|
44
|
-
# * specify the database name only for binfo, bman etc.
|
45
|
-
# * [options] db:entry
|
46
|
-
# * specify the database name and the entry name to retrieve.
|
47
|
-
# * [options] db entry1 entry2 ...
|
48
|
-
# * specify the database name and the list of entries to retrieve.
|
49
|
-
#
|
50
|
-
# Note that options in the above example can be omitted. If 'arg' is
|
51
|
-
# empty, the help message with a list of options for 'com' will be
|
52
|
-
# shown by default. Supported database names will be found at the
|
53
|
-
# GenomeNet DBGET web page http://www.genome.jp/dbget/.
|
54
|
-
#
|
55
|
-
def DBGET.dbget(com, arg, serv = nil, port = nil)
|
56
|
-
|
57
|
-
unless serv or port # if both of serv and port are nil
|
58
|
-
if ENV["DBGET"] =~ /:/ # and ENV["DBGET"] exists
|
59
|
-
serv, port = ENV["DBGET"].split(':')
|
60
|
-
end
|
61
|
-
end
|
62
|
-
serv = serv ? serv : SERV
|
63
|
-
port = port ? port : PORT
|
64
|
-
|
65
|
-
if arg.empty?
|
66
|
-
arg = "-h" # DBGET help message
|
67
|
-
end
|
68
|
-
|
69
|
-
query = "#{com} #{arg}\n" # DBGET query string
|
70
|
-
|
71
|
-
sock = TCPSocket.open("#{serv}", "#{port}")
|
72
|
-
|
73
|
-
sock.write(query) # submit query
|
74
|
-
sock.flush # buffer flush
|
75
|
-
|
76
|
-
sock.gets # skip "+Helo DBgetServ ..."
|
77
|
-
sock.gets # skip "#If you see this message, ..."
|
78
|
-
sock.gets # skip "*Request-IDent"
|
79
|
-
|
80
|
-
result = sock.read # DBGET result
|
81
|
-
|
82
|
-
sock.close
|
83
|
-
|
84
|
-
return result
|
85
|
-
end
|
86
|
-
|
87
|
-
# Show the version information of the DBGET server.
|
88
|
-
def DBGET.version
|
89
|
-
dbget("bget", "-V")
|
90
|
-
end
|
91
|
-
|
92
|
-
|
93
|
-
#--
|
94
|
-
# bacc("db entry") - not supported : get accession(s)
|
95
|
-
# bent("db entry") - not supported : get entry name
|
96
|
-
# lmarge("db entry") - not supported
|
97
|
-
#++
|
98
|
-
|
99
|
-
# alink("db entry") method returns relations
|
100
|
-
def DBGET.alink(arg)
|
101
|
-
dbget("alink", arg)
|
102
|
-
end
|
103
|
-
|
104
|
-
# bfind("db keyword") method searches entries by keyword
|
105
|
-
def DBGET.bfind(arg)
|
106
|
-
dbget("bfind", arg)
|
107
|
-
end
|
108
|
-
|
109
|
-
# bget("db entry") method retrieves entries specified by the entry names
|
110
|
-
def DBGET.bget(arg)
|
111
|
-
dbget("bget", arg)
|
112
|
-
end
|
113
|
-
|
114
|
-
# seq("db entry") method retrieves the first sequence of the entry
|
115
|
-
#
|
116
|
-
# Shortcut to retrieve the sequence of the entry in FASTA format.
|
117
|
-
# This method is equivalent to Bio::DBGET.bget("-f -n 1 #{arg}") and
|
118
|
-
# 'arg' should be the "db:entry" or "db entry1 entry2 ..." format.
|
119
|
-
def DBGET.seq(arg)
|
120
|
-
dbget("bget", "-f -n 1 #{arg}")
|
121
|
-
end
|
122
|
-
|
123
|
-
# seq2("db entry") method retrieves the second sequence of the entry if any
|
124
|
-
#
|
125
|
-
# Shortcut to retrieve the second sequence of the entry in FASTA format.
|
126
|
-
# This method is equivalent to Bio::DBGET.bget("-f -n 2 #{arg}").
|
127
|
-
# Only useful when treating the KEGG GENES database entries which have
|
128
|
-
# both AASEQ and NTSEQ fields. This method is obsolete and it is
|
129
|
-
# recommended to use 'naseq' and 'aaseq' instead.
|
130
|
-
def DBGET.seq2(arg)
|
131
|
-
dbget("bget", "-f -n 2 #{arg}")
|
132
|
-
end
|
133
|
-
|
134
|
-
# naseq("db entry") method retrieves the nucleic acid sequence of the
|
135
|
-
# entry if any.
|
136
|
-
def DBGET.naseq(arg)
|
137
|
-
dbget("bget", "-f -n n #{arg}")
|
138
|
-
end
|
139
|
-
|
140
|
-
# aaseq("db entry") method retrieves the amino acid sequence of the
|
141
|
-
# entry if any.
|
142
|
-
def DBGET.aaseq(arg)
|
143
|
-
dbget("bget", "-f -n a #{arg}")
|
144
|
-
end
|
145
|
-
|
146
|
-
# binfo("db") method retrieves the database information
|
147
|
-
def DBGET.binfo(arg)
|
148
|
-
dbget("binfo", arg)
|
149
|
-
end
|
150
|
-
|
151
|
-
# blink("db entry") method retrieves the link information
|
152
|
-
def DBGET.blink(arg)
|
153
|
-
dbget("blink", arg)
|
154
|
-
end
|
155
|
-
|
156
|
-
# bman ("db entry") method shows the manual page
|
157
|
-
def DBGET.bman(arg)
|
158
|
-
dbget("bman", arg)
|
159
|
-
end
|
160
|
-
|
161
|
-
# bref("db entry") method retrieves the references and authors
|
162
|
-
def DBGET.bref(arg)
|
163
|
-
dbget("bref", arg)
|
164
|
-
end
|
165
|
-
|
166
|
-
# btab ("db entry") method retrives (and generates) the database alias table
|
167
|
-
def DBGET.btab(arg)
|
168
|
-
dbget("btab", arg)
|
169
|
-
end
|
170
|
-
|
171
|
-
# btit("db entry ..") method retrieves the entry definition
|
172
|
-
def DBGET.btit(arg)
|
173
|
-
dbget("btit", arg)
|
174
|
-
end
|
175
|
-
|
176
|
-
end
|
177
|
-
|
178
|
-
end # module Bio
|
179
|
-
|
180
|
-
|
181
|
-
if __FILE__ == $0
|
182
|
-
puts "### DBGET version"
|
183
|
-
p Bio::DBGET.version
|
184
|
-
puts "### DBGET.dbget('bfind', 'sce tyrosin kinase')"
|
185
|
-
puts Bio::DBGET.dbget('bfind', 'sce tyrosin kinase')
|
186
|
-
puts "### DBGET.bfind('sce tyrosin kinase')"
|
187
|
-
puts Bio::DBGET.bfind('sce tyrosin kinase')
|
188
|
-
puts "### DBGET.bget('sce:YDL028C')"
|
189
|
-
puts Bio::DBGET.bget('sce:YDL028C')
|
190
|
-
puts "### DBGET.binfo('dbget')"
|
191
|
-
puts Bio::DBGET.binfo('dbget')
|
192
|
-
end
|
193
|
-
|
194
|
-
|