bio 1.4.2 → 1.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +66 -0
- data/ChangeLog +989 -4524
- data/KNOWN_ISSUES.rdoc +67 -2
- data/README.rdoc +89 -23
- data/README_DEV.rdoc +93 -2
- data/RELEASE_NOTES.rdoc +167 -95
- data/Rakefile +199 -7
- data/bioruby.gemspec +27 -12
- data/bioruby.gemspec.erb +6 -3
- data/doc/ChangeLog-before-1.4.2 +5013 -0
- data/doc/RELEASE_NOTES-1.4.2.rdoc +132 -0
- data/doc/Tutorial.rd +21 -3
- data/doc/Tutorial.rd.html +20 -12
- data/etc/bioinformatics/seqdatabase.ini +13 -196
- data/gemfiles/Gemfile.travis-jruby1.8 +7 -0
- data/gemfiles/Gemfile.travis-jruby1.9 +10 -0
- data/gemfiles/Gemfile.travis-ruby1.8 +7 -0
- data/gemfiles/Gemfile.travis-ruby1.9 +10 -0
- data/gemfiles/modify-Gemfile.rb +28 -0
- data/gemfiles/prepare-gemspec.rb +25 -0
- data/lib/bio/alignment.rb +1 -1
- data/lib/bio/appl/bl2seq/report.rb +3 -3
- data/lib/bio/appl/blast/ddbj.rb +0 -3
- data/lib/bio/appl/blast/format0.rb +4 -22
- data/lib/bio/appl/blast/genomenet.rb +33 -16
- data/lib/bio/appl/blast/ncbioptions.rb +8 -3
- data/lib/bio/appl/blast/remote.rb +6 -5
- data/lib/bio/appl/blast/report.rb +10 -6
- data/lib/bio/appl/blast/rpsblast.rb +3 -2
- data/lib/bio/appl/blast/wublast.rb +3 -3
- data/lib/bio/command.rb +118 -36
- data/lib/bio/data/na.rb +1 -1
- data/lib/bio/db/embl/embl.rb +74 -0
- data/lib/bio/db/embl/format_embl.rb +0 -4
- data/lib/bio/db/fasta.rb +57 -45
- data/lib/bio/db/fasta/defline.rb +1 -1
- data/lib/bio/db/fasta/format_fasta.rb +0 -4
- data/lib/bio/db/fasta/format_qual.rb +0 -5
- data/lib/bio/db/fastq/format_fastq.rb +0 -1
- data/lib/bio/db/genbank/format_genbank.rb +0 -4
- data/lib/bio/db/gff.rb +41 -12
- data/lib/bio/db/kegg/genes.rb +3 -3
- data/lib/bio/db/kegg/kgml.rb +465 -64
- data/lib/bio/db/newick.rb +0 -244
- data/lib/bio/db/pdb.rb +1 -4
- data/lib/bio/db/pdb/atom.rb +3 -2
- data/lib/bio/db/pdb/chain.rb +2 -3
- data/lib/bio/db/pdb/chemicalcomponent.rb +3 -2
- data/lib/bio/db/pdb/model.rb +2 -2
- data/lib/bio/db/pdb/pdb.rb +2 -1
- data/lib/bio/db/pdb/residue.rb +2 -2
- data/lib/bio/db/pdb/utils.rb +7 -4
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +52 -5
- data/lib/bio/feature.rb +2 -3
- data/lib/bio/io/flatfile/autodetection.rb +1 -1
- data/lib/bio/io/flatfile/buffer.rb +84 -0
- data/lib/bio/sequence.rb +6 -4
- data/lib/bio/sequence/aa.rb +3 -5
- data/lib/bio/sequence/adapter.rb +6 -6
- data/lib/bio/sequence/common.rb +3 -3
- data/lib/bio/sequence/compat.rb +2 -7
- data/lib/bio/sequence/dblink.rb +6 -5
- data/lib/bio/sequence/format.rb +0 -6
- data/lib/bio/sequence/format_raw.rb +0 -4
- data/lib/bio/sequence/generic.rb +3 -4
- data/lib/bio/sequence/na.rb +4 -6
- data/lib/bio/sequence/quality_score.rb +2 -0
- data/lib/bio/sequence/sequence_masker.rb +3 -0
- data/lib/bio/shell/core.rb +1 -0
- data/lib/bio/tree.rb +1 -2
- data/lib/bio/tree/output.rb +264 -0
- data/lib/bio/util/restriction_enzyme.rb +1 -3
- data/lib/bio/util/restriction_enzyme/analysis.rb +8 -5
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +4 -3
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +3 -2
- data/lib/bio/util/restriction_enzyme/dense_int_array.rb +3 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +3 -4
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +3 -4
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +3 -4
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +3 -4
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +3 -4
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +3 -4
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +3 -4
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +3 -4
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +3 -4
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +3 -4
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +3 -4
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +3 -4
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +3 -4
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +3 -4
- data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -3
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +3 -4
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +3 -4
- data/lib/bio/util/restriction_enzyme/sorted_num_array.rb +3 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +3 -4
- data/lib/bio/version.rb +11 -2
- data/sample/seqdatabase.ini +210 -0
- data/test/bioruby_test_helper.rb +37 -12
- data/test/data/KEGG/test.kgml +37 -0
- data/test/data/command/echoarg2.bat +0 -0
- data/test/data/command/echoarg2.sh +4 -0
- data/test/functional/bio/test_command.rb +58 -28
- data/test/{functional → network}/bio/appl/blast/test_remote.rb +0 -0
- data/test/{functional → network}/bio/appl/test_blast.rb +0 -0
- data/test/{functional → network}/bio/appl/test_pts1.rb +0 -0
- data/test/{functional → network}/bio/io/test_ddbjrest.rb +0 -0
- data/test/{functional → network}/bio/io/test_ensembl.rb +0 -0
- data/test/{functional → network}/bio/io/test_pubmed.rb +0 -0
- data/test/{functional → network}/bio/io/test_soapwsdl.rb +0 -0
- data/test/{functional → network}/bio/io/test_togows.rb +0 -0
- data/test/network/bio/test_command.rb +35 -0
- data/test/runner.rb +16 -6
- data/test/unit/bio/appl/blast/test_report.rb +119 -0
- data/test/unit/bio/appl/blast/test_rpsblast.rb +1 -0
- data/test/unit/bio/data/test_na.rb +1 -1
- data/test/unit/bio/db/embl/test_embl.rb +2 -7
- data/test/unit/bio/db/embl/test_embl_rel89.rb +2 -7
- data/test/unit/bio/db/fasta/test_defline.rb +1 -1
- data/test/unit/bio/db/genbank/test_genpept.rb +1 -1
- data/test/unit/bio/db/kegg/test_drug.rb +1 -1
- data/test/unit/bio/db/kegg/test_genome.rb +1 -1
- data/test/unit/bio/db/kegg/test_glycan.rb +1 -1
- data/test/unit/bio/db/kegg/test_kgml.rb +1022 -0
- data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +2 -1
- data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +4 -2
- data/test/unit/bio/db/test_newick.rb +2 -0
- data/test/unit/bio/db/test_phyloxml.rb +54 -2
- data/test/unit/bio/db/test_phyloxml_writer.rb +15 -9
- data/test/unit/bio/db/test_soft.rb +1 -1
- data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +141 -0
- data/test/unit/bio/sequence/test_common.rb +36 -4
- data/test/unit/bio/sequence/test_na.rb +1 -1
- data/test/unit/bio/test_command.rb +9 -4
- data/test/unit/bio/test_sequence.rb +2 -2
- data/test/unit/bio/test_tree.rb +11 -11
- data/test/unit/bio/util/test_restriction_enzyme.rb +1 -1
- metadata +1428 -655
- data/rdoc.zsh +0 -8
data/lib/bio/feature.rb
CHANGED
@@ -5,12 +5,11 @@
|
|
5
5
|
# 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: feature.rb,v 1.13.2.1 2008/03/04 10:12:22 ngoto Exp $
|
9
|
-
|
10
|
-
require 'bio/location'
|
11
8
|
|
12
9
|
module Bio
|
13
10
|
|
11
|
+
autoload :Locations, 'bio/location' unless const_defined?(:Locations)
|
12
|
+
|
14
13
|
# = DESCRIPTION
|
15
14
|
# Container for the sequence annotation.
|
16
15
|
#
|
@@ -482,7 +482,7 @@ module Bio
|
|
482
482
|
/^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
|
483
483
|
|
484
484
|
fastq = RuleRegexp[ 'Bio::Fastq',
|
485
|
-
/^\@.+(?:\r|\r?\n)(?:[^\@\+].*(?:\r|\r?\n))
|
485
|
+
/^\@.+(?:\r|\r?\n)(?:[^\@\+].*(?:\r|\r?\n))+/ ],
|
486
486
|
|
487
487
|
fastaformat = RuleProc.new('Bio::FastaFormat',
|
488
488
|
'Bio::NBRF',
|
@@ -43,17 +43,101 @@ module Bio
|
|
43
43
|
# *arg is passed to File.open.
|
44
44
|
#
|
45
45
|
# Like File.open, a block can be accepted.
|
46
|
+
#
|
47
|
+
# Unlike File.open, the default is binary mode, unless text mode
|
48
|
+
# is explicitly specified in mode.
|
46
49
|
def self.open_file(filename, *arg)
|
50
|
+
params = _parse_file_open_arg(*arg)
|
51
|
+
if params[:textmode] or /t/ =~ params[:fmode_string].to_s then
|
52
|
+
textmode = true
|
53
|
+
else
|
54
|
+
textmode = false
|
55
|
+
end
|
47
56
|
if block_given? then
|
48
57
|
File.open(filename, *arg) do |fobj|
|
58
|
+
fobj.binmode unless textmode
|
49
59
|
yield self.new(fobj, filename)
|
50
60
|
end
|
51
61
|
else
|
52
62
|
fobj = File.open(filename, *arg)
|
63
|
+
fobj.binmode unless textmode
|
53
64
|
self.new(fobj, filename)
|
54
65
|
end
|
55
66
|
end
|
56
67
|
|
68
|
+
# Parses file open mode parameter.
|
69
|
+
# mode must be an Integer or a String.
|
70
|
+
def self._parse_file_open_mode(mode)
|
71
|
+
modeint = nil
|
72
|
+
modestr = nil
|
73
|
+
begin
|
74
|
+
modeint = mode.to_int
|
75
|
+
rescue NoMethodError
|
76
|
+
end
|
77
|
+
unless modeint then
|
78
|
+
begin
|
79
|
+
modestr = mode.to_str
|
80
|
+
rescue NoMethodError
|
81
|
+
end
|
82
|
+
end
|
83
|
+
if modeint then
|
84
|
+
return { :fmode_integer => modeint }
|
85
|
+
end
|
86
|
+
if modestr then
|
87
|
+
fmode, ext_enc, int_enc = modestr.split(/\:/)
|
88
|
+
ret = { :fmode_string => fmode }
|
89
|
+
ret[:external_encoding] = ext_enc if ext_enc
|
90
|
+
ret[:internal_encoding] = int_enc if int_enc
|
91
|
+
return ret
|
92
|
+
end
|
93
|
+
nil
|
94
|
+
end
|
95
|
+
private_class_method :_parse_file_open_mode
|
96
|
+
|
97
|
+
# Parses file open arguments
|
98
|
+
def self._parse_file_open_arg(*arg)
|
99
|
+
fmode_hash = nil
|
100
|
+
perm = nil
|
101
|
+
|
102
|
+
elem = arg.shift
|
103
|
+
if elem then
|
104
|
+
fmode_hash = _parse_file_open_mode(elem)
|
105
|
+
if fmode_hash then
|
106
|
+
elem = arg.shift
|
107
|
+
if elem then
|
108
|
+
begin
|
109
|
+
perm = elem.to_int
|
110
|
+
rescue NoMethodError
|
111
|
+
end
|
112
|
+
end
|
113
|
+
elem = arg.shift if perm
|
114
|
+
end
|
115
|
+
end
|
116
|
+
if elem.kind_of?(Hash) then
|
117
|
+
opt = elem.dup
|
118
|
+
else
|
119
|
+
opt = {}
|
120
|
+
end
|
121
|
+
if elem = opt[:mode] then
|
122
|
+
fmode_hash = _parse_file_open_mode(elem)
|
123
|
+
end
|
124
|
+
fmode_hash ||= {}
|
125
|
+
fmode_hash[:perm] = perm if perm
|
126
|
+
unless enc = opt[:encoding].to_s.empty? then
|
127
|
+
ext_enc, int_enc = enc.split(/\:/)
|
128
|
+
fmode_hash[:external_encoding] = ext_enc if ext_enc
|
129
|
+
fmode_hash[:internal_encoding] = int_enc if int_enc
|
130
|
+
end
|
131
|
+
|
132
|
+
[ :external_encoding, :internal_encoding,
|
133
|
+
:textmode, :binmode, :autoclose, :perm ].each do |key|
|
134
|
+
val = opt[key]
|
135
|
+
fmode_hash[key] = val if val
|
136
|
+
end
|
137
|
+
fmode_hash
|
138
|
+
end
|
139
|
+
private_class_method :_parse_file_open_arg
|
140
|
+
|
57
141
|
# Creates a new input stream wrapper from URI specified as _uri_.
|
58
142
|
# by using OpenURI.open_uri or URI#open.
|
59
143
|
# _uri_ must be a String or URI object.
|
data/lib/bio/sequence.rb
CHANGED
@@ -9,10 +9,6 @@
|
|
9
9
|
# Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
10
10
|
# License:: The Ruby License
|
11
11
|
#
|
12
|
-
# $Id:$
|
13
|
-
#
|
14
|
-
|
15
|
-
require 'bio/sequence/compat'
|
16
12
|
|
17
13
|
module Bio
|
18
14
|
|
@@ -74,6 +70,12 @@ class Sequence
|
|
74
70
|
autoload :QualityScore, 'bio/sequence/quality_score'
|
75
71
|
autoload :SequenceMasker, 'bio/sequence/sequence_masker'
|
76
72
|
|
73
|
+
#--
|
74
|
+
# require "bio/sequence/compat.rb" here to avoid circular require and
|
75
|
+
# possible superclass mismatch of AA class
|
76
|
+
#++
|
77
|
+
require 'bio/sequence/compat'
|
78
|
+
|
77
79
|
include Format
|
78
80
|
include SequenceMasker
|
79
81
|
|
data/lib/bio/sequence/aa.rb
CHANGED
@@ -6,14 +6,12 @@
|
|
6
6
|
# Ryan Raaum <ryan@raaum.org>
|
7
7
|
# License:: The Ruby License
|
8
8
|
#
|
9
|
-
# $Id: aa.rb,v 1.4 2007/04/05 23:35:41 trevor Exp $
|
10
|
-
#
|
11
|
-
|
12
|
-
require 'bio/sequence/common'
|
13
9
|
|
14
10
|
module Bio
|
15
11
|
|
16
|
-
autoload :AminoAcid, 'bio/data/aa'
|
12
|
+
autoload :AminoAcid, 'bio/data/aa' unless const_defined?(:AminoAcid)
|
13
|
+
|
14
|
+
require 'bio/sequence' unless const_defined?(:Sequence)
|
17
15
|
|
18
16
|
class Sequence
|
19
17
|
|
data/lib/bio/sequence/adapter.rb
CHANGED
@@ -5,10 +5,10 @@
|
|
5
5
|
# Naohisa Goto <ng@bioruby.org>,
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id:$
|
9
|
-
#
|
10
8
|
|
11
|
-
|
9
|
+
module Bio
|
10
|
+
|
11
|
+
require 'bio/sequence' unless const_defined?(:Sequence)
|
12
12
|
|
13
13
|
# Internal use only. Normal users should not use this module.
|
14
14
|
#
|
@@ -17,7 +17,7 @@ require 'bio/sequence'
|
|
17
17
|
#
|
18
18
|
# This module is used by using "extend", not "include".
|
19
19
|
#
|
20
|
-
module
|
20
|
+
module Sequence::Adapter
|
21
21
|
|
22
22
|
autoload :GenBank, 'bio/db/genbank/genbank_to_biosequence'
|
23
23
|
autoload :EMBL, 'bio/db/embl/embl_to_biosequence'
|
@@ -107,6 +107,6 @@ __END_OF_DEF__
|
|
107
107
|
true
|
108
108
|
end
|
109
109
|
|
110
|
-
end #module
|
111
|
-
|
110
|
+
end #module Sequence::Adapter
|
112
111
|
|
112
|
+
end #module Bio
|
data/lib/bio/sequence/common.rb
CHANGED
@@ -6,12 +6,12 @@
|
|
6
6
|
# Ryan Raaum <ryan@raaum.org>
|
7
7
|
# License:: The Ruby License
|
8
8
|
#
|
9
|
-
# $Id:$
|
10
|
-
#
|
11
9
|
|
12
10
|
module Bio
|
13
11
|
|
14
|
-
autoload :Locations, 'bio/location'
|
12
|
+
autoload :Locations, 'bio/location' unless const_defined?(:Locations)
|
13
|
+
|
14
|
+
require 'bio/sequence' unless const_defined?(:Sequence)
|
15
15
|
|
16
16
|
class Sequence
|
17
17
|
|
data/lib/bio/sequence/compat.rb
CHANGED
@@ -6,17 +6,12 @@
|
|
6
6
|
# Ryan Raaum <ryan@raaum.org>
|
7
7
|
# License:: The Ruby License
|
8
8
|
#
|
9
|
-
# $Id:$
|
10
|
-
#
|
11
|
-
|
12
9
|
|
13
10
|
module Bio
|
14
11
|
|
15
|
-
|
12
|
+
require 'bio/sequence' unless const_defined?(:Sequence)
|
16
13
|
|
17
|
-
|
18
|
-
autoload :NA, 'bio/sequence/na'
|
19
|
-
autoload :AA, 'bio/sequence/aa'
|
14
|
+
class Sequence
|
20
15
|
|
21
16
|
# Return sequence as
|
22
17
|
# String[http://corelib.rubyonrails.org/classes/String.html].
|
data/lib/bio/sequence/dblink.rb
CHANGED
@@ -5,15 +5,15 @@
|
|
5
5
|
# Naohisa Goto <ng@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: dblink.rb,v 1.1.2.1 2008/06/17 15:44:22 ngoto Exp $
|
9
|
-
#
|
10
8
|
|
11
|
-
|
9
|
+
module Bio
|
10
|
+
|
11
|
+
require 'bio/sequence' unless const_defined?(:Sequence)
|
12
12
|
|
13
13
|
# Bio::Sequence::DBLink stores IDs with the database name.
|
14
14
|
# Its main purpose is to store database cross-reference information
|
15
15
|
# for a sequence entry.
|
16
|
-
class
|
16
|
+
class Sequence::DBLink
|
17
17
|
|
18
18
|
# creates a new DBLink object
|
19
19
|
def initialize(database, primary_id, *secondary_ids)
|
@@ -49,6 +49,7 @@ class Bio::Sequence::DBLink
|
|
49
49
|
self.new(*(str.split(/\s*\;\s*/)))
|
50
50
|
end
|
51
51
|
|
52
|
-
end #class
|
52
|
+
end #class Sequence::DBLink
|
53
53
|
|
54
|
+
end #module Bio
|
54
55
|
|
data/lib/bio/sequence/format.rb
CHANGED
data/lib/bio/sequence/generic.rb
CHANGED
@@ -5,12 +5,11 @@
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: generic.rb,v 1.5 2007/04/05 23:35:41 trevor Exp $
|
9
|
-
#
|
10
|
-
|
11
|
-
require 'bio/sequence/common'
|
12
8
|
|
13
9
|
module Bio
|
10
|
+
|
11
|
+
require 'bio/sequence' unless const_defined?(:Sequence)
|
12
|
+
|
14
13
|
class Sequence
|
15
14
|
|
16
15
|
class Generic < String #:nodoc:
|
data/lib/bio/sequence/na.rb
CHANGED
@@ -6,15 +6,13 @@
|
|
6
6
|
# Ryan Raaum <ryan@raaum.org>
|
7
7
|
# License:: The Ruby License
|
8
8
|
#
|
9
|
-
# $Id: na.rb,v 1.7 2007/04/23 16:43:51 trevor Exp $
|
10
|
-
#
|
11
|
-
|
12
|
-
require 'bio/sequence/common'
|
13
9
|
|
14
10
|
module Bio
|
15
11
|
|
16
|
-
autoload :NucleicAcid, 'bio/data/na'
|
17
|
-
autoload :CodonTable, 'bio/data/codontable'
|
12
|
+
autoload :NucleicAcid, 'bio/data/na' unless const_defined?(:NucleicAcid)
|
13
|
+
autoload :CodonTable, 'bio/data/codontable' unless const_defined?(:CodonTable)
|
14
|
+
|
15
|
+
require 'bio/sequence' unless const_defined?(:Sequence)
|
18
16
|
|
19
17
|
class Sequence
|
20
18
|
|
data/lib/bio/shell/core.rb
CHANGED
data/lib/bio/tree.rb
CHANGED
@@ -0,0 +1,264 @@
|
|
1
|
+
#
|
2
|
+
# = bio/tree/output.rb - Phylogenetic tree formatter
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2004-2006
|
5
|
+
# Naohisa Goto <ng@bioruby.org>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
#
|
9
|
+
# == Description
|
10
|
+
#
|
11
|
+
# This file contains formatter of Newick, NHX and Phylip distance matrix.
|
12
|
+
#
|
13
|
+
# == References
|
14
|
+
#
|
15
|
+
# * http://evolution.genetics.washington.edu/phylip/newick_doc.html
|
16
|
+
# * http://www.phylosoft.org/forester/NHX.html
|
17
|
+
#
|
18
|
+
|
19
|
+
module Bio
|
20
|
+
class Tree
|
21
|
+
|
22
|
+
#---
|
23
|
+
# newick output
|
24
|
+
#+++
|
25
|
+
|
26
|
+
# default options
|
27
|
+
DEFAULT_OPTIONS =
|
28
|
+
{ :indent => ' ' }
|
29
|
+
|
30
|
+
def __get_option(key, options)
|
31
|
+
if (r = options[key]) != nil then
|
32
|
+
r
|
33
|
+
elsif @options && (r = @options[key]) != nil then
|
34
|
+
r
|
35
|
+
else
|
36
|
+
DEFAULT_OPTIONS[key]
|
37
|
+
end
|
38
|
+
end
|
39
|
+
private :__get_option
|
40
|
+
|
41
|
+
|
42
|
+
# formats Newick label (unquoted_label or quoted_label)
|
43
|
+
def __to_newick_format_label(str, options)
|
44
|
+
if __get_option(:parser, options) == :naive then
|
45
|
+
return str.to_s
|
46
|
+
end
|
47
|
+
str = str.to_s
|
48
|
+
if /([\(\)\,\:\[\]\_\'\x00-\x1f\x7f])/ =~ str then
|
49
|
+
# quoted_label
|
50
|
+
return "\'" + str.gsub(/\'/, "\'\'") + "\'"
|
51
|
+
end
|
52
|
+
# unquoted_label
|
53
|
+
return str.gsub(/ /, '_')
|
54
|
+
end
|
55
|
+
private :__to_newick_format_label
|
56
|
+
|
57
|
+
# formats leaf
|
58
|
+
def __to_newick_format_leaf(node, edge, options)
|
59
|
+
|
60
|
+
label = __to_newick_format_label(get_node_name(node), options)
|
61
|
+
|
62
|
+
dist = get_edge_distance_string(edge)
|
63
|
+
|
64
|
+
bs = get_node_bootstrap_string(node)
|
65
|
+
|
66
|
+
if __get_option(:branch_length_style, options) == :disabled
|
67
|
+
dist = nil
|
68
|
+
end
|
69
|
+
|
70
|
+
case __get_option(:bootstrap_style, options)
|
71
|
+
when :disabled
|
72
|
+
label + (dist ? ":#{dist}" : '')
|
73
|
+
when :molphy
|
74
|
+
label + (dist ? ":#{dist}" : '') + (bs ? "[#{bs}]" : '')
|
75
|
+
when :traditional
|
76
|
+
label + (bs ? bs : '') + (dist ? ":#{dist}" : '')
|
77
|
+
else
|
78
|
+
# default: same as molphy style
|
79
|
+
label + (dist ? ":#{dist}" : '') + (bs ? "[#{bs}]" : '')
|
80
|
+
end
|
81
|
+
end
|
82
|
+
private :__to_newick_format_leaf
|
83
|
+
|
84
|
+
# formats leaf for NHX
|
85
|
+
def __to_newick_format_leaf_NHX(node, edge, options)
|
86
|
+
|
87
|
+
label = __to_newick_format_label(get_node_name(node), options)
|
88
|
+
|
89
|
+
dist = get_edge_distance_string(edge)
|
90
|
+
|
91
|
+
bs = get_node_bootstrap_string(node)
|
92
|
+
|
93
|
+
if __get_option(:branch_length_style, options) == :disabled
|
94
|
+
dist = nil
|
95
|
+
end
|
96
|
+
|
97
|
+
nhx = {}
|
98
|
+
|
99
|
+
# bootstrap
|
100
|
+
nhx[:B] = bs if bs and !(bs.empty?)
|
101
|
+
# EC number
|
102
|
+
nhx[:E] = node.ec_number if node.instance_eval {
|
103
|
+
defined?(@ec_number) && self.ec_number
|
104
|
+
}
|
105
|
+
# scientific name
|
106
|
+
nhx[:S] = node.scientific_name if node.instance_eval {
|
107
|
+
defined?(@scientific_name) && self.scientific_name
|
108
|
+
}
|
109
|
+
# taxonomy id
|
110
|
+
nhx[:T] = node.taxonomy_id if node.instance_eval {
|
111
|
+
defined?(@taxonomy_id) && self.taxonomy_id
|
112
|
+
}
|
113
|
+
|
114
|
+
# :D (gene duplication or speciation)
|
115
|
+
if node.instance_eval { defined?(@events) && !(self.events.empty?) } then
|
116
|
+
if node.events.include?(:gene_duplication)
|
117
|
+
nhx[:D] = 'Y'
|
118
|
+
elsif node.events.include?(:speciation)
|
119
|
+
nhx[:D] = 'N'
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
# log likelihood
|
124
|
+
nhx[:L] = edge.log_likelihood if edge.instance_eval {
|
125
|
+
defined?(@log_likelihood) && self.log_likelihood }
|
126
|
+
# width
|
127
|
+
nhx[:W] = edge.width if edge.instance_eval {
|
128
|
+
defined?(@width) && self.width }
|
129
|
+
|
130
|
+
# merges other parameters
|
131
|
+
flag = node.instance_eval { defined? @nhx_parameters }
|
132
|
+
nhx.merge!(node.nhx_parameters) if flag
|
133
|
+
flag = edge.instance_eval { defined? @nhx_parameters }
|
134
|
+
nhx.merge!(edge.nhx_parameters) if flag
|
135
|
+
|
136
|
+
nhx_string = nhx.keys.sort{ |a,b| a.to_s <=> b.to_s }.collect do |key|
|
137
|
+
"#{key.to_s}=#{nhx[key].to_s}"
|
138
|
+
end.join(':')
|
139
|
+
nhx_string = "[&&NHX:" + nhx_string + "]" unless nhx_string.empty?
|
140
|
+
|
141
|
+
label + (dist ? ":#{dist}" : '') + nhx_string
|
142
|
+
end
|
143
|
+
private :__to_newick_format_leaf_NHX
|
144
|
+
|
145
|
+
#
|
146
|
+
def __to_newick(parents, source, depth, format_leaf,
|
147
|
+
options, &block)
|
148
|
+
result = []
|
149
|
+
if indent_string = __get_option(:indent, options) then
|
150
|
+
indent0 = indent_string * depth
|
151
|
+
indent = indent_string * (depth + 1)
|
152
|
+
newline = "\n"
|
153
|
+
else
|
154
|
+
indent0 = indent = newline = ''
|
155
|
+
end
|
156
|
+
out_edges = self.out_edges(source)
|
157
|
+
if block_given? then
|
158
|
+
out_edges.sort! { |edge1, edge2| yield(edge1[1], edge2[1]) }
|
159
|
+
else
|
160
|
+
out_edges.sort! do |edge1, edge2|
|
161
|
+
o1 = edge1[1].order_number
|
162
|
+
o2 = edge2[1].order_number
|
163
|
+
if o1 and o2 then
|
164
|
+
o1 <=> o2
|
165
|
+
else
|
166
|
+
edge1[1].name.to_s <=> edge2[1].name.to_s
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
170
|
+
out_edges.each do |src, tgt, edge|
|
171
|
+
if parents.include?(tgt) then
|
172
|
+
;;
|
173
|
+
elsif self.out_degree(tgt) == 1 then
|
174
|
+
result << indent + __send__(format_leaf, tgt, edge, options)
|
175
|
+
else
|
176
|
+
result <<
|
177
|
+
__to_newick([ src ].concat(parents), tgt, depth + 1,
|
178
|
+
format_leaf, options) +
|
179
|
+
__send__(format_leaf, tgt, edge, options)
|
180
|
+
end
|
181
|
+
end
|
182
|
+
indent0 + "(" + newline + result.join(',' + newline) +
|
183
|
+
(result.size > 0 ? newline : '') + indent0 + ')'
|
184
|
+
end
|
185
|
+
private :__to_newick
|
186
|
+
|
187
|
+
# Returns a newick formatted string.
|
188
|
+
# If block is given, the order of the node is sorted
|
189
|
+
# (in the same manner as Enumerable#sort).
|
190
|
+
#
|
191
|
+
# Available options:
|
192
|
+
# <tt>:indent</tt>::
|
193
|
+
# indent string; set false to disable (default: ' ')
|
194
|
+
# <tt>:bootstrap_style</tt>::
|
195
|
+
# <tt>:disabled</tt> disables bootstrap representations.
|
196
|
+
# <tt>:traditional</tt> for traditional style.
|
197
|
+
# <tt>:molphy</tt> for Molphy style (default).
|
198
|
+
def output_newick(options = {}, &block) #:yields: node1, node2
|
199
|
+
root = @root
|
200
|
+
root ||= self.nodes.first
|
201
|
+
return '();' unless root
|
202
|
+
__to_newick([], root, 0, :__to_newick_format_leaf, options, &block) +
|
203
|
+
__to_newick_format_leaf(root, Edge.new, options) +
|
204
|
+
";\n"
|
205
|
+
end
|
206
|
+
|
207
|
+
alias newick output_newick
|
208
|
+
|
209
|
+
|
210
|
+
# Returns a NHX (New Hampshire eXtended) formatted string.
|
211
|
+
# If block is given, the order of the node is sorted
|
212
|
+
# (in the same manner as Enumerable#sort).
|
213
|
+
#
|
214
|
+
# Available options:
|
215
|
+
# <tt>:indent</tt>::
|
216
|
+
# indent string; set false to disable (default: ' ')
|
217
|
+
#
|
218
|
+
def output_nhx(options = {}, &block) #:yields: node1, node2
|
219
|
+
root = @root
|
220
|
+
root ||= self.nodes.first
|
221
|
+
return '();' unless root
|
222
|
+
__to_newick([], root, 0,
|
223
|
+
:__to_newick_format_leaf_NHX, options, &block) +
|
224
|
+
__to_newick_format_leaf_NHX(root, Edge.new, options) +
|
225
|
+
";\n"
|
226
|
+
end
|
227
|
+
|
228
|
+
# Returns formatted text (or something) of the tree
|
229
|
+
# Currently supported format is: :newick, :nhx
|
230
|
+
def output(format, *arg, &block)
|
231
|
+
case format
|
232
|
+
when :newick
|
233
|
+
output_newick(*arg, &block)
|
234
|
+
when :nhx
|
235
|
+
output_nhx(*arg, &block)
|
236
|
+
when :phylip_distance_matrix
|
237
|
+
output_phylip_distance_matrix(*arg, &block)
|
238
|
+
else
|
239
|
+
raise 'Unknown format'
|
240
|
+
end
|
241
|
+
end
|
242
|
+
|
243
|
+
#---
|
244
|
+
# This method isn't suitable to be written in this file?
|
245
|
+
#+++
|
246
|
+
|
247
|
+
# Generates phylip-style distance matrix as a string.
|
248
|
+
# if nodes is not given, all leaves in the tree are used.
|
249
|
+
# If the names of some of the given (or default) nodes
|
250
|
+
# are not defined or are empty, the names are automatically generated.
|
251
|
+
def output_phylip_distance_matrix(nodes = nil, options = {})
|
252
|
+
nodes = self.leaves unless nodes
|
253
|
+
names = nodes.collect do |x|
|
254
|
+
y = get_node_name(x)
|
255
|
+
y = sprintf("%x", x.__id__.abs) if y.empty?
|
256
|
+
y
|
257
|
+
end
|
258
|
+
m = self.distance_matrix(nodes)
|
259
|
+
Bio::Phylip::DistanceMatrix.generate(m, names, options)
|
260
|
+
end
|
261
|
+
|
262
|
+
end #class Tree
|
263
|
+
|
264
|
+
end #module Bio
|