bio 1.4.2 → 1.4.3
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +66 -0
- data/ChangeLog +989 -4524
- data/KNOWN_ISSUES.rdoc +67 -2
- data/README.rdoc +89 -23
- data/README_DEV.rdoc +93 -2
- data/RELEASE_NOTES.rdoc +167 -95
- data/Rakefile +199 -7
- data/bioruby.gemspec +27 -12
- data/bioruby.gemspec.erb +6 -3
- data/doc/ChangeLog-before-1.4.2 +5013 -0
- data/doc/RELEASE_NOTES-1.4.2.rdoc +132 -0
- data/doc/Tutorial.rd +21 -3
- data/doc/Tutorial.rd.html +20 -12
- data/etc/bioinformatics/seqdatabase.ini +13 -196
- data/gemfiles/Gemfile.travis-jruby1.8 +7 -0
- data/gemfiles/Gemfile.travis-jruby1.9 +10 -0
- data/gemfiles/Gemfile.travis-ruby1.8 +7 -0
- data/gemfiles/Gemfile.travis-ruby1.9 +10 -0
- data/gemfiles/modify-Gemfile.rb +28 -0
- data/gemfiles/prepare-gemspec.rb +25 -0
- data/lib/bio/alignment.rb +1 -1
- data/lib/bio/appl/bl2seq/report.rb +3 -3
- data/lib/bio/appl/blast/ddbj.rb +0 -3
- data/lib/bio/appl/blast/format0.rb +4 -22
- data/lib/bio/appl/blast/genomenet.rb +33 -16
- data/lib/bio/appl/blast/ncbioptions.rb +8 -3
- data/lib/bio/appl/blast/remote.rb +6 -5
- data/lib/bio/appl/blast/report.rb +10 -6
- data/lib/bio/appl/blast/rpsblast.rb +3 -2
- data/lib/bio/appl/blast/wublast.rb +3 -3
- data/lib/bio/command.rb +118 -36
- data/lib/bio/data/na.rb +1 -1
- data/lib/bio/db/embl/embl.rb +74 -0
- data/lib/bio/db/embl/format_embl.rb +0 -4
- data/lib/bio/db/fasta.rb +57 -45
- data/lib/bio/db/fasta/defline.rb +1 -1
- data/lib/bio/db/fasta/format_fasta.rb +0 -4
- data/lib/bio/db/fasta/format_qual.rb +0 -5
- data/lib/bio/db/fastq/format_fastq.rb +0 -1
- data/lib/bio/db/genbank/format_genbank.rb +0 -4
- data/lib/bio/db/gff.rb +41 -12
- data/lib/bio/db/kegg/genes.rb +3 -3
- data/lib/bio/db/kegg/kgml.rb +465 -64
- data/lib/bio/db/newick.rb +0 -244
- data/lib/bio/db/pdb.rb +1 -4
- data/lib/bio/db/pdb/atom.rb +3 -2
- data/lib/bio/db/pdb/chain.rb +2 -3
- data/lib/bio/db/pdb/chemicalcomponent.rb +3 -2
- data/lib/bio/db/pdb/model.rb +2 -2
- data/lib/bio/db/pdb/pdb.rb +2 -1
- data/lib/bio/db/pdb/residue.rb +2 -2
- data/lib/bio/db/pdb/utils.rb +7 -4
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +52 -5
- data/lib/bio/feature.rb +2 -3
- data/lib/bio/io/flatfile/autodetection.rb +1 -1
- data/lib/bio/io/flatfile/buffer.rb +84 -0
- data/lib/bio/sequence.rb +6 -4
- data/lib/bio/sequence/aa.rb +3 -5
- data/lib/bio/sequence/adapter.rb +6 -6
- data/lib/bio/sequence/common.rb +3 -3
- data/lib/bio/sequence/compat.rb +2 -7
- data/lib/bio/sequence/dblink.rb +6 -5
- data/lib/bio/sequence/format.rb +0 -6
- data/lib/bio/sequence/format_raw.rb +0 -4
- data/lib/bio/sequence/generic.rb +3 -4
- data/lib/bio/sequence/na.rb +4 -6
- data/lib/bio/sequence/quality_score.rb +2 -0
- data/lib/bio/sequence/sequence_masker.rb +3 -0
- data/lib/bio/shell/core.rb +1 -0
- data/lib/bio/tree.rb +1 -2
- data/lib/bio/tree/output.rb +264 -0
- data/lib/bio/util/restriction_enzyme.rb +1 -3
- data/lib/bio/util/restriction_enzyme/analysis.rb +8 -5
- data/lib/bio/util/restriction_enzyme/analysis_basic.rb +4 -3
- data/lib/bio/util/restriction_enzyme/cut_symbol.rb +3 -2
- data/lib/bio/util/restriction_enzyme/dense_int_array.rb +3 -0
- data/lib/bio/util/restriction_enzyme/double_stranded.rb +3 -4
- data/lib/bio/util/restriction_enzyme/double_stranded/aligned_strands.rb +3 -4
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair.rb +3 -4
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_location_pair_in_enzyme_notation.rb +3 -4
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations.rb +3 -4
- data/lib/bio/util/restriction_enzyme/double_stranded/cut_locations_in_enzyme_notation.rb +3 -4
- data/lib/bio/util/restriction_enzyme/range/cut_range.rb +3 -4
- data/lib/bio/util/restriction_enzyme/range/cut_ranges.rb +3 -4
- data/lib/bio/util/restriction_enzyme/range/horizontal_cut_range.rb +3 -4
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +3 -4
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +3 -4
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragment.rb +3 -4
- data/lib/bio/util/restriction_enzyme/range/sequence_range/fragments.rb +3 -4
- data/lib/bio/util/restriction_enzyme/range/vertical_cut_range.rb +3 -4
- data/lib/bio/util/restriction_enzyme/single_strand.rb +3 -3
- data/lib/bio/util/restriction_enzyme/single_strand/cut_locations_in_enzyme_notation.rb +3 -4
- data/lib/bio/util/restriction_enzyme/single_strand_complement.rb +3 -4
- data/lib/bio/util/restriction_enzyme/sorted_num_array.rb +3 -0
- data/lib/bio/util/restriction_enzyme/string_formatting.rb +3 -4
- data/lib/bio/version.rb +11 -2
- data/sample/seqdatabase.ini +210 -0
- data/test/bioruby_test_helper.rb +37 -12
- data/test/data/KEGG/test.kgml +37 -0
- data/test/data/command/echoarg2.bat +0 -0
- data/test/data/command/echoarg2.sh +4 -0
- data/test/functional/bio/test_command.rb +58 -28
- data/test/{functional → network}/bio/appl/blast/test_remote.rb +0 -0
- data/test/{functional → network}/bio/appl/test_blast.rb +0 -0
- data/test/{functional → network}/bio/appl/test_pts1.rb +0 -0
- data/test/{functional → network}/bio/io/test_ddbjrest.rb +0 -0
- data/test/{functional → network}/bio/io/test_ensembl.rb +0 -0
- data/test/{functional → network}/bio/io/test_pubmed.rb +0 -0
- data/test/{functional → network}/bio/io/test_soapwsdl.rb +0 -0
- data/test/{functional → network}/bio/io/test_togows.rb +0 -0
- data/test/network/bio/test_command.rb +35 -0
- data/test/runner.rb +16 -6
- data/test/unit/bio/appl/blast/test_report.rb +119 -0
- data/test/unit/bio/appl/blast/test_rpsblast.rb +1 -0
- data/test/unit/bio/data/test_na.rb +1 -1
- data/test/unit/bio/db/embl/test_embl.rb +2 -7
- data/test/unit/bio/db/embl/test_embl_rel89.rb +2 -7
- data/test/unit/bio/db/fasta/test_defline.rb +1 -1
- data/test/unit/bio/db/genbank/test_genpept.rb +1 -1
- data/test/unit/bio/db/kegg/test_drug.rb +1 -1
- data/test/unit/bio/db/kegg/test_genome.rb +1 -1
- data/test/unit/bio/db/kegg/test_glycan.rb +1 -1
- data/test/unit/bio/db/kegg/test_kgml.rb +1022 -0
- data/test/unit/bio/db/sanger_chromatogram/test_abif.rb +2 -1
- data/test/unit/bio/db/sanger_chromatogram/test_scf.rb +4 -2
- data/test/unit/bio/db/test_newick.rb +2 -0
- data/test/unit/bio/db/test_phyloxml.rb +54 -2
- data/test/unit/bio/db/test_phyloxml_writer.rb +15 -9
- data/test/unit/bio/db/test_soft.rb +1 -1
- data/test/unit/bio/io/flatfile/test_autodetection.rb +6 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +141 -0
- data/test/unit/bio/sequence/test_common.rb +36 -4
- data/test/unit/bio/sequence/test_na.rb +1 -1
- data/test/unit/bio/test_command.rb +9 -4
- data/test/unit/bio/test_sequence.rb +2 -2
- data/test/unit/bio/test_tree.rb +11 -11
- data/test/unit/bio/util/test_restriction_enzyme.rb +1 -1
- metadata +1428 -655
- data/rdoc.zsh +0 -8
data/lib/bio/feature.rb
CHANGED
@@ -5,12 +5,11 @@
|
|
5
5
|
# 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: feature.rb,v 1.13.2.1 2008/03/04 10:12:22 ngoto Exp $
|
9
|
-
|
10
|
-
require 'bio/location'
|
11
8
|
|
12
9
|
module Bio
|
13
10
|
|
11
|
+
autoload :Locations, 'bio/location' unless const_defined?(:Locations)
|
12
|
+
|
14
13
|
# = DESCRIPTION
|
15
14
|
# Container for the sequence annotation.
|
16
15
|
#
|
@@ -482,7 +482,7 @@ module Bio
|
|
482
482
|
/^seq1 \= .*\, \d+ bp(\r|\r?\n)seq2 \= .*\, \d+ bp(\r|\r?\n)/ ],
|
483
483
|
|
484
484
|
fastq = RuleRegexp[ 'Bio::Fastq',
|
485
|
-
/^\@.+(?:\r|\r?\n)(?:[^\@\+].*(?:\r|\r?\n))
|
485
|
+
/^\@.+(?:\r|\r?\n)(?:[^\@\+].*(?:\r|\r?\n))+/ ],
|
486
486
|
|
487
487
|
fastaformat = RuleProc.new('Bio::FastaFormat',
|
488
488
|
'Bio::NBRF',
|
@@ -43,17 +43,101 @@ module Bio
|
|
43
43
|
# *arg is passed to File.open.
|
44
44
|
#
|
45
45
|
# Like File.open, a block can be accepted.
|
46
|
+
#
|
47
|
+
# Unlike File.open, the default is binary mode, unless text mode
|
48
|
+
# is explicitly specified in mode.
|
46
49
|
def self.open_file(filename, *arg)
  # Decide text vs. binary from the same arguments File.open will receive:
  # text mode is used only when the :textmode option is set or the mode
  # string contains "t"; otherwise the file object is switched to binmode.
  parsed = _parse_file_open_arg(*arg)
  textmode = (parsed[:textmode] || /t/ =~ parsed[:fmode_string].to_s) ? true : false

  unless block_given?
    # Without a block, the wrapper around the opened file is returned.
    io = File.open(filename, *arg)
    io.binmode unless textmode
    return self.new(io, filename)
  end

  # With a block, the wrapper is yielded inside File.open's own block.
  File.open(filename, *arg) do |io|
    io.binmode unless textmode
    yield self.new(io, filename)
  end
end
|
56
67
|
|
68
|
+
# Parses the mode parameter of File.open.
#
# _mode_ is expected to be integer-like (responds to to_int) or
# string-like (responds to to_str).
#
# Returns:
# * <tt>{ :fmode_integer => int }</tt> for an integer-like mode,
# * a Hash with <tt>:fmode_string</tt> and, when present in a
#   "fmode:ext_enc:int_enc" string, <tt>:external_encoding</tt> and
#   <tt>:internal_encoding</tt> for a string-like mode,
# * nil when _mode_ is neither.
def self._parse_file_open_mode(mode)
  # Integer-like modes take precedence over string-like ones.
  as_integer = begin
                 mode.to_int
               rescue NoMethodError
                 nil
               end
  return { :fmode_integer => as_integer } if as_integer

  as_string = begin
                mode.to_str
              rescue NoMethodError
                nil
              end
  return nil unless as_string

  # "r:UTF-8:EUC-JP" => flags "r", external "UTF-8", internal "EUC-JP"
  flags, external, internal = as_string.split(/\:/)
  parsed = { :fmode_string => flags }
  parsed[:external_encoding] = external if external
  parsed[:internal_encoding] = internal if internal
  parsed
end
|
95
|
+
private_class_method :_parse_file_open_mode
|
96
|
+
|
97
|
+
# Parses file open arguments (the arguments of File.open that follow
# the file name): an optional mode, an optional permission, and an
# optional option Hash.
#
# Returns a Hash that may contain the keys <tt>:fmode_integer</tt>,
# <tt>:fmode_string</tt>, <tt>:external_encoding</tt>,
# <tt>:internal_encoding</tt>, <tt>:perm</tt>, <tt>:textmode</tt>,
# <tt>:binmode</tt> and <tt>:autoclose</tt>. Only keys that were
# specified with a true value are present; an empty Hash is returned
# when nothing was specified.
def self._parse_file_open_arg(*arg)
  fmode_hash = nil
  perm = nil

  # Positional arguments: [mode [, perm]] [, options]
  elem = arg.shift
  if elem then
    fmode_hash = _parse_file_open_mode(elem)
    if fmode_hash then
      # The first argument was a mode; the next one may be a permission.
      elem = arg.shift
      if elem then
        begin
          perm = elem.to_int
        rescue NoMethodError
          # not integer-like: elem is kept as the candidate option Hash
        end
      end
      elem = arg.shift if perm
    end
  end

  # Work on a copy so that the caller's option Hash is never modified.
  opt = elem.kind_of?(Hash) ? elem.dup : {}

  # A :mode option replaces a positional mode.
  if elem = opt[:mode] then
    fmode_hash = _parse_file_open_mode(elem)
  end
  fmode_hash ||= {}
  fmode_hash[:perm] = perm if perm

  # :encoding => "ext_enc[:int_enc]"
  # The string itself must be kept in enc; assigning the result of
  # empty? would leave a boolean in enc and make enc.split fail.
  enc = opt[:encoding].to_s
  unless enc.empty? then
    ext_enc, int_enc = enc.split(/\:/)
    fmode_hash[:external_encoding] = ext_enc if ext_enc
    fmode_hash[:internal_encoding] = int_enc if int_enc
  end

  # Explicit options override values derived from the mode string
  # or from :encoding.
  [ :external_encoding, :internal_encoding,
    :textmode, :binmode, :autoclose, :perm ].each do |key|
    val = opt[key]
    fmode_hash[key] = val if val
  end
  fmode_hash
end
|
139
|
+
private_class_method :_parse_file_open_arg
|
140
|
+
|
57
141
|
# Creates a new input stream wrapper from URI specified as _uri_.
|
58
142
|
# by using OpenURI.open_uri or URI#open.
|
59
143
|
# _uri_ must be a String or URI object.
|
data/lib/bio/sequence.rb
CHANGED
@@ -9,10 +9,6 @@
|
|
9
9
|
# Jan Aerts <jan.aerts@bbsrc.ac.uk>
|
10
10
|
# License:: The Ruby License
|
11
11
|
#
|
12
|
-
# $Id:$
|
13
|
-
#
|
14
|
-
|
15
|
-
require 'bio/sequence/compat'
|
16
12
|
|
17
13
|
module Bio
|
18
14
|
|
@@ -74,6 +70,12 @@ class Sequence
|
|
74
70
|
autoload :QualityScore, 'bio/sequence/quality_score'
|
75
71
|
autoload :SequenceMasker, 'bio/sequence/sequence_masker'
|
76
72
|
|
73
|
+
#--
|
74
|
+
# require "bio/sequence/compat.rb" here to avoid circular require and
|
75
|
+
# possible superclass mismatch of AA class
|
76
|
+
#++
|
77
|
+
require 'bio/sequence/compat'
|
78
|
+
|
77
79
|
include Format
|
78
80
|
include SequenceMasker
|
79
81
|
|
data/lib/bio/sequence/aa.rb
CHANGED
@@ -6,14 +6,12 @@
|
|
6
6
|
# Ryan Raaum <ryan@raaum.org>
|
7
7
|
# License:: The Ruby License
|
8
8
|
#
|
9
|
-
# $Id: aa.rb,v 1.4 2007/04/05 23:35:41 trevor Exp $
|
10
|
-
#
|
11
|
-
|
12
|
-
require 'bio/sequence/common'
|
13
9
|
|
14
10
|
module Bio
|
15
11
|
|
16
|
-
autoload :AminoAcid, 'bio/data/aa'
|
12
|
+
autoload :AminoAcid, 'bio/data/aa' unless const_defined?(:AminoAcid)
|
13
|
+
|
14
|
+
require 'bio/sequence' unless const_defined?(:Sequence)
|
17
15
|
|
18
16
|
class Sequence
|
19
17
|
|
data/lib/bio/sequence/adapter.rb
CHANGED
@@ -5,10 +5,10 @@
|
|
5
5
|
# Naohisa Goto <ng@bioruby.org>,
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id:$
|
9
|
-
#
|
10
8
|
|
11
|
-
|
9
|
+
module Bio
|
10
|
+
|
11
|
+
require 'bio/sequence' unless const_defined?(:Sequence)
|
12
12
|
|
13
13
|
# Internal use only. Normal users should not use this module.
|
14
14
|
#
|
@@ -17,7 +17,7 @@ require 'bio/sequence'
|
|
17
17
|
#
|
18
18
|
# This module is used by using "extend", not "include".
|
19
19
|
#
|
20
|
-
module
|
20
|
+
module Sequence::Adapter
|
21
21
|
|
22
22
|
autoload :GenBank, 'bio/db/genbank/genbank_to_biosequence'
|
23
23
|
autoload :EMBL, 'bio/db/embl/embl_to_biosequence'
|
@@ -107,6 +107,6 @@ __END_OF_DEF__
|
|
107
107
|
true
|
108
108
|
end
|
109
109
|
|
110
|
-
end #module
|
111
|
-
|
110
|
+
end #module Sequence::Adapter
|
112
111
|
|
112
|
+
end #module Bio
|
data/lib/bio/sequence/common.rb
CHANGED
@@ -6,12 +6,12 @@
|
|
6
6
|
# Ryan Raaum <ryan@raaum.org>
|
7
7
|
# License:: The Ruby License
|
8
8
|
#
|
9
|
-
# $Id:$
|
10
|
-
#
|
11
9
|
|
12
10
|
module Bio
|
13
11
|
|
14
|
-
autoload :Locations, 'bio/location'
|
12
|
+
autoload :Locations, 'bio/location' unless const_defined?(:Locations)
|
13
|
+
|
14
|
+
require 'bio/sequence' unless const_defined?(:Sequence)
|
15
15
|
|
16
16
|
class Sequence
|
17
17
|
|
data/lib/bio/sequence/compat.rb
CHANGED
@@ -6,17 +6,12 @@
|
|
6
6
|
# Ryan Raaum <ryan@raaum.org>
|
7
7
|
# License:: The Ruby License
|
8
8
|
#
|
9
|
-
# $Id:$
|
10
|
-
#
|
11
|
-
|
12
9
|
|
13
10
|
module Bio
|
14
11
|
|
15
|
-
|
12
|
+
require 'bio/sequence' unless const_defined?(:Sequence)
|
16
13
|
|
17
|
-
|
18
|
-
autoload :NA, 'bio/sequence/na'
|
19
|
-
autoload :AA, 'bio/sequence/aa'
|
14
|
+
class Sequence
|
20
15
|
|
21
16
|
# Return sequence as
|
22
17
|
# String[http://corelib.rubyonrails.org/classes/String.html].
|
data/lib/bio/sequence/dblink.rb
CHANGED
@@ -5,15 +5,15 @@
|
|
5
5
|
# Naohisa Goto <ng@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: dblink.rb,v 1.1.2.1 2008/06/17 15:44:22 ngoto Exp $
|
9
|
-
#
|
10
8
|
|
11
|
-
|
9
|
+
module Bio
|
10
|
+
|
11
|
+
require 'bio/sequence' unless const_defined?(:Sequence)
|
12
12
|
|
13
13
|
# Bio::Sequence::DBLink stores IDs with the database name.
|
14
14
|
# Its main purpose is to store database cross-reference information
|
15
15
|
# for a sequence entry.
|
16
|
-
class
|
16
|
+
class Sequence::DBLink
|
17
17
|
|
18
18
|
# creates a new DBLink object
|
19
19
|
def initialize(database, primary_id, *secondary_ids)
|
@@ -49,6 +49,7 @@ class Bio::Sequence::DBLink
|
|
49
49
|
self.new(*(str.split(/\s*\;\s*/)))
|
50
50
|
end
|
51
51
|
|
52
|
-
end #class
|
52
|
+
end #class Sequence::DBLink
|
53
53
|
|
54
|
+
end #module Bio
|
54
55
|
|
data/lib/bio/sequence/format.rb
CHANGED
data/lib/bio/sequence/generic.rb
CHANGED
@@ -5,12 +5,11 @@
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: generic.rb,v 1.5 2007/04/05 23:35:41 trevor Exp $
|
9
|
-
#
|
10
|
-
|
11
|
-
require 'bio/sequence/common'
|
12
8
|
|
13
9
|
module Bio
|
10
|
+
|
11
|
+
require 'bio/sequence' unless const_defined?(:Sequence)
|
12
|
+
|
14
13
|
class Sequence
|
15
14
|
|
16
15
|
class Generic < String #:nodoc:
|
data/lib/bio/sequence/na.rb
CHANGED
@@ -6,15 +6,13 @@
|
|
6
6
|
# Ryan Raaum <ryan@raaum.org>
|
7
7
|
# License:: The Ruby License
|
8
8
|
#
|
9
|
-
# $Id: na.rb,v 1.7 2007/04/23 16:43:51 trevor Exp $
|
10
|
-
#
|
11
|
-
|
12
|
-
require 'bio/sequence/common'
|
13
9
|
|
14
10
|
module Bio
|
15
11
|
|
16
|
-
autoload :NucleicAcid, 'bio/data/na'
|
17
|
-
autoload :CodonTable, 'bio/data/codontable'
|
12
|
+
autoload :NucleicAcid, 'bio/data/na' unless const_defined?(:NucleicAcid)
|
13
|
+
autoload :CodonTable, 'bio/data/codontable' unless const_defined?(:CodonTable)
|
14
|
+
|
15
|
+
require 'bio/sequence' unless const_defined?(:Sequence)
|
18
16
|
|
19
17
|
class Sequence
|
20
18
|
|
data/lib/bio/shell/core.rb
CHANGED
data/lib/bio/tree.rb
CHANGED
@@ -0,0 +1,264 @@
|
|
1
|
+
#
|
2
|
+
# = bio/tree/output.rb - Phylogenetic tree formatter
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2004-2006
|
5
|
+
# Naohisa Goto <ng@bioruby.org>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
#
|
9
|
+
# == Description
|
10
|
+
#
|
11
|
+
# This file contains formatter of Newick, NHX and Phylip distance matrix.
|
12
|
+
#
|
13
|
+
# == References
|
14
|
+
#
|
15
|
+
# * http://evolution.genetics.washington.edu/phylip/newick_doc.html
|
16
|
+
# * http://www.phylosoft.org/forester/NHX.html
|
17
|
+
#
|
18
|
+
|
19
|
+
module Bio
  class Tree

    #---
    # newick output
    #+++

    # Default output options. Used by __get_option as the last fallback
    # when a key is set neither in the per-call options nor in @options.
    DEFAULT_OPTIONS =
      { :indent => ' ' }

    # Returns the value of the option _key_.
    # Lookup order: the given _options_ Hash, then @options (if set),
    # then DEFAULT_OPTIONS. A value of false is a valid setting
    # (only nil means "not specified").
    def __get_option(key, options)
      if (r = options[key]) != nil then
        r
      elsif @options && (r = @options[key]) != nil then
        r
      else
        DEFAULT_OPTIONS[key]
      end
    end
    private :__get_option


    # Formats a Newick label (unquoted_label or quoted_label).
    # When the :parser option is :naive, the string is returned as is.
    # Otherwise, a label containing parentheses, comma, colon, square
    # brackets, underscore, single quote or control characters is
    # single-quoted (with embedded quotes doubled); any other label is
    # returned with spaces replaced by underscores.
    def __to_newick_format_label(str, options)
      if __get_option(:parser, options) == :naive then
        return str.to_s
      end
      str = str.to_s
      if /([\(\)\,\:\[\]\_\'\x00-\x1f\x7f])/ =~ str then
        # quoted_label
        return "\'" + str.gsub(/\'/, "\'\'") + "\'"
      end
      # unquoted_label
      return str.gsub(/ /, '_')
    end
    private :__to_newick_format_label

    # Formats a node (label, branch length and bootstrap value) for
    # Newick output.
    # The :branch_length_style option set to :disabled suppresses the
    # branch length. The :bootstrap_style option selects how the
    # bootstrap value is written (:disabled, :molphy or :traditional;
    # anything else is treated as :molphy).
    def __to_newick_format_leaf(node, edge, options)

      label = __to_newick_format_label(get_node_name(node), options)

      dist = get_edge_distance_string(edge)

      bs = get_node_bootstrap_string(node)

      if __get_option(:branch_length_style, options) == :disabled
        dist = nil
      end

      case __get_option(:bootstrap_style, options)
      when :disabled
        label + (dist ? ":#{dist}" : '')
      when :molphy
        label + (dist ? ":#{dist}" : '') + (bs ? "[#{bs}]" : '')
      when :traditional
        label + (bs ? bs : '') + (dist ? ":#{dist}" : '')
      else
        # default: same as molphy style
        label + (dist ? ":#{dist}" : '') + (bs ? "[#{bs}]" : '')
      end
    end
    private :__to_newick_format_leaf

    # Formats a node for NHX output: label, branch length and a
    # "[&&NHX:key=value:...]" block built from the attributes that are
    # set on the node and the edge. Keys are written in sorted order.
    def __to_newick_format_leaf_NHX(node, edge, options)

      label = __to_newick_format_label(get_node_name(node), options)

      dist = get_edge_distance_string(edge)

      bs = get_node_bootstrap_string(node)

      if __get_option(:branch_length_style, options) == :disabled
        dist = nil
      end

      nhx = {}

      # bootstrap
      nhx[:B] = bs if bs and !(bs.empty?)
      # EC number
      nhx[:E] = node.ec_number if node.instance_eval {
        defined?(@ec_number) && self.ec_number
      }
      # scientific name
      nhx[:S] = node.scientific_name if node.instance_eval {
        defined?(@scientific_name) && self.scientific_name
      }
      # taxonomy id
      nhx[:T] = node.taxonomy_id if node.instance_eval {
        defined?(@taxonomy_id) && self.taxonomy_id
      }

      # :D (gene duplication or speciation)
      if node.instance_eval { defined?(@events) && !(self.events.empty?) } then
        if node.events.include?(:gene_duplication)
          nhx[:D] = 'Y'
        elsif node.events.include?(:speciation)
          nhx[:D] = 'N'
        end
      end

      # log likelihood
      nhx[:L] = edge.log_likelihood if edge.instance_eval {
        defined?(@log_likelihood) && self.log_likelihood }
      # width
      nhx[:W] = edge.width if edge.instance_eval {
        defined?(@width) && self.width }

      # merges other parameters
      flag = node.instance_eval { defined? @nhx_parameters }
      nhx.merge!(node.nhx_parameters) if flag
      flag = edge.instance_eval { defined? @nhx_parameters }
      nhx.merge!(edge.nhx_parameters) if flag

      nhx_string = nhx.keys.sort{ |a,b| a.to_s <=> b.to_s }.collect do |key|
        "#{key.to_s}=#{nhx[key].to_s}"
      end.join(':')
      nhx_string = "[&&NHX:" + nhx_string + "]" unless nhx_string.empty?

      label + (dist ? ":#{dist}" : '') + nhx_string
    end
    private :__to_newick_format_leaf_NHX

    # Recursively builds the parenthesized part of the Newick string
    # for the subtree below _source_.
    #
    # _parents_ is the list of nodes already visited on the path from
    # the root (edges leading back to them are skipped), _depth_ is the
    # current nesting level (used for indentation), and _format_leaf_
    # is the name of the method used to format each node.
    # If a block is given, it is used to sort the child nodes at every
    # level of the tree; otherwise children are sorted by order_number
    # when both have one, and by name if not.
    def __to_newick(parents, source, depth, format_leaf,
                    options, &block)
      result = []
      if indent_string = __get_option(:indent, options) then
        indent0 = indent_string * depth
        indent  = indent_string * (depth + 1)
        newline = "\n"
      else
        indent0 = indent = newline = ''
      end
      out_edges = self.out_edges(source)
      if block_given? then
        out_edges.sort! { |edge1, edge2| yield(edge1[1], edge2[1]) }
      else
        out_edges.sort! do |edge1, edge2|
          o1 = edge1[1].order_number
          o2 = edge2[1].order_number
          if o1 and o2 then
            o1 <=> o2
          else
            edge1[1].name.to_s <=> edge2[1].name.to_s
          end
        end
      end
      out_edges.each do |src, tgt, edge|
        if parents.include?(tgt) then
          ;;
        elsif self.out_degree(tgt) == 1 then
          result << indent + __send__(format_leaf, tgt, edge, options)
        else
          # The sort block must be forwarded; otherwise the caller's
          # ordering would apply only to the children of the root.
          result <<
            __to_newick([ src ].concat(parents), tgt, depth + 1,
                        format_leaf, options, &block) +
            __send__(format_leaf, tgt, edge, options)
        end
      end
      indent0 + "(" + newline + result.join(',' + newline) +
        (result.size > 0 ? newline : '') + indent0 + ')'
    end
    private :__to_newick

    # Returns a newick formatted string.
    # If block is given, the order of the node is sorted
    # (as the same manner as Enumerable#sort).
    #
    # Available options:
    # <tt>:indent</tt>::
    #     indent string; set false to disable (default: ' ')
    # <tt>:bootstrap_style</tt>::
    #     <tt>:disabled</tt> disables bootstrap representations.
    #     <tt>:traditional</tt> for traditional style.
    #     <tt>:molphy</tt> for Molphy style (default).
    def output_newick(options = {}, &block) #:yields: node1, node2
      root = @root
      root ||= self.nodes.first
      return '();' unless root
      __to_newick([], root, 0, :__to_newick_format_leaf, options, &block) +
        __to_newick_format_leaf(root, Edge.new, options) +
        ";\n"
    end

    alias newick output_newick


    # Returns a NHX (New Hampshire eXtended) formatted string.
    # If block is given, the order of the node is sorted
    # (as the same manner as Enumerable#sort).
    #
    # Available options:
    # <tt>:indent</tt>::
    #     indent string; set false to disable (default: ' ')
    #
    def output_nhx(options = {}, &block) #:yields: node1, node2
      root = @root
      root ||= self.nodes.first
      return '();' unless root
      __to_newick([], root, 0,
                  :__to_newick_format_leaf_NHX, options, &block) +
        __to_newick_format_leaf_NHX(root, Edge.new, options) +
        ";\n"
    end

    # Returns formatted text (or something) of the tree.
    # Currently supported formats are: :newick, :nhx and
    # :phylip_distance_matrix. The remaining arguments and the block
    # are passed to the corresponding output_* method.
    # Raises RuntimeError for an unknown format.
    def output(format, *arg, &block)
      case format
      when :newick
        output_newick(*arg, &block)
      when :nhx
        output_nhx(*arg, &block)
      when :phylip_distance_matrix
        output_phylip_distance_matrix(*arg, &block)
      else
        raise 'Unknown format'
      end
    end

    #---
    # This method isn't suitable to written in this file?
    #+++

    # Generates phylip-style distance matrix as a string.
    # if nodes is not given, all leaves in the tree are used.
    # If the names of some of the given (or default) nodes
    # are not defined or are empty, the names are automatically generated.
    def output_phylip_distance_matrix(nodes = nil, options = {})
      nodes = self.leaves unless nodes
      names = nodes.collect do |x|
        y = get_node_name(x)
        y = sprintf("%x", x.__id__.abs) if y.empty?
        y
      end
      m = self.distance_matrix(nodes)
      Bio::Phylip::DistanceMatrix.generate(m, names, options)
    end

  end #class Tree

end #module Bio
|