newick-ruby 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +27 -0
- data/bin/newickAlphabetize +12 -0
- data/bin/newickCompare +33 -0
- data/bin/newickDist +22 -0
- data/bin/newickDraw +72 -0
- data/bin/newickReorder +14 -0
- data/bin/newickReroot +24 -0
- data/bin/newickTaxa +27 -0
- data/example/jgi_19094_1366.m000227-Phatr2.tree +50 -0
- data/lib/Newick.rb +865 -0
- data/test/tc_Newick.rb +30 -0
- metadata +98 -0
data/README
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Ruby routines for manipulating Newick trees. See http://en.wikipedia.org/wiki/Newick_format for a description of what Newick trees are if you don't know, but chances are, if you don't already know, this isn't going to be very useful code to you.
|
2
|
+
|
3
|
+
I'm aware that BioRuby has Newick routines as of late, although I'm not sure if they do all my code does, plus I started my code several years ago when BioRuby was just starting out. I'm all for anyone taking what they find useful from my code and integrating it into BioRuby, though.
|
4
|
+
|
5
|
+
One of the things I'm most proud of is my midpoint rooting method, as I have yet to see another command-line rerooting method that can take in arbitrary Newick trees.
|
6
|
+
|
7
|
+
I've created wrappers around most of the major functionality of my code, such as rerooting and drawing trees. The tree drawing routines (and their wrapper, newickDraw) use the fpdf gem for rendering PDF files. Other than that, my code is standalone.
|
8
|
+
|
9
|
+
jgi_19094_1366.m000227-Phatr2.tree is an example Newick tree for playing with the routines.
|
10
|
+
|
11
|
+
Let me know what you think:
|
12
|
+
|
13
|
+
Jonathan Badger
|
14
|
+
J. Craig Venter Institute
|
15
|
+
jhbadger@gmail.com
|
16
|
+
|
17
|
+
|
18
|
+
License
|
19
|
+
(The MIT License)
|
20
|
+
|
21
|
+
Copyright © 2010 Jonathan Badger
|
22
|
+
|
23
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the ‘Software’), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
24
|
+
|
25
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
26
|
+
|
27
|
+
THE SOFTWARE IS PROVIDED ‘AS IS’, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/bin/newickCompare
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'Newick'
|
4
|
+
|
5
|
+
|
6
|
+
# Compares the clades of two Newick tree files and prints the clades that are
# present in one tree but not in the other.
if (ARGV.size != 2)
  STDERR.printf("usage: %s tree-file tree-file\n", $0)
  exit(1)
end

tree1File, tree2File = ARGV

tree1 = NewickTree.fromFile(tree1File)
tree2 = NewickTree.fromFile(tree2File)

begin
  diff1, diff2 = tree1.compare(tree2)
rescue NewickParseError
  # NewickTree#compare raises NewickParseError when the taxa differ; any other
  # failure is left to propagate instead of being reported as a taxa mismatch.
  STDERR.printf("The two trees have different taxa and are not comparable\n")
  exit(1)
end

if (diff1.empty? && diff2.empty?)
  puts "The trees are identical"
else
  # puts rather than printf: the file names are data, not format strings, so a
  # "%" in a path must not be interpreted as a format directive.
  puts "Clades in #{tree1File} but not #{tree2File}:"
  diff1.each {|clade|
    p clade
  }
  puts "Clades in #{tree2File} but not #{tree1File}:"
  diff2.each {|clade|
    p clade
  }
end
|
data/bin/newickDist
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'Newick'
|
4
|
+
|
5
|
+
# Prints the pairwise distance matrix of the taxa in a Newick tree file:
# a count line followed by one row per taxon.
unless (ARGV.size == 1)
  STDERR.printf("usage: %s tree-file\n", $0)
  exit(1)
end

file = ARGV.pop

dMatrix = NewickTree.fromFile(file).distanceMatrix

printf(" %d\n", dMatrix.keys.size)
dMatrix.keys.each do |rowName|
  printf("%-16s ", rowName)
  dMatrix.keys.each do |colName|
    printf("%f ", dMatrix[rowName][colName])
  end
  printf("\n")
end
|
data/bin/newickDraw
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'Newick'
|
4
|
+
require 'optparse'
|
5
|
+
require 'rubygems'
|
6
|
+
require 'ostruct'
|
7
|
+
require 'fpdf'
|
8
|
+
|
9
|
+
# Draws each Newick tree file given on the command line to "<file>.pdf",
# optionally highlighting ids, labelling the page and adding label brackets.
opt = OpenStruct.new
opt.brackets = false
opt.label = false
opt.highlights = nil
opt.file = nil
opt.raw = false

ARGV.options {|o|
  o.banner << " tree-file [..tree-file]"
  # The long switch was "--opt.brackets", apparently a search-and-replace
  # casualty; it is restored to "--brackets" to match the other options.
  # NOTE(review): every switch string ends in a bare space, which looks like a
  # stripped argument placeholder - confirm against the packaged script.
  o.on("-b ", "--brackets ", String,
       "load list of label brackets") {|b| opt.brackets = b}
  o.on("-h ", "--highlights ", String,
       "highlight these ids (commas)") {|h| opt.highlights = h}
  o.on("-f ", "--file ", String,
       "file with ids to highlight") {|f| opt.file = f}
  o.on("-l ", "--label ", String, "add label to tree pdf") {|l| opt.label = l}
  o.on("-r", "--raw", "draw tree with raw names (false)") {opt.raw = true}
  begin
    o.parse!
  rescue
    STDERR.puts $!.message
    STDERR.puts o
    exit(1)
  end
  if (ARGV.size < 1)
    STDERR.puts o
    exit(1)
  end
}

# id => [r, g, b] colour used to highlight that id
highlights = Hash.new
if (opt.highlights)
  opt.highlights.split(",").each {|highlight|
    highlights[highlight] = [255, 0, 0]
  }
elsif (opt.file)
  # File.foreach closes the file when iteration finishes
  File.foreach(opt.file) {|line|
    name, r, g, b = line.chomp.split(" ")
    r, g, b = 255, 0, 0 if (r.nil?) # no colour given: default to red
    if (r == "red")
      r, g, b = 255, 0, 0
    elsif (r == "green")
      r, g, b = 0, 255, 0
    elsif (r == "blue")
      r, g, b = 0, 0, 255
    end
    highlights[name] = [r.to_i, g.to_i, b.to_i]
  }
end

brackets = []
if (opt.brackets)
  File.foreach(opt.brackets) {|line|
    # Use a local for the bracket's label: assigning to opt.label here would
    # overwrite the --label option, and the rows belong in the brackets array,
    # not on opt.brackets (which holds the file name).
    x, y1, y2, label, r, p = line.chomp.split(" ")
    brackets.push([x.to_f, y1.to_f, y2.to_f, label, r, p])
  }
end

ARGV.each {|arg|
  tree = NewickTree.fromFile(arg)
  tree.draw(arg + ".pdf", "width", :giLink, opt.label, highlights,
            brackets, opt.raw)
}
|
data/bin/newickReorder
ADDED
data/bin/newickReroot
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'Newick'
|
4
|
+
|
5
|
+
# Reroots a Newick tree file and prints the result: on the named outgroup when
# one is given, otherwise at the midpoint.
if (ARGV.size != 1 && ARGV.size != 2)
  STDERR.printf("usage: %s tree-file [outgroup]\n", $0)
  exit(1)
end

treeFile, outgroup = ARGV

tree = NewickTree.fromFile(treeFile)

if (outgroup.nil?)
  print tree.midpointRoot.to_s + "\n"
else
  outNode = tree.findNode(outgroup)
  if (outNode.nil?)
    # puts rather than printf: the outgroup name is data, so a "%" in it must
    # not be treated as a format directive.
    STDERR.puts "taxon #{outgroup} not found in tree!"
    exit(1)
  else
    print tree.reroot(outNode).to_s + "\n"
  end
end
|
data/bin/newickTaxa
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'Newick'
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
# Lists the taxa of every Newick tree file given on the command line,
# one taxon per line.
ARGV.options do |opts|
  opts.banner << " tree-file [..tree-file]"
  begin
    opts.parse!
  rescue
    STDERR.puts $!.message
    STDERR.puts opts
    exit(1)
  end
  if (ARGV.size < 1)
    STDERR.puts opts
    exit(1)
  end
end

ARGV.each do |treeFile|
  NewickTree.fromFile(treeFile).taxa.each do |taxon|
    printf("%s\n", taxon)
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
((((((((gi89055836-NC_007802__Jannaschia_sp._CCS1:0.1487,
|
2
|
+
gi126733698-NZ_AAYB01000001__Roseobacter_sp._CCS2:0.13374)38:0.02173,
|
3
|
+
(gi146277633-NC_009428__Rhodobacter_sphaeroides_ATCC_17025:0.07301,
|
4
|
+
(gi126461776-NC_009049__Rhodobacter_sphaeroides_ATCC_17029,
|
5
|
+
(gi221638757-NC_011963__Rhodobacter_sphaeroides_KD131,
|
6
|
+
gi77462884-NC_007493__Rhodobacter_sphaeroides_2.4.1)):0.0644)100:0.08132)10:0.00753,
|
7
|
+
((((((gi85703662-NZ_AAMV01000001__Roseovarius_sp._217:0.03368,
|
8
|
+
gi149201960-NZ_ABCL01000002__Roseovarius_sp._TM1035:0.04265)100:0.06881,
|
9
|
+
(gi56695340-NC_003911__Ruegeria_pomeroyi_DSS-3:0.08841,
|
10
|
+
gi161381492-ABID01000001__Oceanibulbus_indolifex_HEL-45:0.08717)67:0.02279)17:0.00349,
|
11
|
+
gi149915730-NZ_ABCR01000010__Roseobacter_sp._AzwK-3b:0.11483)29:0.00623,
|
12
|
+
gi83949869-NZ_AALY01000001__Roseovarius_nubinhibens_ISM:0.10446)35:0.00982,
|
13
|
+
gi84499551-NZ_AAMO01000001__Oceanicola_batsensis_HTCC2597:0.12567)22:0.00834,
|
14
|
+
gi114762130-NZ_AATQ01000002__Roseovarius_sp._HTCC2601:0.14982)14:0.01609)33:0.00763,
|
15
|
+
gi126724773-NZ_AAXZ01000001__Rhodobacterales_bacterium_HTCC2150:0.17111)55:0.0191,
|
16
|
+
(gi86139282-NZ_AANB01000010__Roseobacter_sp._MED193:0.11315,
|
17
|
+
gi126737474-NZ_AAYC01000001__Roseobacter_sp._SK209-2-6:0.11586)98:0.083)100:0.06501,
|
18
|
+
gi114770387-NZ_AATR01000003__alpha_proteobacterium_HTCC2255:0.29961)88:0.04304,
|
19
|
+
(gi68128010-LMF__Leishmania_major_strain_Friedlin:0.25126,
|
20
|
+
Esi0134_0023-EctoSi0409__Ectocarpus_siliculosus:0.26783)79:0.03562)19:0.00574999999999998,
|
21
|
+
((((jgi_19094_1366.m000227-Phatr2:0.28783,
|
22
|
+
gi126725898-NZ_AAXZ01000002__Rhodobacterales_bacterium_HTCC2150:0.25415)50:0.0282,
|
23
|
+
GSPATP00027661001-PTET__Paramecium_tetraurelia:0.30004)14:0.01339,
|
24
|
+
(gi150005830-NC_009614__Bacteroides_vulgatus_ATCC_8482:0.29225,
|
25
|
+
(GSPATP00031374001-PTET__Paramecium_tetraurelia:0.23246,
|
26
|
+
GSPATP00000334001-PTET__Paramecium_tetraurelia:0.27135)58:0.03217)19:0.01428)12:0.00801,
|
27
|
+
((((GSPATP00035296001-PTET__Paramecium_tetraurelia:0.064,
|
28
|
+
GSPATP00017028001-PTET__Paramecium_tetraurelia:0.06577)100:0.12482,
|
29
|
+
GSPATP00020452001-PTET__Paramecium_tetraurelia:0.21487)90:0.03934,
|
30
|
+
GSPATP00014508001-PTET__Paramecium_tetraurelia:0.26982)31:0.01084,
|
31
|
+
(GSPATP00035980001-PTET__Paramecium_tetraurelia:0.28115,
|
32
|
+
((gi4268;-TTA__Tetrahymena_thermophila:0.18536,
|
33
|
+
(GSPATP00031578001-PTET__Paramecium_tetraurelia:0.0635,
|
34
|
+
GSPATP00027990001-PTET__Paramecium_tetraurelia:0.06627)100:0.13906)83:0.031,
|
35
|
+
((((gi3342;-TTA__Tetrahymena_thermophila:0.14329,
|
36
|
+
((gi10081;-TTA__Tetrahymena_thermophila:0.13329,
|
37
|
+
(GSPATP00036803001-PTET__Paramecium_tetraurelia:0.0107,
|
38
|
+
GSPATP00021332001-PTET__Paramecium_tetraurelia:0.0122)100:0.14152)34:0.00509,
|
39
|
+
GSPATP00021466001-PTET__Paramecium_tetraurelia:0.1285)23:0.00652)71:0.02086,
|
40
|
+
((GSPATP00028753001-PTET__Paramecium_tetraurelia:0.00662,
|
41
|
+
GSPATP00026866001-PTET__Paramecium_tetraurelia:0.00865)100:0.14064,
|
42
|
+
(GSPATP00016293001-PTET__Paramecium_tetraurelia:0.07457,
|
43
|
+
GSPATP00012481001-PTET__Paramecium_tetraurelia:0.06284)100:0.08837)69:0.02542)86:0.02708,
|
44
|
+
(((gi3128;-TTA__Tetrahymena_thermophila:0.13981,
|
45
|
+
GSPATP00016960001-PTET__Paramecium_tetraurelia:0.1579)32:0.01042,
|
46
|
+
((GSPATP00016480001-PTET__Paramecium_tetraurelia:0.04289,
|
47
|
+
GSPATP00007200001-PTET__Paramecium_tetraurelia:0.03344)100:0.12625,
|
48
|
+
GSPATP00013125001-PTET__Paramecium_tetraurelia:0.17146)23:0.00199)43:0.01384,
|
49
|
+
GSPATP00005896001-PTET__Paramecium_tetraurelia:0.21183)85:0.03536)33:0.014,
|
50
|
+
km10041-km__Karlodinium_micrum:0.23603):0.0032)13:0.01936)42:0.00412)9:0.03714)82:0.000530000000000023);
|
data/lib/Newick.rb
ADDED
@@ -0,0 +1,865 @@
|
|
1
|
+
# Error signalled when a Newick tree string cannot be processed
class NewickParseError < RuntimeError; end
|
4
|
+
|
5
|
+
# One token (a meaningful substring) found while scanning a Newick string
class NewickToken
  # type:  semantic meaning of the token (label, weight, etc.)
  # value: string value of the token
  attr_reader :type, :value

  # Stores the token's type and value as given.
  def initialize(type, value)
    @type, @value = type, value
  end
end
|
18
|
+
|
19
|
+
|
20
|
+
# Splits a Newick tree string into tokens that NewickTree uses
class NewickTokenizer
  # characters skipped before a token starts
  WHITESPACE = [" ", "\t", "\n", "\r"].freeze
  # characters allowed in a branch length; "+" is needed for exponents such as
  # "1.0e+20", which is how Ruby prints large floats
  WEIGHT_PATTERN = /([0-9.eE+\-]+)/
  # a quoted label, quotes included
  QUOTED_LABEL_PATTERN = /(\'[^\']*\')/
  # an unquoted label: everything up to the next structural character
  LABEL_PATTERN = /([^,():]+)/

  def initialize(str)
    @str = str
    @pos = 0
  end

  # returns the next character in the string and updates position;
  # returns nil (without moving) once the end of the string is reached
  def nextChar
    return nil if (@pos >= @str.size)
    c = @str[@pos, 1]
    @pos += 1
    return c
  end

  # returns the next token in the string and updates position
  #
  # Token types are "SYMBOL" for "(", ")" and ",", "WEIGHT" for the number
  # after a ":", and "LABEL" for quoted or unquoted names. Raises
  # NewickParseError for a ":" without a number or an unterminated quote.
  #
  # NOTE(review): at the end of the input nextChar returns nil without
  # advancing, so the label rule below re-reads the final character when it is
  # a label character. NewickTree#buildTree calls peekToken.type right after a
  # label, so this behaviour is kept as it was.
  def nextToken
    c = nextChar
    # skip leading whitespace iteratively (no recursion per blank character)
    c = nextChar while WHITESPACE.include?(c)
    if (c == "(" || c == ")" || c == ',')
      return NewickToken.new("SYMBOL", c)
    elsif (c == ":")
      weight = matchAt(WEIGHT_PATTERN, @pos)
      if (weight.nil?)
        raise NewickParseError, "Illegal weight at pos #{@pos} of #{@str}"
      end
      @pos += weight.length
      return NewickToken.new("WEIGHT", weight)
    elsif (c == "'")
      quoted = matchAt(QUOTED_LABEL_PATTERN, @pos - 1)
      if (quoted.nil?)
        raise NewickParseError, "Illegal label at pos #{@pos} of #{@str}"
      end
      @pos += quoted.length - 1 # the opening quote is already consumed
      return NewickToken.new("LABEL", quoted)
    else
      label = matchAt(LABEL_PATTERN, @pos - 1)
      return nil if (label.nil?)
      @pos += label.length - 1 # the first character is already consumed
      return NewickToken.new("LABEL", label)
    end
  end

  # returns the next token in the string without changing position
  def peekToken
    origPos = @pos
    token = nextToken
    @pos = origPos
    return token
  end

  private

  # returns the first capture of pattern when it matches starting exactly at
  # index start of the string, nil otherwise
  def matchAt(pattern, start)
    return nil unless (@str.index(pattern, start) == start)
    return Regexp.last_match(1)
  end
end
|
75
|
+
|
76
|
+
# Represents a single node in a NewickTree
class NewickNode
  # parent node of node (nil until the node is added to a parent)
  attr_accessor :parent
  # edge length of node
  attr_accessor :edgeLen
  # name of node
  attr_accessor :name
  # child nodes of node
  attr_reader :children
  # x position of node
  attr_accessor :x
  # y position of node
  attr_accessor :y

  def initialize(name, edgeLen)
    @parent = nil
    @name = name
    @edgeLen = edgeLen
    @children = []
  end

  # adds child node to list of children and sets child's parent to self
  def addChild(child)
    child.parent = self
    @children.push(child)
  end

  # removes child node from list of children and sets child's parent to nil
  def removeChild(child)
    @children.delete(child)
    child.parent = nil
  end

  # returns string representation of node
  #
  # showLen:   append ":<edgeLen>" for non-zero edge lengths
  # bootStrap: "node" writes internal node names after the ")";
  #            "branch" instead appends ":<name>" for internal nodes whose
  #            name converts to a positive integer
  def to_s(showLen = true, bootStrap = "node")
    s = ""
    if (!leaf?)
      s += "(" + @children.map {|child| child.to_s(showLen, bootStrap)}.join(",") + ")"
    end
    s += @name if (leaf? || bootStrap == "node")
    s += ":#{@edgeLen}" if (showLen && @edgeLen != 0)
    s += ":#{name}" if (!leaf? && name.to_i > 0 && bootStrap == "branch")
    return s
  end

  # returns sorted array of names of leaves (taxa) that are contained in the
  # node; with bootstrap set, an internal node's own name is included as well
  def taxa(bootstrap = false)
    taxa = []
    if (leaf?)
      taxa.push(name)
    else
      taxa.push(@name) if (bootstrap)
      @children.each {|child| taxa.concat(child.taxa)}
    end
    return taxa.sort
  end

  # returns array of leaves (taxa) that are contained in the node
  def leaves
    return descendants.select {|node| node.leaf?}
  end

  # returns array of non leaves that are contained in the node
  def intNodes
    return descendants.reject {|node| node.leaf?}
  end

  # returns node with given name, or nil if not found
  #
  # A node whose name equals the argument exactly is preferred. Only when no
  # such node exists is the argument used as a regular expression, returning
  # the first node (self first, then descendants in order) whose name matches.
  # Trying exact matches first keeps names containing regex characters such as
  # "|" or "." from selecting an unrelated node.
  def findNode(name)
    candidates = [self] + descendants
    exact = candidates.find {|node| node.name == name}
    return exact if (!exact.nil?)
    pattern = /#{name}/
    return candidates.find {|node| node.name =~ pattern}
  end

  # reverses the parent-child relationship (used in rerooting tree)
  def reverseChildParent
    return if (@parent.nil?)
    oldParent = @parent
    oldParent.removeChild(self)
    # flip the ancestors first so oldParent is free to become our child
    oldParent.reverseChildParent if (!oldParent.parent.nil?)
    addChild(oldParent)
    # the edge between the two nodes now belongs to the former parent
    oldParent.edgeLen = @edgeLen
    @edgeLen = 0
  end

  # True if given node is child (or grandchild, etc.) of self. False otherwise
  def include?(node)
    while (node.parent != nil)
      return true if (node.parent == self)
      node = node.parent
    end
    return false
  end

  # True if node has no children (and therefore is a leaf)
  def leaf?
    return @children.empty?
  end

  # returns array of all descendant nodes (each child followed by its own
  # descendants)
  def descendants
    found = []
    @children.each {|child|
      found.push(child)
      found.concat(child.descendants)
    }
    return found
  end

  # return array of all sibling nodes (empty when the node has no parent)
  def siblings
    return [] if (parent.nil?)
    return @parent.children.select {|child| child != self}
  end

  # reorders descendant nodes alphabetically by name; returns self
  def reorder
    return self if (@children.empty?)
    @children.sort! {|x, y| x.name <=> y.name}
    @children.each {|child| child.reorder}
    return self
  end

  # returns the last common ancestor node of self and given node, or nil when
  # the walk towards the root runs out of parents without finding one
  def lca(node)
    if (self.include?(node))
      return self
    elsif (node.include?(self))
      return node
    elsif (@parent.nil?)
      return nil
    else
      return @parent.lca(node)
    end
  end

  # returns the distance (sum of edge lengths) to the ancestor node
  def distToAncestor(ancestor)
    dist = 0
    node = self
    while (node != ancestor)
      dist += node.edgeLen
      node = node.parent
    end
    return dist
  end

  # returns number of nodes to the ancestor node: 0 for the node itself,
  # nil when the given node is not an ancestor
  def nodesToAncestor(ancestor)
    steps = 0
    node = self
    while (node != ancestor)
      node = node.parent
      return nil if (node.nil?)
      steps += 1
    end
    return steps
  end

  # returns number of nodes to other node (nil when they share no ancestor)
  def nodesToNode(node)
    common = lca(node)
    if (common.nil?)
      return nil
    elsif (common == self)
      return node.nodesToAncestor(self)
    elsif (common == node)
      return nodesToAncestor(node)
    else
      return nodesToAncestor(common) + node.nodesToAncestor(common)
    end
  end

  # calculates node Y position as the mean of the children's Y positions;
  # a leaf has no children to average and keeps its current Y
  def calcYPos
    return @y if (@children.empty?)
    ySum = 0
    @children.each {|child| ySum += child.y}
    @y = ySum / @children.size
  end

  # calculates X positions of the node and everything below it: 0 for a node
  # without parent, otherwise the parent's X plus the edge length
  def calcXPos
    if (parent.nil?)
      @x = 0
    else
      @x = parent.x + @edgeLen
    end
    @children.each {|child| child.calcXPos}
  end

  # returns the maximum X value among the node's children (0 if none exceeds 0)
  def xMax
    return children.inject(0) {|best, child| (child.x > best) ? child.x : best}
  end

  # returns the maximum Y value among the node's children (0 if none exceeds 0)
  def yMax
    return children.inject(0) {|best, child| (child.y > best) ? child.y : best}
  end

  # returns the minimum Y value among the node's children (1e6 if none is
  # smaller)
  def yMin
    return children.inject(1e6) {|best, child| (child.y < best) ? child.y : best}
  end

end
|
348
|
+
|
349
|
+
class NewickTree
|
350
|
+
attr_reader :root
|
351
|
+
# Builds the tree from a Newick string: the string is tokenized and the node
# returned by buildTree becomes the root.
def initialize(treeString)
  @root = buildTree(nil, NewickTokenizer.new(treeString))
end
|
355
|
+
|
356
|
+
# create new NewickTree from tree stored in file
#
# The lines of the file are joined without their line endings and bracketed
# [comments] are removed before the string is parsed.
def NewickTree.fromFile(fileName)
  # block form closes the file even if reading raises
  treeString = File.open(fileName) {|inFile|
    inFile.map {|line| line.chomp}.join
  }
  treeString.gsub!(/\[[^\]]*\]/,"") # remove comments before parsing
  return NewickTree.new(treeString)
end
|
367
|
+
|
368
|
+
# internal function used for building tree structure from string
#
# Reads one subtree from the tokenizer and returns its node, or nil when the
# tokenizer has no tokens left. A subtree is either a label with an optional
# weight, or a parenthesised, comma-separated list of subtrees followed by an
# optional name and an optional weight. The parent argument is accepted but
# not used here; children are attached by the caller's addChild.
# Raises NewickParseError on malformed or truncated input.
def buildTree(parent, tokenizer)
  token = tokenizer.nextToken
  return nil if (token.nil?)

  if (token.type == "LABEL")
    node = NewickNode.new(token.value, 0)
    peek = tokenizer.peekToken
    if (!peek.nil? && peek.type == "WEIGHT")
      node.edgeLen = tokenizer.nextToken.value.to_f
    end
    return node
  end

  if (token.value != "(")
    raise NewickParseError,
      "Expected '(' or label but found: #{token.value}"
  end

  node = NewickNode.new("", 0)
  loop do
    child = buildTree(node, tokenizer)
    if (child.nil?)
      raise NewickParseError, "Expected '(' or label but found end of input"
    end
    node.addChild(child)
    peek = tokenizer.peekToken
    break if (peek.nil? || peek.value != ",")
    tokenizer.nextToken # consume the ","
  end

  closing = tokenizer.nextToken
  if (closing.nil? || closing.value != ")")
    found = closing.nil? ? "end of input" : closing.value
    raise NewickParseError, "Expected ')' but found: #{found}"
  end

  peek = tokenizer.peekToken
  return node if (peek.nil?)
  if (peek.value == "(")
    # two groups back to back, e.g. "(A,B)(C,D)"
    raise NewickParseError, "Expected ',' or ')' but found: ("
  end
  # a ";" arrives as a label token and ends the tree; it is not a node name
  if (peek.type == "LABEL" && peek.value != ";")
    node.name = tokenizer.nextToken.value
    peek = tokenizer.peekToken
  end
  if (!peek.nil? && peek.type == "WEIGHT")
    node.edgeLen = tokenizer.nextToken.value.to_f
  end
  return node
end
|
412
|
+
|
413
|
+
# Newick text of the whole tree: the root's string form followed by ";"
def to_s(showLen = true, bootStrap = "node")
  rootText = @root.to_s(showLen, bootStrap)
  return rootText + ";"
end
|
417
|
+
|
418
|
+
# write string representation of tree to file
#
# The file is overwritten with the root's string form followed by ";" and a
# newline. showLen and bootStrap are passed through to the root's to_s.
def write(fileName, showLen = true, bootStrap = "node")
  # block form closes the file even if building the string raises
  File.open(fileName, "w") {|file|
    file.print @root.to_s(showLen, bootStrap) + ";\n"
  }
end
|
424
|
+
|
425
|
+
# Asks the root to reorder its descendants in place, then hands back the tree
def reorder
  @root.reorder
  self
end
|
430
|
+
|
431
|
+
# renames nodes and creates an alias file, returning aliased tree and hash
#
# Every descendant of the root whose name is non-empty and does not convert to
# a non-zero integer gets the next alias. The returned hash maps each alias to
# the original name. When aliasFile is given, "alias<TAB>name" lines are
# written to it. With longAlias the first alias is "SEQ" followed by as many
# zeros as the longest taxon name has characters; otherwise it is "SEQ0000001".
def alias(aliasFile = nil, longAlias = false)
  ali = Hash.new
  aliF = aliasFile.nil? ? nil : File.new(aliasFile, "w")
  begin
    if (longAlias)
      taxon = "SEQ" + "0" * taxa.map {|name| name.length}.max
    else
      taxon = "SEQ0000001"
    end
    @root.descendants.each {|node|
      next if (node.name == "" || node.name.to_i != 0)
      ali[taxon] = node.name
      aliF.printf("%s\t%s\n", taxon, node.name) if (!aliF.nil?)
      node.name = taxon.dup
      taxon = taxon.succ
    }
  ensure
    # close the alias file even if renaming raises part-way through
    aliF.close if (!aliF.nil?)
  end
  return self, ali
end
|
451
|
+
|
452
|
+
# Gives each descendant of the root the name stored for its current name in
# aliasNames; nodes without a (non-nil) entry are left alone. Returns the tree.
def unAlias(aliasNames)
  @root.descendants.each do |node|
    unless aliasNames[node.name].nil?
      node.name = aliasNames[node.name]
    end
  end
  self
end
|
459
|
+
|
460
|
+
# renames nodes according to inverse alias hash
#
# aliasNames maps alias => original name. Every descendant of the root whose
# name is one of the original names is renamed to the corresponding alias.
# The hash is inverted once, so each node is renamed at most once and a newly
# assigned alias is never matched again against another entry. When several
# aliases share an original name, the last one in the hash wins.
def reAlias(aliasNames)
  aliasFor = aliasNames.invert
  @root.descendants.each {|node|
    node.name = aliasFor[node.name] if (aliasFor.key?(node.name))
  }
  return self
end
|
469
|
+
|
470
|
+
# All taxa of the tree, exactly as reported by the root node
def taxa
  @root.taxa
end
|
474
|
+
|
475
|
+
# returns a 2D hash of pairwise distances on tree
#
# Keys on both levels are the taxon names reported by the root. The distance
# between two taxa is the sum of their distances to their last common
# ancestor; the diagonal is 0.0.
def distanceMatrix
  names = @root.taxa
  # Index leaves by exact name once, instead of searching the tree for every
  # pair: a pattern-based search can return a different node whose name merely
  # contains (or regex-matches) the taxon name. The first leaf wins when a
  # name occurs more than once.
  nodeFor = Hash.new
  ([@root] + @root.descendants).each {|node|
    nodeFor[node.name] = node if (node.leaf? && !nodeFor.key?(node.name))
  }
  dMatrix = Hash.new
  names.each {|taxon1|
    dMatrix[taxon1] = Hash.new
    taxon1Node = nodeFor[taxon1]
    names.each {|taxon2|
      if (taxon1 == taxon2)
        dMatrix[taxon1][taxon2] = 0.0
      else
        taxon2Node = nodeFor[taxon2]
        lca = taxon1Node.lca(taxon2Node)
        dMatrix[taxon1][taxon2] = taxon1Node.distToAncestor(lca) +
          taxon2Node.distToAncestor(lca)
      end
    }
  }
  return dMatrix
end
|
494
|
+
|
495
|
+
# Compares this tree with another one and returns two arrays: the clades found
# only in this tree and the clades found only in the other tree. Both trees
# are duplicated and unrooted first. Raises NewickParseError when the two
# trees do not have the same taxa.
# NOTE(review): dup looks like a shallow copy here, so unroot may also alter
# the root shared with the original trees - confirm.
def compare(tree)
  mine = self.dup.unroot
  theirs = tree.dup.unroot

  unless (mine.taxa == theirs.taxa)
    raise NewickParseError, "The trees have different taxa!"
  end

  myClades = mine.clades
  theirClades = theirs.clades
  onlyMine = myClades.reject {|clade| theirClades.include?(clade)}
  onlyTheirs = theirClades.reject {|clade| myClades.include?(clade)}
  return onlyMine, onlyTheirs
end
|
520
|
+
|
521
|
+
# Looks the name up starting at the root and returns whatever the root's
# findNode returns
def findNode(name)
  @root.findNode(name)
end
|
525
|
+
|
526
|
+
# Turns a root with exactly two children into a multi-way root: one child is
# dissolved, its children are attached to the root directly and its edge
# length is added to the remaining child. A root with any other number of
# children is left as it is. Returns the tree.
def unroot
  return self unless (@root.children.size == 2) # already unrooted

  kept, dissolved = @root.children
  # don't uproot leaf side: dissolve the other child when the second is a leaf
  kept, dissolved = dissolved, kept if (dissolved.leaf?)
  kept.edgeLen += dissolved.edgeLen
  dissolved.children.each do |grandchild|
    @root.addChild(grandchild)
  end
  @root.removeChild(dissolved)
  self
end
|
540
|
+
|
541
|
+
# Re-roots the tree on the edge above the given node. The tree is unrooted
# first, the node is cut from its parent, the parent's side is turned upside
# down, and a new unnamed root joins the two halves. A non-zero edge length of
# the node is split evenly between the two halves. Returns the tree.
def reroot(node)
  unroot
  outgroup = node
  remainder = outgroup.parent
  remainder.removeChild(node)
  remainder.reverseChildParent
  unless (outgroup.edgeLen == 0)
    remainder.edgeLen = outgroup.edgeLen / 2.0
    outgroup.edgeLen = remainder.edgeLen
  end
  @root = NewickNode.new("", 0)
  @root.addChild(outgroup)
  @root.addChild(remainder)
  self
end
|
557
|
+
|
558
|
+
# returns the two most distant leaves and their distance apart
#
# The distance between two leaves is the sum of their distances to their last
# common ancestor. Only pairs of distinct leaves are considered: pairing a
# leaf with itself would measure twice its own edge length (its "common
# ancestor" being its parent) and could beat every real pair.
# Returns nil, nil, 0 when no pair has a distance greater than 0.
def mostDistantLeaves
  greatestDist = 0
  org1, org2 = nil, nil
  @root.leaves.combination(2) {|node1, node2|
    lca = node1.lca(node2)
    dist = node1.distToAncestor(lca) + node2.distToAncestor(lca)
    if (dist > greatestDist)
      org1 = node1
      org2 = node2
      greatestDist = dist
    end
  }
  return org1, org2, greatestDist
end
|
581
|
+
|
582
|
+
# add EC numbers from alignment
#
# Reads the header lines (those starting with ">") of alignFile. For each
# header containing "[EC:<number>", the first word of the header is mapped to
# "<word>_<number>", and the nodes are then renamed through unAlias.
def addECnums(alignFile)
  ec = Hash.new
  # File.foreach closes the file once all lines are read
  File.foreach(alignFile) {|line|
    next unless (line =~ /^>/)
    definition = line.chomp[1..-1] # header text without the leading ">"
    name = definition.split(" ").first
    if (definition =~ /\[EC:([0-9|\.]*)/)
      ec[name] = name + "_" + $1
    end
  }
  unAlias(ec)
end
|
596
|
+
|
597
|
+
# Roots the tree halfway between its two most distant leaves. The tree is
# unrooted first; when the greatest distance is 0 nothing else is done.
# Otherwise the walk starts at whichever of the two leaves is farther from the
# root and climbs until half the distance is covered; the edge reached is cut
# and a new unnamed root is placed on it. Returns the tree.
def midpointRoot
  unroot
  org1, org2, dist = mostDistantLeaves
  midDist = dist / 2.0
  return self if (midDist == 0)

  node = (org1.distToAncestor(@root) > org2.distToAncestor(@root)) ? org1 : org2

  distTraveled = 0
  until (node.nil?)
    distTraveled += node.edgeLen
    break if (distTraveled >= midDist)
    node = node.parent
  end

  oldDist = node.edgeLen
  left = node
  right = node.parent
  right.removeChild(node)
  right.reverseChildParent
  # split the cut edge: left keeps the overshoot past the midpoint,
  # right receives the rest of the original edge
  left.edgeLen = distTraveled - midDist
  right.edgeLen = oldDist - left.edgeLen
  @root = NewickNode.new("", 0)
  @root.addChild(left)
  @root.addChild(right)
  self
end
|
625
|
+
|
626
|
+
# One taxa array per descendant of the root that has children; the bootstrap
# flag is handed on to each node's taxa call
def clades(bootstrap = false)
  withChildren = @root.descendants.reject {|node| node.children.empty?}
  withChildren.map {|node| node.taxa(bootstrap)}
end
|
634
|
+
|
635
|
+
# Names internal nodes after bootstrap values. Each entry of bootClades is an
# array whose first element is the value and whose remaining elements are the
# clade's taxa; a non-leaf descendant of the root with exactly those taxa gets
# the value as its name.
def addBootStrap(bootClades)
  @root.descendants.each do |clade|
    next if clade.leaf?
    bootClades.each do |bClade|
      boot = bClade.first
      members = bClade[1..-1]
      clade.name = boot if (members == clade.taxa) # same clade found
    end
  end
end
|
647
|
+
|
648
|
+
# Walks from the node found for taxon up to the root and collects, per level,
# the taxa of the parent that are not taxa of the current node. Returns nil
# when no node is found for taxon.
def relatives(taxon)
  node = findNode(taxon)
  return nil if (node.nil?)

  levels = []
  until (node.parent.nil?)
    levels.push(node.parent.taxa - node.taxa)
    node = node.parent
  end
  levels
end
|
662
|
+
|
663
|
+
|
664
|
+
# Moves values PHYLIP stored as branch lengths into node names: for every
# descendant of the root the edge length is reset to 0, and a non-leaf node
# whose former length truncates to a positive integer is named after it.
def fixPhylip
  @root.descendants.each do |child|
    support = child.edgeLen.to_i
    child.edgeLen = 0
    child.name = support.to_s if (support > 0 && !child.leaf?)
  end
end
|
674
|
+
|
675
|
+
|
676
|
+
# calculates leaf node positions (backwards from leaves, given spacing)
|
677
|
+
# Assigns drawing positions to the nodes of the tree.
#
# yUnit - vertical distance between consecutive leaves.
#
# The root is reordered first. Leaves then receive y values starting at 0.25
# and growing by yUnit. Internal nodes get +calcYPos+ deepest-first, followed
# by the root, and x positions are computed from the root outwards.
#
# Returns the root-first list of internal nodes (the value of the final each).
def calcPos(yUnit)
  yPos = 0.25
  @root.reorder
  leaves = @root.leaves.sort { |a, b| a.nodesToNode(b) <=> b.nodesToNode(a) }
  leaves.each do |leaf|
    leaf.y = yPos
    yPos += yUnit
  end

  # y positions: deepest internal nodes first (backwards from the leaves)
  deepestFirst = @root.intNodes.sort do |a, b|
    b.nodesToAncestor(@root) <=> a.nodesToAncestor(@root)
  end
  deepestFirst.each { |node| node.calcYPos }
  @root.calcYPos

  # x positions: forwards from the root
  @root.calcXPos
  shallowestFirst = @root.intNodes.sort do |a, b|
    a.nodesToAncestor(@root) <=> b.nodesToAncestor(@root)
  end
  # The original loop ignored its block variable and re-ran @root.calcXPos
  # once per internal node; each node now computes its own position.
  # NOTE(review): calcXPos is defined outside this view -- confirm it only
  # depends on the parent's x, which is already set at this point.
  shallowestFirst.each { |node| node.calcXPos }
end
|
698
|
+
|
699
|
+
# function to generate gi link to ncbi for draw, below
|
700
|
+
# Builds an NCBI protein-viewer URL for a leaf entry (default linker of draw).
#
# entry - leaf label. Recognised forms are "gi" followed by optional
#         underscores and digits (the digits become the id), or a run of
#         upper-case letters, digits and "|" at the start of the label that
#         is followed by "|" (that run becomes the id).
#
# Returns the URL String, or nil when the entry matches neither form. Both
# forms need a non-empty id, so a label such as "gibbon" no longer yields a
# link with an empty "val=" parameter.
def giLink(entry)
  ncbiLink = "http://www.ncbi.nlm.nih.gov/entrez/"
  protLink = "viewer.fcgi?db=protein&val="
  return nil unless entry =~ /^gi_*([0-9]+)/ || entry =~ /(^[A-Z|0-9]+)\|/

  ncbiLink + protLink + $1
end
|
709
|
+
|
710
|
+
# returns PDF representation of branching structure of tree
|
711
|
+
# Renders the branching structure of the tree as a PDF through FPDF.
#
# pdfFile    - output path, also used as the PDF title. A leaf whose entry
#              text occurs inside this string is written in red. When the
#              string starts with "--", +pdf.Output+ is called with no file
#              name and its value is returned.
# boot       - when "width", branches whose node name converts (to_i) to a
#              value above 75 are drawn five times thicker.
# linker     - name of a method, invoked with +send+, mapping a leaf entry to
#              a URL; nil/false disables links.
# labelName  - when true, the basename of pdfFile (minus ".pdf") is written
#              below the tree in a 24-point font.
# highlights - Hash of substring => colour, indexed as [r, g, b]; a leaf
#              whose entry contains the substring is coloured.
# brackets   - optional Array of [x, y1, y2, label, r, p] rows; rows with a
#              nil label are skipped.
# rawNames   - when true, leaves are labelled with their full node name.
#
# Returns the value of +pdf.Output+.
def draw(pdfFile, boot="width", linker = :giLink, labelName = false,
         highlights = Hash.new, brackets = nil, rawNames = false)
  pdf = FPDF.new('P', "cm")
  pdf.SetTitle(pdfFile)
  pdf.SetCreator("newickDraw")
  pdf.SetAuthor(ENV["USER"]) unless ENV["USER"].nil?
  pdf.AddPage

  # Font size, leaf spacing and pen width depend on the number of taxa.
  # Each row: [exclusive upper bound on taxa, fontSize, yUnit, lineWidth].
  sizing = [
    [30,  10, 0.5,   0.02],
    [60,  8,  0.25,  0.01],
    [150, 8,  0.197, 0.01],
    [300, 2,  0.09,  0.005],
    [400, 2,  0.055, 0.002],
    [800, 1,  0.030, 0.0015]
  ]
  numTaxa = taxa.size # computed once rather than once per threshold test
  tier = sizing.find { |row| numTaxa < row[0] } || [nil, 0.5, 0.020, 0.0010]
  fontSize, yUnit, lineWidth = tier[1], tier[2], tier[3]
  bootScale = 0.5 * fontSize
  pdf.SetFont('Times','B', fontSize)
  calcPos(yUnit) # calculate node pos before drawing

  max = 0
  @root.leaves.each do |leaf|
    d = leaf.distToAncestor(@root)
    max = d if max < d
  end
  # With every leaf at distance 0, 10.0/max would be Infinity and every
  # x coordinate NaN; fall back to a unit scale in that case.
  xScale = max > 0 ? 10.0 / max : 1.0
  xOffSet = 0.25

  pdf.SetLineWidth(lineWidth)
  pdf.SetTextColor(0, 0, 0)
  pdf.Line(0, @root.y, xOffSet, @root.y)
  pdf.Line(xOffSet, @root.yMin, xOffSet, @root.yMax)

  @root.descendants.each do |child|
    isLeaf = child.leaf?
    # A leaf takes its branch thickness from its parent's bootstrap value.
    supportNode = isLeaf ? child.parent : child
    if supportNode.name.to_i > 75 && boot == "width" # good bootstrap
      pdf.SetLineWidth(lineWidth * 5)
    else
      pdf.SetLineWidth(lineWidth)
    end
    childX = xOffSet + child.x*xScale

    if !isLeaf
      # internal node: bootstrap label plus the vertical bar over its children
      pdf.SetXY(childX, (child.yMin + child.yMax) / 2.0)
      pdf.SetFont('Times','B', bootScale)
      pdf.Write(0, child.name.to_s)
      pdf.SetFont('Times','B', fontSize)
      pdf.Line(childX, child.yMin, childX, child.yMax)
    else
      pdf.SetXY(childX, child.y)
      # leaf names may look like "entry__species"; "{...}" in the entry
      # overrides the species text
      fields = child.name.split("__")
      entry, species = fields.first, fields.last
      species = $1 if entry =~ /\{([^\}]*)\}/
      species = entry if species.nil? && !rawNames
      species = child.name if rawNames

      hl = false
      highlights.each do |pattern, colour|
        hl = colour if entry.index(pattern)
      end

      if pdfFile.index(entry) # name of query taxon
        pdf.SetTextColor(255,0, 0) # red
        pdf.Write(0, entry)
        pdf.SetTextColor(0, 0, 0) # black
      elsif linker && (link = send(linker, entry))
        # NOTE(review): linked leaves are always highlighted in red, while
        # the unlinked branches below use the colour stored in highlights.
        pdf.SetTextColor(255,0, 0) if hl
        pdf.Write(0, species, link)
        pdf.SetTextColor(0, 0, 0) if hl # black
      elsif !species.nil?
        pdf.SetTextColor(hl[0],hl[1], hl[2]) if hl
        pdf.Write(0, species)
        pdf.SetTextColor(0, 0, 0) if hl # black
      else
        pdf.SetTextColor(hl[0],hl[1], hl[2]) if hl
        pdf.Write(0, entry)
        pdf.SetTextColor(0, 0, 0) if hl # black
      end
    end

    # horizontal branch from the parent's x to this node's x
    pdf.Line(xOffSet + child.parent.x*xScale, child.y, childX, child.y)
  end

  if labelName
    pdf.SetFont('Times','B', 24)
    pdf.SetXY(0, pdf.GetY + 1)
    pdf.Write(0, File.basename(pdfFile,".pdf"))
  end

  if brackets
    brackets.each do |bracket|
      x, y1, y2, label, rMark, pMark = bracket
      next if label.nil?
      # float division so integer coordinates still give the true midpoint
      midY = (y1 + y2) / 2.0
      pdf.SetLineWidth(lineWidth * 5)
      pdf.SetFont('Times','B', fontSize*1.5)
      pdf.Line(x, y1, x, y2)
      pdf.Line(x, y1, x - 0.3, y1)
      pdf.Line(x, y2, x - 0.3, y2)
      pdf.SetXY(x, midY)
      pdf.Write(0, label)
      if rMark == "r"
        pdf.SetTextColor(255, 0, 0)
        pdf.SetXY(x + 1.8, -0.65 + midY)
        pdf.SetFont('Times','B', fontSize*10)
        pdf.Write(0, " .")
        pdf.SetTextColor(0, 0, 0)
      end
      if pMark == "p" || rMark == "p"
        pdf.SetTextColor(255, 0, 255)
        pdf.SetXY(x + 2.3, -0.65 + midY)
        pdf.SetFont('Times','B', fontSize*10)
        pdf.Write(0, " .")
        pdf.SetTextColor(0, 0, 0)
      end
    end
  end

  # scale bar representing a distance of 0.1
  pdf.SetLineWidth(lineWidth * 5)
  pdf.Line(1, pdf.GetY + 1, 1 + 0.1*xScale, pdf.GetY + 1)
  pdf.SetFont('Times','B', fontSize)
  pdf.SetXY(1 + 0.1*xScale, pdf.GetY + 1)
  pdf.Write(0, "0.1")

  if pdfFile =~ /^--/
    return pdf.Output
  else
    pdf.Output(pdfFile)
  end
end
|
862
|
+
end
|
863
|
+
|
864
|
+
|
865
|
+
|
data/test/tc_Newick.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require "Newick"
|
2
|
+
require "test/unit"
|
3
|
+
|
4
|
+
# Unit tests for NewickTree: serialisation, reordering, aliasing and taxa
# listing. assert_equal takes the expected value first, then the actual one.
class TestNewickTree < Test::Unit::TestCase
  # to_s with both flags off, with only the first on, and with defaults
  def test_to_s
    tree = NewickTree.new("(A:0.65,(B:0.1,C:0.2)90:0.5);")
    assert_equal("(A,(B,C));", tree.to_s(false, false))
    assert_equal("(A:0.65,(B:0.1,C:0.2):0.5);", tree.to_s(true, false))
    assert_equal("(A:0.65,(B:0.1,C:0.2)90:0.5);", tree.to_s)
  end

  def test_reorder
    tree = NewickTree.new("(B,(A,D),C);")
    assert_equal("((A,D),B,C);", tree.reorder.to_s)
  end

  # alias returns the renamed tree together with the alias => name mapping
  def test_alias
    tree = NewickTree.new("((Apple,Pear),Grape);")
    aliTree, ali = tree.alias
    assert_equal("((SEQ0000001,SEQ0000002),SEQ0000003);", aliTree.to_s)
    assert_equal({"SEQ0000001" => "Apple", "SEQ0000002" => "Pear", "SEQ0000003" => "Grape"}, ali)
  end

  def test_unAlias
    tree = NewickTree.new("(SEQ0000001, SEQ0000002, SEQ0000003);")
    ali = {"SEQ0000001" => "Frog", "SEQ0000002" => "Whale", "SEQ0000003" => "Kumquat"}
    assert_equal("(Frog,Whale,Kumquat);", tree.unAlias(ali).to_s)
  end

  def test_taxa
    tree = NewickTree.new("(A:0.65,(B:0.1,C:0.2)90:0.5);")
    assert_equal(["A", "B", "C"], tree.taxa)
  end
end
|
metadata
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: newick-ruby
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 23
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
- 0
|
10
|
+
version: 1.0.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Jonathan Badger
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-07-26 00:00:00 -07:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: fpdf
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 5
|
30
|
+
segments:
|
31
|
+
- 1
|
32
|
+
- 5
|
33
|
+
- 3
|
34
|
+
version: 1.5.3
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id001
|
37
|
+
description: newick-ruby provides routines for parsing newick-format phylogenetic trees.
|
38
|
+
email: jhbadger@gmail.com
|
39
|
+
executables:
|
40
|
+
- newickAlphabetize
|
41
|
+
- newickCompare
|
42
|
+
- newickDist
|
43
|
+
- newickDraw
|
44
|
+
- newickReorder
|
45
|
+
- newickReroot
|
46
|
+
- newickTaxa
|
47
|
+
extensions: []
|
48
|
+
|
49
|
+
extra_rdoc_files: []
|
50
|
+
|
51
|
+
files:
|
52
|
+
- example/jgi_19094_1366.m000227-Phatr2.tree
|
53
|
+
- lib/Newick.rb
|
54
|
+
- bin/newickAlphabetize
|
55
|
+
- bin/newickCompare
|
56
|
+
- bin/newickDist
|
57
|
+
- bin/newickDraw
|
58
|
+
- bin/newickReorder
|
59
|
+
- bin/newickReroot
|
60
|
+
- bin/newickTaxa
|
61
|
+
- README
|
62
|
+
- test/tc_Newick.rb
|
63
|
+
has_rdoc: true
|
64
|
+
homepage: http://github.com/jhbadger/Newick-ruby
|
65
|
+
licenses: []
|
66
|
+
|
67
|
+
post_install_message:
|
68
|
+
rdoc_options: []
|
69
|
+
|
70
|
+
require_paths:
|
71
|
+
- lib
|
72
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
hash: 3
|
78
|
+
segments:
|
79
|
+
- 0
|
80
|
+
version: "0"
|
81
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
83
|
+
requirements:
|
84
|
+
- - ">="
|
85
|
+
- !ruby/object:Gem::Version
|
86
|
+
hash: 3
|
87
|
+
segments:
|
88
|
+
- 0
|
89
|
+
version: "0"
|
90
|
+
requirements: []
|
91
|
+
|
92
|
+
rubyforge_project:
|
93
|
+
rubygems_version: 1.3.7
|
94
|
+
signing_key:
|
95
|
+
specification_version: 3
|
96
|
+
summary: newick-ruby provides routines for parsing newick-format phylogenetic trees.
|
97
|
+
test_files:
|
98
|
+
- test/tc_Newick.rb
|