tree_clusters 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/lib/tree_clusters/version.rb +1 -1
- data/lib/tree_clusters.rb +117 -6
- data/test_files/small.mapping +8 -0
- data/test_files/small.tre +1 -0
- data/tree_clusters.gemspec +1 -0
- metadata +23 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 884a3b714acdf6b5b933c89ce0d6a82aaf9bcbca
|
4
|
+
data.tar.gz: 1e3573ead3b5ad82149cec6ccbdf8a8b94eb9939
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 207ebce001ff2d7aaf1bd4095c8dc3718057637b86edb7b0988e14d0da1b6ad28c267f907b0cfe4e8df908d08a463fd5dd8d200c1a2864a4e27e337fd001b553
|
7
|
+
data.tar.gz: aea3b7dab128fbf1d08e6cd0ac813cc415652d2586f8be80c696f8946ac447b0251b1bbbc61acbd2f85760ab7452ae65db781f98d1d30d1feb2e02c93727d1fe
|
data/.gitignore
CHANGED
data/lib/tree_clusters.rb
CHANGED
@@ -32,14 +32,93 @@ module TreeClusters
|
|
32
32
|
# @yieldparam clade [Clade] a clade of the tree
|
33
33
|
#
|
34
34
|
# @return [Enumerator<Clade>] enumerator of Clade objects
|
35
|
-
# Walks tree.clade_nodes in reverse order, wrapping each node in a
# Clade and yielding it. The optional metadata argument is passed
# through untouched to Clade.new. Without a block, an Enumerator over
# the same sequence is returned instead.
def all_clades tree, metadata=nil
  # Forward every argument so the lazily built enumerator replays the
  # exact same call once a block is finally attached.
  return enum_for(:all_clades, tree, metadata) unless block_given?

  tree.clade_nodes.reverse.each do |node|
    yield Clade.new node, tree, metadata
  end
end
|
42
42
|
|
43
|
+
# Finds the clades that are "snazzy" for at least one metadata
# category: every leaf of the clade carries the same tag for that
# category, and no leaf outside the clade carries that tag.
#
# @param tree the tree to inspect; must respond to +taxa+ and be
#   accepted by all_clades
# @param metadata enumerable yielding +[md_cat, leaf2mdtag]+ pairs,
#   where +leaf2mdtag+ maps a leaf name to its tag for that category
#
# @return [Hash] clade => { md_cat => md_tag } for every snazzy
#   (clade, category) pair; clades that are snazzy for no category
#   are absent
def snazzy_clades tree, metadata
  result = {}

  # Biggest clades first, so that a single-tag clade is recorded before
  # any of the smaller clades nested inside it.
  clades = all_clades(tree, metadata).
             sort_by { |clade| clade.all_leaves.count }.
             reverse

  metadata.each do |md_cat, leaf2mdtag|
    already_checked = Set.new
    single_tag_clades = {}

    clades.each do |clade|
      leaves = clade.all_leaves

      assert leaves.count > 1, "A clade cannot also be a leaf"

      # Every leaf already belongs to a larger single-tag clade for
      # this category, so this clade adds nothing new.
      next if leaves.all? { |leaf| already_checked.include? leaf }

      md_tags = leaves.map do |leaf|
        assert leaf2mdtag.has_key?(leaf),
               "leaf #{leaf} is missing from leaf2mdtag ht"

        leaf2mdtag[leaf]
      end

      # Only clades whose leaves all share one tag for this category
      # are of interest.
      next unless md_tags.uniq.count == 1

      already_checked.merge leaves

      assert !single_tag_clades.has_key?(clade),
             "clade #{clade.name} is repeated in single_tag_clades for #{md_cat}"

      single_tag_clades[clade] = md_tags.first
    end

    single_tag_clades.each do |clade, md_tag|
      outside_leaves = tree.taxa - clade.all_leaves

      # The tag must be exclusive to the clade: any outside leaf with
      # the same tag disqualifies it for this category.
      next if outside_leaves.any? { |leaf| leaf2mdtag[leaf] == md_tag }

      (result[clade] ||= {})[md_cat] = md_tag
    end
  end

  result
end
|
102
|
+
|
103
|
+
# Reads a tab-separated mapping file into a TreeClusters::Attrs.
#
# The first line is the header: its first column is ignored and the
# remaining columns name the metadata categories. On every later line
# the first column is the leaf name and each remaining column is that
# leaf's value for the category in the same position.
#
# @param fname [String] path to the mapping file
#
# @return [TreeClusters::Attrs] one +add(category, leaf_name, value)+
#   call is made per value read
def read_mapping_file fname
  md_cat_names = nil
  metadata = TreeClusters::Attrs.new

  # Block form so the handle is closed as soon as reading finishes,
  # even when a line raises part-way through.
  File.open(fname, "rt") do |file|
    file.each_line.with_index do |line, idx|
      leaf_name, *metadata_vals = line.chomp.split "\t"

      if idx.zero?
        md_cat_names = metadata_vals
      else
        metadata_vals.each_with_index do |val, val_idx|
          metadata.add md_cat_names[val_idx], leaf_name, val
        end
      end
    end
  end

  metadata
end
|
121
|
+
|
43
122
|
# A Hash table for genome/leaf/taxa attributes
|
44
123
|
class Attrs < Hash
|
45
124
|
|
@@ -113,11 +192,13 @@ module TreeClusters
|
|
113
192
|
:each_sibling_leaf_set,
|
114
193
|
:parent_leaves,
|
115
194
|
:non_parent_leaves,
|
116
|
-
:other_leaves
|
195
|
+
:other_leaves,
|
196
|
+
:single_tag_info,
|
197
|
+
:all_tags
|
117
198
|
|
118
199
|
# @param node [NewickNode] a NewickNode from a NewickTree
|
119
200
|
# @param tree [NewickTree] a NewickTree
|
120
|
-
def initialize node, tree
|
201
|
+
def initialize node, tree, metadata=nil
|
121
202
|
@name = node.name
|
122
203
|
@all_leaves = descendant_leaves node
|
123
204
|
|
@@ -148,6 +229,14 @@ module TreeClusters
|
|
148
229
|
|
149
230
|
@non_parent_leaves =
|
150
231
|
Object::Set.new(tree.taxa) - Object::Set.new(parent_leaves)
|
232
|
+
|
233
|
+
if metadata
|
234
|
+
@metadata = metadata
|
235
|
+
@all_tags ||= get_all_tags
|
236
|
+
@single_tag_info ||= get_single_tag_info
|
237
|
+
else
|
238
|
+
@single_tag_info = nil
|
239
|
+
end
|
151
240
|
end
|
152
241
|
|
153
242
|
# Compares two Clades field by field.
|
@@ -163,7 +252,8 @@ module TreeClusters
|
|
163
252
|
self.all_sibling_leaves == clade.all_sibling_leaves &&
|
164
253
|
self.each_sibling_leaf_set == clade.each_sibling_leaf_set &&
|
165
254
|
self.parent_leaves == clade.parent_leaves &&
|
166
|
-
self.other_leaves == clade.other_leaves
|
255
|
+
self.other_leaves == clade.other_leaves &&
|
256
|
+
self.single_tag_info == clade.single_tag_info
|
167
257
|
)
|
168
258
|
end
|
169
259
|
|
@@ -174,6 +264,27 @@ module TreeClusters
|
|
174
264
|
|
175
265
|
private
|
176
266
|
|
267
|
+
# Reduces @all_tags to a single value per metadata category: the tag
# itself when the category holds exactly one distinct tag, nil
# otherwise.
#
# @return [Hash] md_cat => the sole tag, or nil
def get_single_tag_info
  @all_tags.each_with_object({}) do |(md_cat, tags), info|
    info[md_cat] = tags.count == 1 ? tags.first : nil
  end
end
|
272
|
+
|
273
|
+
# Collects, for every metadata category in @metadata, the set of
# distinct tags carried by this object's leaves.
#
# Fails the assertion when a leaf has no entry for a category.
#
# @return [Hash] md_cat => Set of the tags found among all_leaves
def get_all_tags
  # name2tag has leaf names => metadata tag and is an Attrs
  @metadata.each_with_object({}) do |(md_cat, name2tag), tags_by_cat|
    leaf_tags = all_leaves.map do |leaf|
      assert name2tag.has_key?(leaf),
             "leaf #{leaf} is not present in name2tag ht for " \
             "md_cat #{md_cat}"

      name2tag[leaf]
    end

    tags_by_cat[md_cat] = Set.new(leaf_tags)
  end
end
|
287
|
+
|
177
288
|
def descendant_leaves node
|
178
289
|
if node.leaf?
|
179
290
|
[node.name]
|
@@ -0,0 +1,8 @@
|
|
1
|
+
name coolness snazzyness sillyness jauntiness oddness
|
2
|
+
a-1 cool snazzy 1 jaunty quite odd
|
3
|
+
a-2 cool snazzy 7 notjaunty quite odd
|
4
|
+
b-1 notcool snazzy 3 jaunty not odd
|
5
|
+
b-2 notcool snazzy 4 notjaunty not odd
|
6
|
+
bb-1 notcool notsnazzy 5 notjaunty not odd
|
7
|
+
bbb-1 notcool notsnazzy 1 jaunty rather odd
|
8
|
+
bbb-2 notcool notsnazzy 7 jaunty rather odd
|
@@ -0,0 +1 @@
|
|
1
|
+
((a-1, a-2)cluster_A, ((b-1, b-2)cluster_B1, (bb-1, (bbb-1, bbb-2)cluster_B3)cluster_B2)cluster_B)cluster_C;
|
data/tree_clusters.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tree_clusters
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
@@ -114,6 +114,26 @@ dependencies:
|
|
114
114
|
- - ">="
|
115
115
|
- !ruby/object:Gem::Version
|
116
116
|
version: 1.0.4
|
117
|
+
- !ruby/object:Gem::Dependency
|
118
|
+
name: trollop
|
119
|
+
requirement: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - "~>"
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '2.1'
|
124
|
+
- - ">="
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: 2.1.2
|
127
|
+
type: :runtime
|
128
|
+
prerelease: false
|
129
|
+
version_requirements: !ruby/object:Gem::Requirement
|
130
|
+
requirements:
|
131
|
+
- - "~>"
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '2.1'
|
134
|
+
- - ">="
|
135
|
+
- !ruby/object:Gem::Version
|
136
|
+
version: 2.1.2
|
117
137
|
description: Snazzy code for working with each cluster in a tree.
|
118
138
|
email:
|
119
139
|
- moorer@udel.edu
|
@@ -135,6 +155,8 @@ files:
|
|
135
155
|
- lib/tree_clusters/version.rb
|
136
156
|
- test_files/non_bifurcating.aln
|
137
157
|
- test_files/non_bifurcating.tre
|
158
|
+
- test_files/small.mapping
|
159
|
+
- test_files/small.tre
|
138
160
|
- test_files/test.tre
|
139
161
|
- tree_clusters.gemspec
|
140
162
|
homepage: https://github.com/mooreryan/tree_clusters
|