tree_clusters 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/lib/tree_clusters/version.rb +1 -1
- data/lib/tree_clusters.rb +117 -6
- data/test_files/small.mapping +8 -0
- data/test_files/small.tre +1 -0
- data/tree_clusters.gemspec +1 -0
- metadata +23 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 884a3b714acdf6b5b933c89ce0d6a82aaf9bcbca
|
4
|
+
data.tar.gz: 1e3573ead3b5ad82149cec6ccbdf8a8b94eb9939
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 207ebce001ff2d7aaf1bd4095c8dc3718057637b86edb7b0988e14d0da1b6ad28c267f907b0cfe4e8df908d08a463fd5dd8d200c1a2864a4e27e337fd001b553
|
7
|
+
data.tar.gz: aea3b7dab128fbf1d08e6cd0ac813cc415652d2586f8be80c696f8946ac447b0251b1bbbc61acbd2f85760ab7452ae65db781f98d1d30d1feb2e02c93727d1fe
|
data/.gitignore
CHANGED
data/lib/tree_clusters.rb
CHANGED
@@ -32,14 +32,93 @@ module TreeClusters
|
|
32
32
|
# @yieldparam clade [Clade] a clade of the tree
|
33
33
|
#
|
34
34
|
# @return [Enumerator<Clade>] enumerator of Clade objects
|
35
|
-
def all_clades tree
|
36
|
-
return enum_for(:all_clades, tree) unless block_given?
|
35
|
+
def all_clades tree, metadata=nil
|
36
|
+
return enum_for(:all_clades, tree, metadata) unless block_given?
|
37
37
|
|
38
38
|
tree.clade_nodes.reverse.each do |node|
|
39
|
-
yield Clade.new node, tree
|
39
|
+
yield Clade.new node, tree, metadata
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
43
|
+
def snazzy_clades tree, metadata
|
44
|
+
snazzy_clades = {}
|
45
|
+
|
46
|
+
clades = self.
|
47
|
+
all_clades(tree, metadata).
|
48
|
+
sort_by { |clade| clade.all_leaves.count }.
|
49
|
+
reverse
|
50
|
+
|
51
|
+
metadata.each do |md_cat, leaf2mdtag|
|
52
|
+
already_checked = Set.new
|
53
|
+
single_tag_clades = {}
|
54
|
+
|
55
|
+
clades.each do |clade|
|
56
|
+
assert clade.all_leaves.count > 1,
|
57
|
+
"A clade cannot also be a leaf"
|
58
|
+
|
59
|
+
unless clade.all_leaves.all? do |leaf|
|
60
|
+
already_checked.include? leaf
|
61
|
+
end
|
62
|
+
md_tags = clade.all_leaves.map do |leaf|
|
63
|
+
assert leaf2mdtag.has_key?(leaf),
|
64
|
+
"leaf #{leaf} is missing from leaf2mdtag ht"
|
65
|
+
|
66
|
+
leaf2mdtag[leaf]
|
67
|
+
end
|
68
|
+
|
69
|
+
# this clade is mono-phyletic w.r.t. this metadata category.
|
70
|
+
if md_tags.uniq.count == 1
|
71
|
+
clade.all_leaves.each do |leaf|
|
72
|
+
already_checked << leaf
|
73
|
+
end
|
74
|
+
|
75
|
+
assert !single_tag_clades.has_key?(clade),
|
76
|
+
"clade #{clade.name} is repeated in single_tag_clades for #{md_cat}"
|
77
|
+
|
78
|
+
single_tag_clades[clade] = md_tags.first
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
single_tag_clades.each do |clade, md_tag|
|
84
|
+
non_clade_leaves = tree.taxa - clade.all_leaves
|
85
|
+
|
86
|
+
non_clade_leaves_with_this_md_tag = non_clade_leaves.map do |leaf|
|
87
|
+
[leaf, leaf2mdtag[leaf]]
|
88
|
+
end.select { |ary| ary.last == md_tag }
|
89
|
+
|
90
|
+
if non_clade_leaves_with_this_md_tag.count.zero?
|
91
|
+
if snazzy_clades.has_key? clade
|
92
|
+
snazzy_clades[clade][md_cat] = md_tag
|
93
|
+
else
|
94
|
+
snazzy_clades[clade] = { md_cat => md_tag }
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
snazzy_clades
|
101
|
+
end
|
102
|
+
|
103
|
+
def read_mapping_file fname
|
104
|
+
md_cat_names = nil
|
105
|
+
metadata = TreeClusters::Attrs.new
|
106
|
+
|
107
|
+
File.open(fname, "rt").each_line.with_index do |line, idx|
|
108
|
+
leaf_name, *metadata_vals = line.chomp.split "\t"
|
109
|
+
|
110
|
+
if idx.zero?
|
111
|
+
md_cat_names = metadata_vals
|
112
|
+
else
|
113
|
+
metadata_vals.each_with_index do |val, val_idx|
|
114
|
+
metadata.add md_cat_names[val_idx], leaf_name, val
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
metadata
|
120
|
+
end
|
121
|
+
|
43
122
|
# A Hash table for genome/leaf/taxa attributes
|
44
123
|
class Attrs < Hash
|
45
124
|
|
@@ -113,11 +192,13 @@ module TreeClusters
|
|
113
192
|
:each_sibling_leaf_set,
|
114
193
|
:parent_leaves,
|
115
194
|
:non_parent_leaves,
|
116
|
-
:other_leaves
|
195
|
+
:other_leaves,
|
196
|
+
:single_tag_info,
|
197
|
+
:all_tags
|
117
198
|
|
118
199
|
# @param node [NewickNode] a NewickNode from a NewickTree
|
119
200
|
# @param tree [NewickTree] a NewickTree
|
120
|
-
def initialize node, tree
|
201
|
+
def initialize node, tree, metadata=nil
|
121
202
|
@name = node.name
|
122
203
|
@all_leaves = descendant_leaves node
|
123
204
|
|
@@ -148,6 +229,14 @@ module TreeClusters
|
|
148
229
|
|
149
230
|
@non_parent_leaves =
|
150
231
|
Object::Set.new(tree.taxa) - Object::Set.new(parent_leaves)
|
232
|
+
|
233
|
+
if metadata
|
234
|
+
@metadata = metadata
|
235
|
+
@all_tags ||= get_all_tags
|
236
|
+
@single_tag_info ||= get_single_tag_info
|
237
|
+
else
|
238
|
+
@single_tag_info = nil
|
239
|
+
end
|
151
240
|
end
|
152
241
|
|
153
242
|
# Compares two Clades field by field.
|
@@ -163,7 +252,8 @@ module TreeClusters
|
|
163
252
|
self.all_sibling_leaves == clade.all_sibling_leaves &&
|
164
253
|
self.each_sibling_leaf_set == clade.each_sibling_leaf_set &&
|
165
254
|
self.parent_leaves == clade.parent_leaves &&
|
166
|
-
self.other_leaves == clade.other_leaves
|
255
|
+
self.other_leaves == clade.other_leaves &&
|
256
|
+
self.single_tag_info == clade.single_tag_info
|
167
257
|
)
|
168
258
|
end
|
169
259
|
|
@@ -174,6 +264,27 @@ module TreeClusters
|
|
174
264
|
|
175
265
|
private
|
176
266
|
|
267
|
+
def get_single_tag_info
|
268
|
+
@all_tags.map do |md_cat, set|
|
269
|
+
[md_cat, set.count == 1 ? set.to_a.first : nil]
|
270
|
+
end.to_h
|
271
|
+
end
|
272
|
+
|
273
|
+
def get_all_tags
|
274
|
+
# name2tag has leaf names => metadata tag and is an Attrs
|
275
|
+
@metadata.map do |md_cat, name2tag|
|
276
|
+
tag_info = self.all_leaves.map do |leaf|
|
277
|
+
assert name2tag.has_key?(leaf),
|
278
|
+
"leaf #{leaf} is not present in name2tag ht for " +
|
279
|
+
"md_cat #{md_cat}"
|
280
|
+
|
281
|
+
name2tag[leaf]
|
282
|
+
end
|
283
|
+
|
284
|
+
[md_cat, Set.new(tag_info)]
|
285
|
+
end.to_h
|
286
|
+
end
|
287
|
+
|
177
288
|
def descendant_leaves node
|
178
289
|
if node.leaf?
|
179
290
|
[node.name]
|
@@ -0,0 +1,8 @@
|
|
1
|
+
name coolness snazzyness sillyness jauntiness oddness
|
2
|
+
a-1 cool snazzy 1 jaunty quite odd
|
3
|
+
a-2 cool snazzy 7 notjaunty quite odd
|
4
|
+
b-1 notcool snazzy 3 jaunty not odd
|
5
|
+
b-2 notcool snazzy 4 notjaunty not odd
|
6
|
+
bb-1 notcool notsnazzy 5 notjaunty not odd
|
7
|
+
bbb-1 notcool notsnazzy 1 jaunty rather odd
|
8
|
+
bbb-2 notcool notsnazzy 7 jaunty rather odd
|
@@ -0,0 +1 @@
|
|
1
|
+
((a-1, a-2)cluster_A, ((b-1, b-2)cluster_B1, (bb-1, (bbb-1, bbb-2)cluster_B3)cluster_B2)cluster_B)cluster_C;
|
data/tree_clusters.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tree_clusters
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
@@ -114,6 +114,26 @@ dependencies:
|
|
114
114
|
- - ">="
|
115
115
|
- !ruby/object:Gem::Version
|
116
116
|
version: 1.0.4
|
117
|
+
- !ruby/object:Gem::Dependency
|
118
|
+
name: trollop
|
119
|
+
requirement: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - "~>"
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '2.1'
|
124
|
+
- - ">="
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: 2.1.2
|
127
|
+
type: :runtime
|
128
|
+
prerelease: false
|
129
|
+
version_requirements: !ruby/object:Gem::Requirement
|
130
|
+
requirements:
|
131
|
+
- - "~>"
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '2.1'
|
134
|
+
- - ">="
|
135
|
+
- !ruby/object:Gem::Version
|
136
|
+
version: 2.1.2
|
117
137
|
description: Snazzy code for working with each cluster in a tree.
|
118
138
|
email:
|
119
139
|
- moorer@udel.edu
|
@@ -135,6 +155,8 @@ files:
|
|
135
155
|
- lib/tree_clusters/version.rb
|
136
156
|
- test_files/non_bifurcating.aln
|
137
157
|
- test_files/non_bifurcating.tre
|
158
|
+
- test_files/small.mapping
|
159
|
+
- test_files/small.tre
|
138
160
|
- test_files/test.tre
|
139
161
|
- tree_clusters.gemspec
|
140
162
|
homepage: https://github.com/mooreryan/tree_clusters
|