tree_clusters 0.5.1 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/exe/snazzy_clades_key_cols +36 -3
- data/lib/tree_clusters.rb +28 -8
- data/lib/tree_clusters/version.rb +1 -1
- data/test_files/small.tre +1 -1
- data/test_files/test.tre +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7d7f7982ea210cef7357e004ceb85d8b17389a32
|
4
|
+
data.tar.gz: a924d2bfd587ce03678c391f2a5f761848169726
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7b4540a485a4635194a170c74c1bae7cd4eeb5b58df3d3a934933a161ef918903461ba75924f3baf65724050b2d3d122ac953a0822f5218c7ac0f8b37db72405
|
7
|
+
data.tar.gz: fced0e673bcacf6dc113d6112bd5d02b9d5fcf5756ae50d1b77cb41b1c8f97b76a3fe2ef86f8f6c7ea8ddeee96a5b5aa06600cdef310842276a486b829763eae
|
data/.gitignore
CHANGED
data/exe/snazzy_clades_key_cols
CHANGED
@@ -19,9 +19,42 @@ opts = Trollop.options do
|
|
19
19
|
|
20
20
|
banner <<-EOS
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
|
22
|
+
|
23
|
+
Checking IDs
|
24
|
+
------------
|
25
|
+
|
26
|
+
IDs for the sequences must match between the three input files.
|
27
|
+
|
28
|
+
The tree file is allowed to have quoted taxa names, but the mapping
|
29
|
+
file and alignment file are not.
|
30
|
+
|
31
|
+
If sequence headers in your alignment file contain spaces, the ID part of the
|
32
|
+
header (i.e., the part up to the first space) must match the
|
33
|
+
sequence IDs in the tree and the mapping file.
|
34
|
+
|
35
|
+
Example: This would be okay.
|
36
|
+
|
37
|
+
tree file:
|
38
|
+
('genome_A', 'genome_B');
|
39
|
+
|
40
|
+
aln file:
|
41
|
+
>genome_A apple pie
|
42
|
+
AAAAA
|
43
|
+
>genome_B brown sugar
|
44
|
+
AATTA
|
45
|
+
|
46
|
+
mapping file:
|
47
|
+
name coolness
|
48
|
+
genome_A cool
|
49
|
+
genome_B notcool
|
50
|
+
|
51
|
+
|
52
|
+
Subtracting parent nodes
|
53
|
+
------------------------
|
54
|
+
|
55
|
+
If a clade's parent would be the root of the tree, no columns will
|
56
|
+
be subtracted when removing the parent columns as it would be the
|
57
|
+
entire alignment.
|
25
58
|
|
26
59
|
Options:
|
27
60
|
EOS
|
data/lib/tree_clusters.rb
CHANGED
@@ -22,6 +22,10 @@ class NewickTree
|
|
22
22
|
end
|
23
23
|
return clades
|
24
24
|
end
|
25
|
+
|
26
|
+
def unquoted_taxa
|
27
|
+
self.taxa.map { |str| str.tr %q{"'}, "" }
|
28
|
+
end
|
25
29
|
end
|
26
30
|
|
27
31
|
# Top level namespace of the Gem.
|
@@ -82,8 +86,10 @@ module TreeClusters
|
|
82
86
|
Set.new low_ent_cols
|
83
87
|
end
|
84
88
|
|
89
|
+
# @note If there are quoted names in the tree file, they are
|
90
|
+
# unquoted first.
|
85
91
|
def check_ids tree, mapping, aln
|
86
|
-
tree_ids = Set.new(NewickTree.fromFile(tree).
|
92
|
+
tree_ids = Set.new(NewickTree.fromFile(tree).unquoted_taxa)
|
87
93
|
|
88
94
|
mapping_ids = Set.new
|
89
95
|
File.open(mapping, "rt").each_line.with_index do |line, idx|
|
@@ -143,8 +149,11 @@ module TreeClusters
|
|
143
149
|
metadata.each do |md_cat, leaf2mdtag|
|
144
150
|
already_checked = Set.new
|
145
151
|
single_tag_clades = {}
|
152
|
+
p [md_cat, leaf2mdtag]
|
146
153
|
|
147
154
|
clades.each do |clade|
|
155
|
+
p [clade.name, clade.all_leaves]
|
156
|
+
|
148
157
|
assert clade.all_leaves.count > 1,
|
149
158
|
"A clade cannot also be a leaf"
|
150
159
|
|
@@ -173,7 +182,7 @@ module TreeClusters
|
|
173
182
|
end
|
174
183
|
|
175
184
|
single_tag_clades.each do |clade, md_tag|
|
176
|
-
non_clade_leaves = tree.
|
185
|
+
non_clade_leaves = tree.unquoted_taxa - clade.all_leaves
|
177
186
|
|
178
187
|
non_clade_leaves_with_this_md_tag = non_clade_leaves.map do |leaf|
|
179
188
|
[leaf, leaf2mdtag[leaf]]
|
@@ -288,10 +297,15 @@ module TreeClusters
|
|
288
297
|
:single_tag_info,
|
289
298
|
:all_tags
|
290
299
|
|
300
|
+
# @note If a node name is quoted, then those quotes are removed
|
301
|
+
# first.
|
302
|
+
#
|
291
303
|
# @param node [NewickNode] a NewickNode from a NewickTree
|
292
304
|
# @param tree [NewickTree] a NewickTree
|
293
305
|
def initialize node, tree, metadata=nil
|
294
|
-
|
306
|
+
tree_taxa = tree.unquoted_taxa
|
307
|
+
|
308
|
+
@name = unquote node.name
|
295
309
|
@all_leaves = descendant_leaves node
|
296
310
|
|
297
311
|
if (children = node.children).count == 2
|
@@ -317,10 +331,10 @@ module TreeClusters
|
|
317
331
|
@parent_leaves = descendant_leaves parent
|
318
332
|
|
319
333
|
@other_leaves =
|
320
|
-
Object::Set.new(
|
334
|
+
Object::Set.new(tree_taxa) - Object::Set.new(all_leaves)
|
321
335
|
|
322
336
|
@non_parent_leaves =
|
323
|
-
Object::Set.new(
|
337
|
+
Object::Set.new(tree_taxa) - Object::Set.new(parent_leaves)
|
324
338
|
|
325
339
|
if metadata
|
326
340
|
@metadata = metadata
|
@@ -345,7 +359,8 @@ module TreeClusters
|
|
345
359
|
self.each_sibling_leaf_set == clade.each_sibling_leaf_set &&
|
346
360
|
self.parent_leaves == clade.parent_leaves &&
|
347
361
|
self.other_leaves == clade.other_leaves &&
|
348
|
-
self.single_tag_info == clade.single_tag_info
|
362
|
+
self.single_tag_info == clade.single_tag_info &&
|
363
|
+
self.all_tags == clade.all_tags
|
349
364
|
)
|
350
365
|
end
|
351
366
|
|
@@ -379,14 +394,19 @@ module TreeClusters
|
|
379
394
|
|
380
395
|
def descendant_leaves node
|
381
396
|
if node.leaf?
|
382
|
-
[node.name]
|
397
|
+
[unquote(node.name)]
|
383
398
|
else
|
384
399
|
node.
|
385
400
|
descendants.
|
386
401
|
flatten.
|
387
402
|
uniq.
|
388
|
-
select { |node| node.leaf? }.
|
403
|
+
select { |node| node.leaf? }.
|
404
|
+
map { |node| unquote(node.name) }
|
389
405
|
end
|
390
406
|
end
|
407
|
+
|
408
|
+
def unquote str
|
409
|
+
str.tr %q{"'}, ""
|
410
|
+
end
|
391
411
|
end
|
392
412
|
end
|
data/test_files/small.tre
CHANGED
@@ -1 +1 @@
|
|
1
|
-
((a-1, a-2)cluster_A, ((b-1, b-2)cluster_B1, (bb-1, (bbb-1, bbb-2)cluster_B3)cluster_B2)cluster_B)cluster_C;
|
1
|
+
(('a-1', 'a-2')cluster_A, (("b-1", b-2)cluster_B1, (bb-1, (bbb-1, bbb-2)cluster_B3)cluster_B2)cluster_B)cluster_C;
|
data/test_files/test.tre
CHANGED
@@ -1 +1 @@
|
|
1
|
-
((((((g1:1, g2:1)cluster1:1, g3:2)cluster4:1, (g4a:1, g4b:1)cluster7:2)cluster6:1, ((g5:2, g6:2)cluster11:1, g7:3)cluster14:1)cluster10:1, g8:5)cluster16:1, ((g9:1, g10:1)cluster19:1, g11:2)cluster22:4)cluster18:1;
|
1
|
+
((((((g1:1, 'g2':1)cluster1:1, g3:2)cluster4:1, (g4a:1, g4b:1)cluster7:2)cluster6:1, ((g5:2, "g6":2)cluster11:1, g7:3)cluster14:1)cluster10:1, g8:5)cluster16:1, ((g9:1, g10:1)cluster19:1, g11:2)cluster22:4)cluster18:1;
|