tree_clusters 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/exe/snazzy_clades_key_cols +36 -3
- data/lib/tree_clusters.rb +28 -8
- data/lib/tree_clusters/version.rb +1 -1
- data/test_files/small.tre +1 -1
- data/test_files/test.tre +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7d7f7982ea210cef7357e004ceb85d8b17389a32
|
4
|
+
data.tar.gz: a924d2bfd587ce03678c391f2a5f761848169726
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7b4540a485a4635194a170c74c1bae7cd4eeb5b58df3d3a934933a161ef918903461ba75924f3baf65724050b2d3d122ac953a0822f5218c7ac0f8b37db72405
|
7
|
+
data.tar.gz: fced0e673bcacf6dc113d6112bd5d02b9d5fcf5756ae50d1b77cb41b1c8f97b76a3fe2ef86f8f6c7ea8ddeee96a5b5aa06600cdef310842276a486b829763eae
|
data/.gitignore
CHANGED
data/exe/snazzy_clades_key_cols
CHANGED
@@ -19,9 +19,42 @@ opts = Trollop.options do
|
|
19
19
|
|
20
20
|
banner <<-EOS
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
|
22
|
+
|
23
|
+
Checking IDs
|
24
|
+
------------
|
25
|
+
|
26
|
+
IDs for the sequences must match between the three input files.
|
27
|
+
|
28
|
+
The tree file is allowed to have quoted taxa names, but the mapping
|
29
|
+
file and alignment file are not.
|
30
|
+
|
31
|
+
If the sequence headers in your alignment file contain spaces, the ID part of the
|
32
|
+
header (i.e., the part up to the first space) must match the
|
33
|
+
sequence IDs in the tree and the mapping file.
|
34
|
+
|
35
|
+
Example: This would be okay.
|
36
|
+
|
37
|
+
tree file:
|
38
|
+
('genome_A', 'genome_B');
|
39
|
+
|
40
|
+
aln file:
|
41
|
+
>genome_A apple pie
|
42
|
+
AAAAA
|
43
|
+
>genome_B brown sugar
|
44
|
+
AATTA
|
45
|
+
|
46
|
+
mapping file:
|
47
|
+
name coolness
|
48
|
+
genome_A cool
|
49
|
+
genome_B notcool
|
50
|
+
|
51
|
+
|
52
|
+
Subtracting parent nodes
|
53
|
+
------------------------
|
54
|
+
|
55
|
+
If a clade's parent would be the root of the tree, no columns will
|
56
|
+
be subtracted when removing the parent columns, as they would be the
|
57
|
+
entire alignment.
|
25
58
|
|
26
59
|
Options:
|
27
60
|
EOS
|
data/lib/tree_clusters.rb
CHANGED
@@ -22,6 +22,10 @@ class NewickTree
|
|
22
22
|
end
|
23
23
|
return clades
|
24
24
|
end
|
25
|
+
|
26
|
+
def unquoted_taxa
|
27
|
+
self.taxa.map { |str| str.tr %q{"'}, "" }
|
28
|
+
end
|
25
29
|
end
|
26
30
|
|
27
31
|
# Top level namespace of the Gem.
|
@@ -82,8 +86,10 @@ module TreeClusters
|
|
82
86
|
Set.new low_ent_cols
|
83
87
|
end
|
84
88
|
|
89
|
+
# @note If there are quoted names in the tree file, they are
|
90
|
+
# unquoted first.
|
85
91
|
def check_ids tree, mapping, aln
|
86
|
-
tree_ids = Set.new(NewickTree.fromFile(tree).
|
92
|
+
tree_ids = Set.new(NewickTree.fromFile(tree).unquoted_taxa)
|
87
93
|
|
88
94
|
mapping_ids = Set.new
|
89
95
|
File.open(mapping, "rt").each_line.with_index do |line, idx|
|
@@ -143,8 +149,11 @@ module TreeClusters
|
|
143
149
|
metadata.each do |md_cat, leaf2mdtag|
|
144
150
|
already_checked = Set.new
|
145
151
|
single_tag_clades = {}
|
152
|
+
p [md_cat, leaf2mdtag]
|
146
153
|
|
147
154
|
clades.each do |clade|
|
155
|
+
p [clade.name, clade.all_leaves]
|
156
|
+
|
148
157
|
assert clade.all_leaves.count > 1,
|
149
158
|
"A clade cannot also be a leaf"
|
150
159
|
|
@@ -173,7 +182,7 @@ module TreeClusters
|
|
173
182
|
end
|
174
183
|
|
175
184
|
single_tag_clades.each do |clade, md_tag|
|
176
|
-
non_clade_leaves = tree.
|
185
|
+
non_clade_leaves = tree.unquoted_taxa - clade.all_leaves
|
177
186
|
|
178
187
|
non_clade_leaves_with_this_md_tag = non_clade_leaves.map do |leaf|
|
179
188
|
[leaf, leaf2mdtag[leaf]]
|
@@ -288,10 +297,15 @@ module TreeClusters
|
|
288
297
|
:single_tag_info,
|
289
298
|
:all_tags
|
290
299
|
|
300
|
+
# @note If a node name is quoted, then those quotes are removed
|
301
|
+
# first.
|
302
|
+
#
|
291
303
|
# @param node [NewickNode] a NewickNode from a NewickTree
|
292
304
|
# @param tree [NewickTree] a NewickTree
|
293
305
|
def initialize node, tree, metadata=nil
|
294
|
-
|
306
|
+
tree_taxa = tree.unquoted_taxa
|
307
|
+
|
308
|
+
@name = unquote node.name
|
295
309
|
@all_leaves = descendant_leaves node
|
296
310
|
|
297
311
|
if (children = node.children).count == 2
|
@@ -317,10 +331,10 @@ module TreeClusters
|
|
317
331
|
@parent_leaves = descendant_leaves parent
|
318
332
|
|
319
333
|
@other_leaves =
|
320
|
-
Object::Set.new(
|
334
|
+
Object::Set.new(tree_taxa) - Object::Set.new(all_leaves)
|
321
335
|
|
322
336
|
@non_parent_leaves =
|
323
|
-
Object::Set.new(
|
337
|
+
Object::Set.new(tree_taxa) - Object::Set.new(parent_leaves)
|
324
338
|
|
325
339
|
if metadata
|
326
340
|
@metadata = metadata
|
@@ -345,7 +359,8 @@ module TreeClusters
|
|
345
359
|
self.each_sibling_leaf_set == clade.each_sibling_leaf_set &&
|
346
360
|
self.parent_leaves == clade.parent_leaves &&
|
347
361
|
self.other_leaves == clade.other_leaves &&
|
348
|
-
self.single_tag_info == clade.single_tag_info
|
362
|
+
self.single_tag_info == clade.single_tag_info &&
|
363
|
+
self.all_tags == clade.all_tags
|
349
364
|
)
|
350
365
|
end
|
351
366
|
|
@@ -379,14 +394,19 @@ module TreeClusters
|
|
379
394
|
|
380
395
|
def descendant_leaves node
|
381
396
|
if node.leaf?
|
382
|
-
[node.name]
|
397
|
+
[unquote(node.name)]
|
383
398
|
else
|
384
399
|
node.
|
385
400
|
descendants.
|
386
401
|
flatten.
|
387
402
|
uniq.
|
388
|
-
select { |node| node.leaf? }.
|
403
|
+
select { |node| node.leaf? }.
|
404
|
+
map { |node| unquote(node.name) }
|
389
405
|
end
|
390
406
|
end
|
407
|
+
|
408
|
+
def unquote str
|
409
|
+
str.tr %q{"'}, ""
|
410
|
+
end
|
391
411
|
end
|
392
412
|
end
|
data/test_files/small.tre
CHANGED
@@ -1 +1 @@
|
|
1
|
-
((a-1, a-2)cluster_A, ((b-1, b-2)cluster_B1, (bb-1, (bbb-1, bbb-2)cluster_B3)cluster_B2)cluster_B)cluster_C;
|
1
|
+
(('a-1', 'a-2')cluster_A, (("b-1", b-2)cluster_B1, (bb-1, (bbb-1, bbb-2)cluster_B3)cluster_B2)cluster_B)cluster_C;
|
data/test_files/test.tre
CHANGED
@@ -1 +1 @@
|
|
1
|
-
((((((g1:1, g2:1)cluster1:1, g3:2)cluster4:1, (g4a:1, g4b:1)cluster7:2)cluster6:1, ((g5:2, g6:2)cluster11:1, g7:3)cluster14:1)cluster10:1, g8:5)cluster16:1, ((g9:1, g10:1)cluster19:1, g11:2)cluster22:4)cluster18:1;
|
1
|
+
((((((g1:1, 'g2':1)cluster1:1, g3:2)cluster4:1, (g4a:1, g4b:1)cluster7:2)cluster6:1, ((g5:2, "g6":2)cluster11:1, g7:3)cluster14:1)cluster10:1, g8:5)cluster16:1, ((g9:1, g10:1)cluster19:1, g11:2)cluster22:4)cluster18:1;
|