cluster-affinity 0.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Sanket Wagle
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,27 @@
1
+ Metadata-Version: 2.2
2
+ Name: cluster_affinity
3
+ Version: 0.0.6
4
+ Summary: A tool to calculate the cluster affinity distance between two trees
5
+ Author-email: Sanket Wagle <swagle@iastate.edu>
6
+ Project-URL: Homepage, https://github.com/swagle8987/cluster_affinity
7
+ Project-URL: Issues, https://github.com/swagle8987/cluster_affinity/issues
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: dendropy
11
+ Requires-Dist: numpy
12
+ Requires-Dist: pytest
13
+
14
+ The Asymmetric Cluster Affinity cost is a phylogenetic cost based on calculating the symmetric difference between the cluster representations of trees. Currently the CLI tool supports calculating the cluster affinity distance from the source tree to the target tree.
15
+
16
+
17
+ ### Installation
18
+ Cluster Affinity is available on PyPI and can be installed with `pip install cluster_affinity`. Note that the package is built for Python 3.10 or higher. Cluster Affinity depends on dendropy, numpy and pytest.
19
+
20
+
21
+ ### Tutorial
22
+ ---
23
+ Currently the CLI tool compares two trees and prints the cluster affinity cost. The command is:
24
+ ```
25
+ cluster_affinity t1 t2
26
+ ```
27
+ where t1 and t2 are paths to Newick representations of the trees.
@@ -0,0 +1,14 @@
1
+ The Asymmetric Cluster Affinity cost is a phylogenetic cost based on calculating the symmetric difference between the cluster representations of trees. Currently the CLI tool supports calculating the cluster affinity distance from the source tree to the target tree.
2
+
3
+
4
+ ### Installation
5
+ Cluster Affinity is available on PyPI and can be installed with `pip install cluster_affinity`. Note that the package is built for Python 3.10 or higher. Cluster Affinity depends on dendropy, numpy and pytest.
6
+
7
+
8
+ ### Tutorial
9
+ ---
10
+ Currently the CLI tool compares two trees and prints the cluster affinity cost. The command is:
11
+ ```
12
+ cluster_affinity t1 t2
13
+ ```
14
+ where t1 and t2 are paths to Newick representations of the trees.
@@ -0,0 +1,20 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ dependencies = ["dendropy","numpy","pytest"]
7
+ name="cluster_affinity"
8
+ version="0.0.6"
9
+ authors = [
10
+ {name="Sanket Wagle", email="swagle@iastate.edu"}
11
+ ]
12
+ description="A tool to calculate the cluster affinity distance between two trees"
13
+ readme="README.md"
14
+
15
+ [project.scripts]
16
+ cluster_affinity = "main:cluster_affinity_script"
17
+
18
+ [project.urls]
19
+ Homepage = "https://github.com/swagle8987/cluster_affinity"
20
+ Issues = "https://github.com/swagle8987/cluster_affinity/issues"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env python
2
+
3
+ from setuptools import setup
4
+
5
+ if __name__ == "__main__":
6
+ setup()
@@ -0,0 +1,2 @@
1
+ from .cluster_computation import rooted_cluster_affinity
2
+ from .main import cluster_affinity_script
@@ -0,0 +1,27 @@
1
+ Metadata-Version: 2.2
2
+ Name: cluster_affinity
3
+ Version: 0.0.6
4
+ Summary: A tool to calculate the cluster affinity distance between two trees
5
+ Author-email: Sanket Wagle <swagle@iastate.edu>
6
+ Project-URL: Homepage, https://github.com/swagle8987/cluster_affinity
7
+ Project-URL: Issues, https://github.com/swagle8987/cluster_affinity/issues
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: dendropy
11
+ Requires-Dist: numpy
12
+ Requires-Dist: pytest
13
+
14
+ The Asymmetric Cluster Affinity cost is a phylogenetic cost based on calculating the symmetric difference between the cluster representations of trees. Currently the CLI tool supports calculating the cluster affinity distance from the source tree to the target tree.
15
+
16
+
17
+ ### Installation
18
+ Cluster Affinity is available on PyPI and can be installed with `pip install cluster_affinity`. Note that the package is built for Python 3.10 or higher. Cluster Affinity depends on dendropy, numpy and pytest.
19
+
20
+
21
+ ### Tutorial
22
+ ---
23
+ Currently the CLI tool compares two trees and prints the cluster affinity cost. The command is:
24
+ ```
25
+ cluster_affinity t1 t2
26
+ ```
27
+ where t1 and t2 are paths to Newick representations of the trees.
@@ -0,0 +1,20 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ setup.py
5
+ src/__init__.py
6
+ src/main.py
7
+ src/cluster_affinity.egg-info/PKG-INFO
8
+ src/cluster_affinity.egg-info/SOURCES.txt
9
+ src/cluster_affinity.egg-info/dependency_links.txt
10
+ src/cluster_affinity.egg-info/entry_points.txt
11
+ src/cluster_affinity.egg-info/requires.txt
12
+ src/cluster_affinity.egg-info/top_level.txt
13
+ src/cluster_computation/__init__.py
14
+ src/cluster_computation/cluster_affinity.py
15
+ src/cluster_computation/extendedtree.py
16
+ src/cluster_computation/heavy_path.py
17
+ src/cluster_computation/tau.py
18
+ src/cluster_computation/try.py
19
+ src/test/__init__.py
20
+ src/test/test_clustertree.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ cluster_affinity = main:cluster_affinity_script
@@ -0,0 +1,3 @@
1
+ dendropy
2
+ numpy
3
+ pytest
@@ -0,0 +1,4 @@
1
+ __init__
2
+ cluster_computation
3
+ main
4
+ test
@@ -0,0 +1 @@
1
+ from .cluster_affinity import rooted_cluster_affinity
@@ -0,0 +1,53 @@
1
+ import math
2
+ import heapq
3
+ import numpy as np
4
+
5
+ '''
6
+ cluster_affinity: Tree -> Tree -> int
7
+ '''
8
def rooted_cluster_affinity(t1,t2):
    """Return the asymmetric cluster affinity cost from ``t1`` to ``t2``.

    Each node of ``t1`` contributes the smallest symmetric-difference size
    between its cluster (the leaf labels below it) and the cluster of any
    node of ``t2``; the contributions are added up.
    """
    source_clusters = convert_tree_to_cmap(t1).values()
    return sum(cluster_tree_dist(cluster, t2) for cluster in source_clusters)
14
+
15
+ '''
16
+ cluster_tree_dist: Cluster -> Tree -> Int
17
+ '''
18
def cluster_tree_dist(c,t2):
    """Return the smallest symmetric difference between ``c`` and a cluster of ``t2``.

    ``c`` is a collection of leaf labels. ``t2`` is walked once in post-order;
    for every node the number of leaves below it and the number of those
    leaves whose label is in ``c`` are accumulated from its children, giving
    ``|c| + |cluster| - 2 * |c & cluster|`` for that node. ``math.inf`` is
    returned when the traversal yields no node.
    """
    leaves_below = {}   # node -> number of leaves in its subtree
    shared_below = {}   # node -> number of those leaves labelled in c
    best = math.inf
    for node in t2.postorder_node_iter():
        if node.is_leaf():
            leaves_below[node] = 1
            shared_below[node] = 1 if node.taxon.label in c else 0
        else:
            children = list(node.child_node_iter())
            shared_below[node] = sum(shared_below[child] for child in children)
            leaves_below[node] = sum(leaves_below[child] for child in children)
        candidate = len(c) + leaves_below[node] - 2*shared_below[node]
        best = min(best, candidate)
    return best
42
+
43
def convert_tree_to_cmap(t):
    """Map every node of ``t`` to the set of leaf labels found in its subtree.

    Nodes are visited in post-order, so the sets of a node's children are
    already available when the node itself is reached.
    """
    clusters = {}
    for node in t.postorder_node_iter():
        if node.is_leaf():
            clusters[node] = {node.taxon.label}
            continue
        child_sets = (clusters[child] for child in node.child_node_iter())
        # union() builds a fresh set, so parent and child sets never alias.
        clusters[node] = set().union(*child_sets)
    return clusters
@@ -0,0 +1,46 @@
1
+ import dendropy
2
+
3
class ExtendedTree(dendropy.Tree):
    """A ``dendropy.Tree`` whose nodes carry a subtree size and a post-order index.

    NOTE(review): ``compute_sizes`` runs inside ``__init__``. If the tree is
    populated only after construction (presumably the case for the ``get``
    factory -- confirm against dendropy), ``compute_sizes()`` has to be
    called again before ``size``/``index``/``taxon_mapping`` are meaningful.
    """

    def __init__(self,*args,**kwargs) -> None:
        super().__init__(*args,**kwargs)
        # Taxon label -> leaf node; filled in by compute_sizes().
        self.taxon_mapping = dict()
        self.compute_sizes()

    @classmethod
    def node_factory(cls, **kwargs):
        """Build nodes as ``NewNode`` so they have ``size``/``index``/``heavy``."""
        return NewNode(**kwargs)

    def compute_sizes(self):
        """Assign ``size`` and ``index`` to every node and index the leaves.

        A leaf gets size 1 and is registered in ``taxon_mapping`` under its
        taxon label; any other node gets the sum of its children's sizes.
        ``index`` is the node's position in post-order, starting at 0.

        The traversal is iterative: the previous recursive version used one
        Python stack frame per tree level and raised ``RecursionError`` on
        very deep trees.
        """
        for position, node in enumerate(self.postorder_node_iter()):
            # A childless seed node also reports is_leaf(); it has no parent,
            # so it falls through to the sum below and gets size 0.
            if node.is_leaf() and node.parent_node:
                self.taxon_mapping[node.taxon.label] = node
                node.size = 1
            else:
                # Children precede their parent in post-order, so their
                # sizes are already set.
                node.size = sum(child.size for child in node.child_nodes())
            node.index = position

    def get_leaf_from_taxonlabel(self,l):
        """Return the leaf registered under taxon label ``l``.

        Raises ``KeyError`` when the label was not seen by ``compute_sizes``.
        """
        return self.taxon_mapping[l]
30
+
31
+
32
+
33
class NewNode(dendropy.Node):
    """A ``dendropy.Node`` with extra bookkeeping attributes.

    ``size`` and ``index`` start at -1 and ``heavy`` at False until some
    other code assigns them.
    """

    def __init__(self,*args,**kwargs) -> None:
        super().__init__(*args,**kwargs)
        self.size = self.index = -1
        self.heavy = False

    def is_heavy(self):
        """Return True for a parentless node, or when ``parent.size <= 2 * size``."""
        if not self._parent_node:
            return True
        return self.parent_node.size <= 2*self.size
@@ -0,0 +1,120 @@
1
+ from extendedtree import ExtendedTree, NewNode
2
+ from math import floor
3
+
4
class IntervalNode:
    """Node of a binary tree over the closed index interval ``[start, end]``."""

    def __init__(self, a, b) -> None:
        self.start = a
        self.end = b
        self.left_child = None
        self.right_child = None
        self.parent = None

    def set_left_child(self,lnode):
        """Attach ``lnode`` as the left child and point it back at this node."""
        self.left_child = lnode
        lnode.set_parent(self)

    def set_right_child(self,rnode):
        """Attach ``rnode`` as the right child and point it back at this node."""
        self.right_child = rnode
        rnode.set_parent(self)

    def set_parent(self, parent):
        self.parent = parent

    def get_interval(self):
        """Return the interval as a ``(start, end)`` tuple."""
        return (self.start,self.end)


class PathSearchTree:
    """Balanced interval tree over the positions of ``path``.

    All per-interval tables are keyed by ``(start, end)``:

    * ``D``      -- additive offset stored at the interval; a single
                    position starts at that node's ``size``, every wider
                    interval at 0.
    * ``minval`` / ``maxval`` -- minimum / maximum over the interval,
                    including the interval's own ``D`` but not the ``D`` of
                    its ancestors.

    The initial ``minval``/``maxval`` are read from ``path[end].size`` and
    ``path[start].size`` -- presumably because sizes do not increase along
    the path; confirm against the caller.

    An empty ``path`` is not supported (indexing it raises ``IndexError``).
    """

    def __init__(self,path):
        self.nodes = set(path)
        self.path = path
        self.root = IntervalNode(0,len(path)-1)
        self.D = dict()
        self.minval = dict()
        self.maxval = dict()
        self.interval_lookup = {(self.root.start,self.root.end):self.root}
        pending = [self.root]
        while pending:
            n = pending.pop()
            key = (n.start,n.end)
            self.minval[key] = self.path[n.end].size
            self.maxval[key] = self.path[n.start].size
            if n.start != n.end:
                self.D[key] = 0
                # Split into [start, mid] and [mid + 1, end].
                mid = n.start + (n.end - n.start)//2
                lnode = IntervalNode(n.start,mid)
                rnode = IntervalNode(mid+1,n.end)
                n.set_left_child(lnode)
                n.set_right_child(rnode)
                self.interval_lookup[(lnode.start,lnode.end)] = lnode
                self.interval_lookup[(rnode.start,rnode.end)] = rnode
                pending.extend([lnode,rnode])
            else:
                self.D[key] = self.path[n.start].size

    def update_path(self,l,d):
        """Add ``d`` at path element ``l`` and propagate up to the root.

        Starting from the single-position interval of ``l``, the walk climbs
        to the root. Whenever the current interval is a right child, ``d`` is
        also added to its left sibling (net effect: positions ``0..index(l)``
        are shifted by ``d``), and each parent's ``minval``/``maxval`` is
        recomputed from its two children.

        The walk now always continues to the root. The earlier condition
        (``root.start != a and root.end != b and a and b``) stopped as soon
        as the interval touched either end of the path or contained index 0,
        leaving sibling offsets and ancestor min/max values stale.

        Raises ``ValueError`` if ``l`` is not in the path.
        """
        x = self.path.index(l)
        self.D[x,x] = self.D[x,x] + d
        self.minval[x,x] = self.minval[x,x] + d
        self.maxval[x,x] = self.maxval[x,x] + d
        root_interval = self.root.get_interval()
        a,b = x,x
        while (a,b) != root_interval:
            a_p,b_p = self.get_parent_interval((a,b)).get_interval()
            a_s,b_s = self.get_sibling_interval((a,b)).get_interval()
            if b == b_p:
                # (a, b) is the right child: its left sibling lies entirely
                # before position x and receives the same offset.
                self.D[a_s,b_s] = self.D[a_s,b_s] + d
                self.minval[a_s,b_s] = self.minval[a_s,b_s] + d
                self.maxval[a_s,b_s] = self.maxval[a_s,b_s] + d
            self.minval[a_p,b_p] = min(self.minval[a_s,b_s],self.minval[a,b])+self.D[a_p,b_p]
            self.maxval[a_p,b_p] = max(self.maxval[a_s,b_s],self.maxval[a,b])+self.D[a_p,b_p]
            a,b = a_p,b_p

    def get_parent_interval(self,interval):
        """Return the parent ``IntervalNode`` of ``interval`` (None for the root)."""
        return self.interval_lookup[interval].parent

    def get_sibling_interval(self, interval):
        """Return the sibling ``IntervalNode`` of ``interval``.

        Raises ``ValueError`` for the root interval, which has no sibling.
        (The exception used to be constructed but never raised, so ``None``
        was returned silently.)
        """
        n = self.interval_lookup[interval]
        if n.parent is None:
            raise ValueError("Root node has no parent")
        if n.start == n.parent.start:
            return n.parent.right_child
        return n.parent.left_child

    def contains(self, x):
        """Return True when ``x`` is one of the path's elements."""
        return x in self.nodes

    def __str__(self) -> str:
        return ",".join([str(i) for i in self.path])
92
+
93
+
94
class HeavyPathDecomposition:
    """Splits the nodes of a tree into upward paths, one ``PathSearchTree`` each.

    For every leaf a path is grown towards the root: a node is appended
    unless an earlier path already claimed it, and the walk moves on to the
    parent only while the current node has a parent and ``is_heavy()``.
    Each path is stored top-most node first.
    """

    def __init__(self,tree: ExtendedTree):
        self.paths = []
        self.tree = tree
        claimed = set()
        for leaf in tree.leaf_node_iter():
            chain = []
            node = leaf
            while node not in claimed:
                chain.append(node)
                claimed.add(node)
                if not (node.parent_node and node.is_heavy()):
                    break
                node = node._parent_node
            # chain was collected leaf-first; store it top-down.
            self.paths.append(PathSearchTree(chain[::-1]))

    def get_path(self,x):
        """Return the first path containing ``x``, or None when there is none."""
        return next((p for p in self.paths if p.contains(x)), None)

    def __str__(self) -> str:
        return "\n".join(str(p) for p in self.paths)
119
+
120
+
@@ -0,0 +1 @@
1
+ import dendropy
@@ -0,0 +1,9 @@
1
+ from heavy_path import *
2
+ import dendropy
3
+ from extendedtree import ExtendedTree
4
+ from cluster_affinity import rooted_cluster_affinity
5
+
6
# Scratch check: build two six-taxon trees and print the affinity cost of the
# first tree against itself and against the second one.
t, t2 = (
    ExtendedTree.get(data=newick, schema="newick", rooting="default-rooted")
    for newick in ("((A,B),((C,D),(F,E)));", "((A,D),((C,B),(F,E)));")
)
print(rooted_cluster_affinity(t,t))
# Expected: d((A,B)) + d((C,D)) + d((C,D,E,F)) = 1 + 1 + 2 = 4
print(rooted_cluster_affinity(t,t2))
@@ -0,0 +1,28 @@
1
+ import argparse
2
+ from cluster_computation import rooted_cluster_affinity
3
+ import dendropy
4
+
5
+
6
def cluster_affinity_script(argv=None):
    """Command-line entry point: print the cluster affinity cost from t1 to t2.

    Parameters:
        argv: optional list of argument strings; when None (the default, and
            what the console script uses) ``sys.argv[1:]`` is parsed.

    Both trees are read as Newick files into one shared taxon namespace.

    Raises:
        RuntimeWarning: when the shared namespace holds more taxa than either
            tree uses, i.e. the two trees do not share the same taxon set.
            (The exception type is kept as-is for backward compatibility.)
    """
    parser = argparse.ArgumentParser(
        # Must match the installed console-script name so the usage line
        # shows a command the user can actually type.
        prog='cluster_affinity',
        description='Calculates the Asymmetric Cluster Affinity cost from t1 to t2',
    )
    parser.add_argument('t1', help='The source tree from which the cost is to be calculated')
    parser.add_argument('t2', help='The target tree to which the cost is to be calculated')

    args = parser.parse_args(argv)
    tns = dendropy.TaxonNamespace(label="taxa")
    t1 = dendropy.Tree.get(path=args.t1,taxon_namespace=tns,schema="newick",rooting="default-rooted")
    t2 = dendropy.Tree.get(path=args.t2,taxon_namespace=tns,schema="newick",rooting="default-rooted")

    # tns accumulates the taxa of both files; it can only outgrow a tree's
    # own taxa when the other tree introduced labels this one lacks.
    if len(tns)>len(t1.poll_taxa()) or len(tns)>len(t2.poll_taxa()):
        raise RuntimeWarning("The trees do not have the same taxon set")

    print(rooted_cluster_affinity(t1,t2))
26
+
27
+ if __name__=="__main__":
28
+ cluster_affinity_script()
File without changes
@@ -0,0 +1,31 @@
1
+ import unittest
2
+ from ..cluster_computation import *
3
+ from dendropy import Tree,TaxonNamespace
4
+ from dendropy.simulate import treesim
5
+ import math
6
+ import numpy as np
7
+
8
class TestClusterComputation:
    """pytest-style checks for ``rooted_cluster_affinity``."""

    t1 = Tree.get(data="((A,B),(C,D));",schema="newick",rooting="default-rooted")
    t2 = Tree.get(data="((A,C),(B,D));",schema="newick",rooting="default-rooted")

    def test_cluster_affinity_zero(self):
        """A tree compared with itself costs nothing."""
        dist = cluster_affinity.rooted_cluster_affinity(self.t1,self.t1)
        assert dist == 0

    def test_cluster_affinity(self):
        """{A,B} and {C,D} are each one taxon away from a cluster of t2."""
        dist = cluster_affinity.rooted_cluster_affinity(self.t1,self.t2)
        assert dist == 2

    def test_cluster_affinity_tau(self):
        """Random tree pairs stay within [0, upper bound]."""
        ntax = 100
        taxon_ns = TaxonNamespace(["l{}".format(i) for i in range(ntax)])
        # NOTE(review): ceil() is applied before the division, so it has no
        # effect on this integer; ceil((n*n - 2*n)/4) was probably intended.
        # Both forms give 2450 for ntax = 100 -- confirm the intended bound.
        upper_bound = math.ceil(ntax*ntax - 2*ntax)/4
        for i in range(1000):
            t1 = treesim.birth_death_tree(birth_rate=1.0,death_rate=0,num_extant_tips=len(taxon_ns),taxon_namespace=taxon_ns)
            t2 = treesim.birth_death_tree(birth_rate=1.0,death_rate=0,num_extant_tips=len(taxon_ns),taxon_namespace=taxon_ns)
            dist = cluster_affinity.rooted_cluster_affinity(t1,t2)
            # The message used to call rooted_cluster_affinity_matrix, which
            # the module does not define; a failing assertion would have
            # raised AttributeError instead of showing the offending trees.
            assert dist >= 0,"{} {} {}".format(t1.as_string(schema="newick"),
                                               t2.as_string(schema="newick"),
                                               dist)
            assert dist <= upper_bound