arg-dashboard 0.1.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arg_dashboard/__init__.py +934 -0
- arg_dashboard/arg.py +1019 -0
- arg_dashboard/arg_layout_force.py +365 -0
- arg_dashboard/arg_layout_json.py +269 -0
- arg_dashboard/arg_layout_mindist.py +412 -0
- arg_dashboard/assets/dashboard.css +139 -0
- arg_dashboard/assets/images/arg.png +0 -0
- arg_dashboard/assets/images/placeholder286x180.png +0 -0
- arg_dashboard/assets/tabs.css +34 -0
- arg_dashboard/index.py +6 -0
- arg_dashboard-0.1.19.dist-info/METADATA +88 -0
- arg_dashboard-0.1.19.dist-info/RECORD +16 -0
- arg_dashboard-0.1.19.dist-info/WHEEL +5 -0
- arg_dashboard-0.1.19.dist-info/entry_points.txt +2 -0
- arg_dashboard-0.1.19.dist-info/licenses/LICENSE +674 -0
- arg_dashboard-0.1.19.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,365 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ARG Layout - Force-Directed (Unconstrained)
|
|
3
|
+
============================================
|
|
4
|
+
Parents are NOT required to be between their children's x-coordinates.
|
|
5
|
+
Uses edge repulsion to spread the graph while maintaining structure.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
from collections import defaultdict
|
|
10
|
+
import json
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def compute_arg_xpos(arg_data, n_iterations=100, edge_repulsion=0.02,
                     node_repulsion=0.02, child_attraction=0.15,
                     parent_attraction=0.05, seed=None):
    """
    Compute x-positions for an ARG using force-directed layout.

    Parents can be positioned outside their children's x-range if
    edge repulsion forces push them there.

    Nodes at height 0 are treated as fixed leaves.  Every other node starts
    at the centroid of its already-placed children and is then moved for
    ``n_iterations`` rounds by four forces (edge-edge repulsion, node-node
    repulsion within a height layer, attraction to the children centroid,
    attraction to the parents centroid).  After every round positions are
    clipped to [0, 1].

    Parameters
    ----------
    arg_data : dict
        ARG data with 'Leaf', 'Coalescent', 'Recombination', and 'Lineage' keys
    n_iterations : int
        Number of force-directed iterations
    edge_repulsion : float
        Strength of edge-edge repulsion (spreads edges apart)
    node_repulsion : float
        Strength of node-node repulsion within layers
    child_attraction : float
        Strength of attraction toward children centroid
    parent_attraction : float
        Strength of attraction toward parents centroid (applied to every
        non-leaf node that has at least one parent)
    seed : int or None
        Seed for the random tie-break used when two edge midpoints coincide
        exactly.  ``None`` (the default) keeps drawing from NumPy's global
        random state; any other value makes the layout reproducible.

    Returns
    -------
    dict
        The input arg_data with updated 'xpos' values for all nodes
        (the dict is modified in place and also returned)
    """
    node_groups = ('Leaf', 'Coalescent', 'Recombination')

    # Extract all nodes and heights, in Leaf / Coalescent / Recombination order
    all_nodes = []
    heights = {}
    for group in node_groups:
        for record in arg_data[group]:
            nid = record['nodeid']
            all_nodes.append(nid)
            heights[nid] = record['height']

    # Build edges from Lineage data: (parent, child).
    # Lineages whose 'up' is None have no parent and contribute no edge.
    edges = [(lineage['up'], lineage['down'])
             for lineage in arg_data['Lineage']
             if lineage['up'] is not None]

    # Build adjacency lists
    children_map = defaultdict(list)
    parents_map = defaultdict(list)
    for parent, child in edges:
        children_map[parent].append(child)
        parents_map[child].append(parent)

    # Identify leaves (height 0); these never move
    leaves = [n for n in all_nodes if heights[n] == 0]
    n_leaves = len(leaves)
    x_min, x_max = 0.0, 1.0
    x_mid = (x_min + x_max) / 2

    # Group nodes by height
    layers = defaultdict(list)
    for node in all_nodes:
        layers[heights[node]].append(node)
    sorted_heights = sorted(layers)

    x_pos = {}

    # Use original leaf positions if available
    original_leaf_xpos = {
        leaf['nodeid']: leaf['xpos']
        for leaf in arg_data['Leaf']
        if leaf.get('xpos') is not None
    }
    # The count check alone is not enough: the ids carrying an xpos must be
    # exactly the height-0 nodes, otherwise the lookup below would raise
    # KeyError.  In that case fall back to even spacing.
    use_original = (
        bool(original_leaf_xpos)
        and len(original_leaf_xpos) == n_leaves
        and all(node in original_leaf_xpos for node in leaves)
    )
    if use_original:
        for node in leaves:
            x_pos[node] = original_leaf_xpos[node]
    else:
        # Space evenly, ordered by node id
        for i, node in enumerate(sorted(leaves)):
            if n_leaves > 1:
                x_pos[node] = x_min + i * (x_max - x_min) / (n_leaves - 1)
            else:
                x_pos[node] = x_mid

    # Initialize internal nodes at children centroid, lowest layers first so
    # that children are normally placed before their parents.
    inner_heights = [h for h in sorted_heights if h != 0]
    for h in inner_heights:
        for node in layers[h]:
            child_xs = [x_pos[c] for c in children_map[node] if c in x_pos]
            x_pos[node] = np.mean(child_xs) if child_xs else x_mid

    # Source of the coin flip used for exact midpoint ties
    if seed is None:
        coin = np.random.random
    else:
        coin = np.random.default_rng(seed).random

    # Heights never change, so each edge's vertical span is computed once.
    n_edges = len(edges)
    spans = []
    if n_iterations > 0:
        spans = [(min(heights[p], heights[c]), max(heights[p], heights[c]))
                 for p, c in edges]
    movable = [node for node in all_nodes if heights[node] > 0]

    # Force-directed iterations
    for iteration in range(n_iterations):
        # Learning rate with decay (0.5 down towards 0.25)
        lr = 0.5 * (1 - 0.5 * iteration / n_iterations)

        # Initialize forces for non-leaf nodes
        forces = {node: 0.0 for node in movable}

        # 1. Edge-edge repulsion
        for i in range(n_edges):
            p1, c1 = edges[i]
            y1_min, y1_max = spans[i]
            for j in range(i + 1, n_edges):
                p2, c2 = edges[j]

                # Skip if edges share a node
                if p1 == p2 or c1 == c2 or p1 == c2 or p2 == c1:
                    continue

                # Only apply force if edges overlap vertically
                y2_min, y2_max = spans[j]
                if y1_max <= y2_min or y2_max <= y1_min:
                    continue

                # Horizontal distance between edge midpoints
                mid1_x = (x_pos[p1] + x_pos[c1]) / 2
                mid2_x = (x_pos[p2] + x_pos[c2]) / 2
                dx = mid1_x - mid2_x
                dist = abs(dx) + 0.05  # offset avoids division by zero

                # Repulsive force, capped
                force = edge_repulsion / (dist ** 2)
                force = min(force, 0.1)

                # Direction; an exact tie is broken by a coin flip
                sign = 1 if dx > 0 else -1
                if dx == 0:
                    sign = 1 if coin() > 0.5 else -1

                # Apply half the force to each endpoint (except leaves)
                push = force * sign * 0.5
                if heights[p1] > 0:
                    forces[p1] += push
                if heights[c1] > 0:
                    forces[c1] += push
                if heights[p2] > 0:
                    forces[p2] -= push
                if heights[c2] > 0:
                    forces[c2] -= push

        # 2. Node-node repulsion within layers
        for h in inner_heights:
            layer_nodes = layers[h]
            for i, n1 in enumerate(layer_nodes):
                for n2 in layer_nodes[i + 1:]:
                    dx = x_pos[n1] - x_pos[n2]
                    dist = abs(dx) + 0.05
                    force = node_repulsion / (dist ** 2)
                    force = min(force, 0.05)
                    sign = 1 if dx > 0 else -1
                    forces[n1] += force * sign
                    forces[n2] -= force * sign

        # 3. Attraction to children (maintains some structure)
        for node in all_nodes:
            if heights[node] > 0 and children_map[node]:
                child_xs = [x_pos[c] for c in children_map[node]]
                centroid = np.mean(child_xs)
                forces[node] += child_attraction * (centroid - x_pos[node])

        # 4. Attraction to parents
        for node in all_nodes:
            if heights[node] > 0 and parents_map[node]:
                parent_xs = [x_pos[p] for p in parents_map[node]]
                centroid = np.mean(parent_xs)
                forces[node] += parent_attraction * (centroid - x_pos[node])

        # Apply forces, then enforce hard boundaries
        for node, force in forces.items():
            x_pos[node] += lr * force
            x_pos[node] = np.clip(x_pos[node], x_min, x_max)

    # Update the JSON data (plain floats so the result stays serialisable)
    for group in node_groups:
        for record in arg_data[group]:
            record['xpos'] = float(x_pos[record['nodeid']])

    return arg_data
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def count_crossings(arg_data):
    """Count edge crossings in the layout.

    Two edges are counted as crossing when they share no endpoint, their
    height ranges overlap strictly, and the left/right order of their
    parents is the opposite of the left/right order of their children.

    Parameters
    ----------
    arg_data : dict
        ARG data with 'Leaf', 'Coalescent', 'Recombination', and 'Lineage'
        keys; every node record must carry 'nodeid', 'height' and 'xpos'.

    Returns
    -------
    int
        Number of edge pairs that satisfy the crossing test.
    """
    height_of = {}
    x_of = {}
    for group in ('Leaf', 'Coalescent', 'Recombination'):
        for record in arg_data[group]:
            key = record['nodeid']
            height_of[key] = record['height']
            x_of[key] = record['xpos']

    # (parent, child) pairs; lineages without a parent are not edges
    links = [(lineage['up'], lineage['down'])
             for lineage in arg_data['Lineage']
             if lineage['up'] is not None]

    def _is_crossing(first, second):
        top_a, bottom_a = first
        top_b, bottom_b = second
        # Edges meeting at a node cannot cross
        if top_a == top_b or bottom_a == bottom_b or top_a == bottom_b or top_b == bottom_a:
            return False
        upper_a, lower_a = height_of[top_a], height_of[bottom_a]
        upper_b, lower_b = height_of[top_b], height_of[bottom_b]
        # No shared height band -> no crossing
        if max(lower_a, lower_b) >= min(upper_a, upper_b):
            return False
        # Opposite horizontal order at the parents and at the children
        return (x_of[top_a] - x_of[top_b]) * (x_of[bottom_a] - x_of[bottom_b]) < 0

    n_links = len(links)
    return sum(
        1
        for a in range(n_links)
        for b in range(a + 1, n_links)
        if _is_crossing(links[a], links[b])
    )
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def check_parents_outside_children(arg_data):
    """Find parents positioned outside their children's x-range.

    Parameters
    ----------
    arg_data : dict
        ARG data with 'Leaf', 'Coalescent', 'Recombination', and 'Lineage'
        keys; every node record must carry 'nodeid', 'height' and 'xpos'.

    Returns
    -------
    list of dict
        One entry per parent lying more than 0.001 outside the span of its
        children, with keys 'node', 'x', 'children_range' (a ``(lo, hi)``
        tuple) and 'offset' (signed distance to the nearer end of that span).
    """
    tolerance = 0.001

    node_height = {}
    node_x = {}
    for group in ('Leaf', 'Coalescent', 'Recombination'):
        for record in arg_data[group]:
            ident = record['nodeid']
            node_height[ident] = record['height']
            node_x[ident] = record['xpos']

    # parent id -> child ids, taken from lineages that have a parent
    kids_of = defaultdict(list)
    for lineage in arg_data['Lineage']:
        upper = lineage['up']
        if upper is None:
            continue
        kids_of[upper].append(lineage['down'])

    report = []
    for parent, kids in kids_of.items():
        if not kids:
            continue
        kid_xs = [node_x[k] for k in kids]
        left, right = min(kid_xs), max(kid_xs)
        here = node_x[parent]
        is_outside = here < left - tolerance or here > right + tolerance
        if is_outside:
            entry = {
                'node': parent,
                'x': here,
                'children_range': (left, right),
                # signed distance to whichever end of the span is closer
                'offset': min(here - left, here - right, key=abs),
            }
            report.append(entry)

    return report
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
# Demo
|
|
303
|
+
if __name__ == '__main__':
    # Small hand-written ARG used as a smoke test when the module is run
    # directly: 5 leaves, 6 coalescent nodes and 2 recombination nodes.
    # The layout functions in this module read only 'nodeid', 'height' and
    # 'xpos' from the node records and 'up'/'down' from the lineages; the
    # remaining fields are carried along untouched.
    arg_json = {
        "Coalescent": [
            {"nodeid": 5, "height": 0.13669486930750932, "children": [2, 1], "parent": 5, "xpos": 0.125},
            {"nodeid": 7, "height": 0.3897837685725356, "children": [7, 4], "parent": 8, "xpos": 0.5125},
            {"nodeid": 8, "height": 0.5503568938072688, "children": [8, 3], "parent": 9, "xpos": 0.60625},
            {"nodeid": 10, "height": 0.7368215587091936, "children": [10, 9], "parent": 12, "xpos": 0.578125},
            {"nodeid": 11, "height": 0.9908807698242248, "children": [6, 11], "parent": 13, "xpos": 0.3125},
            {"nodeid": 12, "height": 1.0, "children": [13, 12], "parent": 14, "xpos": 0.4453125}
        ],
        "Recombination": [
            {"nodeid": 6, "height": 0.331775128015738, "child": 5, "left_parent": 6, "right_parent": 7, "recomb_point": 0.9254180219413557, "xpos": 0.125},
            {"nodeid": 9, "height": 0.592546189634543, "child": 0, "left_parent": 10, "right_parent": 11, "recomb_point": 0.1973898113750865, "xpos": 0.5}
        ],
        "Leaf": [
            {"nodeid": 0, "height": 0.0, "intervals": [[0, 1]], "parent": 0, "xpos": 0.5},
            {"nodeid": 1, "height": 0.0, "intervals": [[0, 1]], "parent": 1, "xpos": 0.0},
            {"nodeid": 2, "height": 0.0, "intervals": [[0, 1]], "parent": 2, "xpos": 0.25},
            {"nodeid": 3, "height": 0.0, "intervals": [[0, 1]], "parent": 3, "xpos": 1.0},
            {"nodeid": 4, "height": 0.0, "intervals": [[0, 1]], "parent": 4, "xpos": 0.75}
        ],
        # Each lineage is an edge from node 'down' up to node 'up';
        # the last one ('up' is None) is the root and yields no edge.
        "Lineage": [
            {"lineageid": 0, "down": 0, "up": 9, "intervals": [[0, 1]]},
            {"lineageid": 1, "down": 1, "up": 5, "intervals": [[0, 1]]},
            {"lineageid": 2, "down": 2, "up": 5, "intervals": [[0, 1]]},
            {"lineageid": 3, "down": 3, "up": 8, "intervals": [[0, 1]]},
            {"lineageid": 4, "down": 4, "up": 7, "intervals": [[0, 1]]},
            {"lineageid": 5, "down": 5, "up": 6, "intervals": [[0, 1]]},
            {"lineageid": 6, "down": 6, "up": 11, "intervals": [[0, 0.9254180219413557]]},
            {"lineageid": 7, "down": 6, "up": 7, "intervals": [[0.9254180219413557, 1]]},
            {"lineageid": 8, "down": 7, "up": 8, "intervals": [[0, 1]]},
            {"lineageid": 9, "down": 8, "up": 10, "intervals": [[0, 1]]},
            {"lineageid": 10, "down": 9, "up": 10, "intervals": [[0, 0.1973898113750865]]},
            {"lineageid": 11, "down": 9, "up": 11, "intervals": [[0, 1]]},
            {"lineageid": 12, "down": 10, "up": 12, "intervals": [[0, 1]]},
            {"lineageid": 13, "down": 11, "up": 12, "intervals": [[0, 1]]},
            {"lineageid": 14, "down": 12, "up": None, "intervals": [[0, 1]]}
        ]
    }

    import copy

    # Report the metrics of the positions shipped with the fixture.
    print("Original layout:")
    print(f" Crossings: {count_crossings(arg_json)}")
    print(f" Leaves: {[(l['nodeid'], l['xpos']) for l in arg_json['Leaf']]}")
    print(f" Coalescent: {[(c['nodeid'], round(c['xpos'], 3)) for c in arg_json['Coalescent']]}")
    print(f" Recombination: {[(r['nodeid'], round(r['xpos'], 3)) for r in arg_json['Recombination']]}")
    outside = check_parents_outside_children(arg_json)
    print(f" Parents outside children: {len(outside)}")

    # Compute new layout
    # compute_arg_xpos writes 'xpos' into the dict it receives, so a deep
    # copy is laid out and arg_json keeps its original positions.
    arg_updated = compute_arg_xpos(copy.deepcopy(arg_json))

    print("\nForce-directed layout:")
    print(f" Crossings: {count_crossings(arg_updated)}")
    print(f" Leaves: {[(l['nodeid'], l['xpos']) for l in arg_updated['Leaf']]}")
    print(f" Coalescent: {[(c['nodeid'], round(c['xpos'], 3)) for c in arg_updated['Coalescent']]}")
    print(f" Recombination: {[(r['nodeid'], round(r['xpos'], 3)) for r in arg_updated['Recombination']]}")

    # List every parent that ended up outside the x-span of its children.
    outside = check_parents_outside_children(arg_updated)
    print(f" Parents outside children: {len(outside)}")
    for o in outside:
        print(f" Node {o['node']} at {o['x']:.3f}, children in [{o['children_range'][0]:.3f}, {o['children_range'][1]:.3f}]")
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ARG Layout for JSON Format
|
|
3
|
+
===========================
|
|
4
|
+
Computes x-coordinates for ARG nodes defined in the given JSON structure.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
from collections import defaultdict
|
|
9
|
+
import json
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _leaf_descendants(nodes, children_map):
    """Map every node reachable from *nodes* to the set of childless nodes below it.

    A node without children maps to a set containing only itself.  The
    traversal uses an explicit stack, so the depth of the graph is not limited
    by the interpreter's recursion limit.

    Raises
    ------
    ValueError
        If the parent/child relation contains a cycle.
    """
    memo = {}
    expanded = set()  # nodes whose unresolved children were already pushed
    for start in nodes:
        if start in memo:
            continue
        stack = [start]
        while stack:
            node = stack[-1]
            if node in memo:
                stack.pop()
                continue
            children = children_map.get(node)
            if not children:  # Is a leaf
                memo[node] = {node}
                stack.pop()
                continue
            pending = [c for c in children if c not in memo]
            if pending:
                if node in expanded:
                    # The children were visited and are still unresolved,
                    # which only happens when the node is its own ancestor.
                    raise ValueError(
                        f"cycle detected in lineage graph at node {node!r}")
                expanded.add(node)
                stack.extend(pending)
                continue
            merged = set()
            for child in children:
                merged |= memo[child]
            memo[node] = merged
            stack.pop()
    return memo


def compute_arg_xpos(arg_data, n_iterations=50):
    """
    Compute x-positions for an ARG defined in JSON format.

    Uses a barycenter heuristic: each internal node is first positioned at
    the centroid of its leaf descendants.  The positions are then refined
    iteratively: every non-leaf node moves to the weighted average of its
    children (weight 2) and its parents (weight 1).

    Parameters
    ----------
    arg_data : dict
        ARG data with 'Leaf', 'Coalescent', 'Recombination', and 'Lineage' keys
    n_iterations : int
        Number of refinement iterations

    Returns
    -------
    dict
        The input arg_data with updated 'xpos' values for all nodes
        (the dict is modified in place and also returned)

    Raises
    ------
    ValueError
        If the lineages describe a cyclic parent/child relation.
    """
    node_groups = ('Leaf', 'Coalescent', 'Recombination')

    # Extract all nodes and heights, in Leaf / Coalescent / Recombination order
    all_nodes = []
    heights = {}
    for group in node_groups:
        for record in arg_data[group]:
            nid = record['nodeid']
            all_nodes.append(nid)
            heights[nid] = record['height']

    # Build edges from Lineage data: (parent, child).
    # Lineages whose 'up' is None have no parent and contribute no edge.
    edges = [(lineage['up'], lineage['down'])
             for lineage in arg_data['Lineage']
             if lineage['up'] is not None]

    # Build adjacency lists
    children_map = defaultdict(list)
    parents_map = defaultdict(list)
    for parent, child in edges:
        children_map[parent].append(child)
        parents_map[child].append(parent)

    # Identify leaves (height 0); these never move
    leaves = [n for n in all_nodes if heights[n] == 0]
    n_leaves = len(leaves)
    x_min, x_max = 0.0, 1.0
    x_mid = (x_min + x_max) / 2

    # Group nodes by height
    layers = defaultdict(list)
    for node in all_nodes:
        layers[heights[node]].append(node)
    sorted_heights = sorted(layers)
    inner_heights = [h for h in sorted_heights if h != 0]

    x_pos = {}

    # Use original leaf positions if available, otherwise space evenly
    original_leaf_xpos = {
        leaf['nodeid']: leaf['xpos']
        for leaf in arg_data['Leaf']
        if leaf.get('xpos') is not None
    }
    # The count check alone is not enough: the ids carrying an xpos must be
    # exactly the height-0 nodes, otherwise the lookup below would raise
    # KeyError.  In that case fall back to even spacing.
    use_original = (
        bool(original_leaf_xpos)
        and len(original_leaf_xpos) == n_leaves
        and all(node in original_leaf_xpos for node in leaves)
    )
    if use_original:
        for node in leaves:
            x_pos[node] = original_leaf_xpos[node]
    else:
        # Space evenly (sorted by node id)
        for i, node in enumerate(sorted(leaves)):
            if n_leaves > 1:
                x_pos[node] = x_min + i * (x_max - x_min) / (n_leaves - 1)
            else:
                x_pos[node] = x_mid

    # Compute leaf descendants for each node (for barycenter)
    leaf_desc_memo = _leaf_descendants(all_nodes, children_map)

    # Position each internal node at centroid of its leaf descendants.
    # Only descendants that already have a position are averaged; a node
    # with none (for example a non-leaf node without any children) is
    # placed at the centre.
    for h in inner_heights:
        for node in layers[h]:
            descendants = leaf_desc_memo.get(node, set())
            known_xs = [x_pos[d] for d in descendants if d in x_pos]
            x_pos[node] = np.mean(known_xs) if known_xs else x_mid

    # Iterative refinement: consider both children and parents.
    # Positions are updated in place, so later nodes in a sweep already see
    # the new positions of earlier ones.
    for _ in range(n_iterations):
        for h in inner_heights:
            for node in layers[h]:
                pulls = []
                weights = []

                # Pull from children (stronger)
                for child in children_map[node]:
                    pulls.append(x_pos[child])
                    weights.append(2.0)

                # Pull from parents (weaker)
                for parent in parents_map[node]:
                    pulls.append(x_pos[parent])
                    weights.append(1.0)

                if pulls:
                    x_pos[node] = np.average(pulls, weights=weights)

    # Ensure positions are within bounds
    for node in all_nodes:
        x_pos[node] = np.clip(x_pos[node], x_min, x_max)

    # Update the JSON data with computed positions (plain floats so the
    # result stays serialisable)
    for group in node_groups:
        for record in arg_data[group]:
            record['xpos'] = float(x_pos[record['nodeid']])

    return arg_data
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def count_crossings(arg_data):
    """Count edge crossings in the layout.

    A pair of edges counts as one crossing when the two edges have no node
    in common, their height ranges overlap strictly, and their parents are
    ordered left-to-right the opposite way from their children.

    Parameters
    ----------
    arg_data : dict
        ARG data with 'Leaf', 'Coalescent', 'Recombination', and 'Lineage'
        keys; every node record must carry 'nodeid', 'height' and 'xpos'.

    Returns
    -------
    int
        Number of edge pairs that satisfy the crossing test.
    """
    # Extract heights and positions
    level = {}
    column = {}
    for section in ('Leaf', 'Coalescent', 'Recombination'):
        for entry in arg_data[section]:
            ident = entry['nodeid']
            level[ident] = entry['height']
            column[ident] = entry['xpos']

    # Build edges as (parent, child); parentless lineages are skipped
    segments = []
    for lineage in arg_data['Lineage']:
        above = lineage['up']
        if above is None:
            continue
        segments.append((above, lineage['down']))

    # Count crossings over every unordered pair of edges
    found = 0
    last = len(segments)
    for first in range(last):
        pa, ca = segments[first]
        for second in range(first + 1, last):
            pb, cb = segments[second]
            touching = pa == pb or ca == cb or pa == cb or pb == ca
            if touching:
                continue
            top_a, bottom_a = level[pa], level[ca]
            top_b, bottom_b = level[pb], level[cb]
            # Edges need a shared height band to be able to cross
            if not max(bottom_a, bottom_b) >= min(top_a, top_b):
                order_flip = (column[pa] - column[pb]) * (column[ca] - column[cb])
                if order_flip < 0:
                    found += 1

    return found
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# Demo with the provided JSON
|
|
210
|
+
if __name__ == '__main__':
    # The provided ARG data
    # Small hand-written ARG used as a smoke test when the module is run
    # directly: 5 leaves, 6 coalescent nodes and 2 recombination nodes.
    # The layout functions in this module read only 'nodeid', 'height' and
    # 'xpos' from the node records and 'up'/'down' from the lineages; the
    # remaining fields are carried along untouched.
    arg_json = {
        "Coalescent": [
            {"nodeid": 5, "height": 0.13669486930750932, "children": [2, 1], "parent": 5, "xpos": 0.125},
            {"nodeid": 7, "height": 0.3897837685725356, "children": [7, 4], "parent": 8, "xpos": 0.5125},
            {"nodeid": 8, "height": 0.5503568938072688, "children": [8, 3], "parent": 9, "xpos": 0.60625},
            {"nodeid": 10, "height": 0.7368215587091936, "children": [10, 9], "parent": 12, "xpos": 0.578125},
            {"nodeid": 11, "height": 0.9908807698242248, "children": [6, 11], "parent": 13, "xpos": 0.3125},
            {"nodeid": 12, "height": 1.0, "children": [13, 12], "parent": 14, "xpos": 0.4453125}
        ],
        "Recombination": [
            {"nodeid": 6, "height": 0.331775128015738, "child": 5, "left_parent": 6, "right_parent": 7, "recomb_point": 0.9254180219413557, "xpos": 0.125},
            {"nodeid": 9, "height": 0.592546189634543, "child": 0, "left_parent": 10, "right_parent": 11, "recomb_point": 0.1973898113750865, "xpos": 0.5}
        ],
        "Leaf": [
            {"nodeid": 0, "height": 0.0, "intervals": [[0, 1]], "parent": 0, "xpos": 0.5},
            {"nodeid": 1, "height": 0.0, "intervals": [[0, 1]], "parent": 1, "xpos": 0.0},
            {"nodeid": 2, "height": 0.0, "intervals": [[0, 1]], "parent": 2, "xpos": 0.25},
            {"nodeid": 3, "height": 0.0, "intervals": [[0, 1]], "parent": 3, "xpos": 1.0},
            {"nodeid": 4, "height": 0.0, "intervals": [[0, 1]], "parent": 4, "xpos": 0.75}
        ],
        # Each lineage is an edge from node 'down' up to node 'up';
        # the last one ('up' is None) is the root and yields no edge.
        "Lineage": [
            {"lineageid": 0, "down": 0, "up": 9, "intervals": [[0, 1]]},
            {"lineageid": 1, "down": 1, "up": 5, "intervals": [[0, 1]]},
            {"lineageid": 2, "down": 2, "up": 5, "intervals": [[0, 1]]},
            {"lineageid": 3, "down": 3, "up": 8, "intervals": [[0, 1]]},
            {"lineageid": 4, "down": 4, "up": 7, "intervals": [[0, 1]]},
            {"lineageid": 5, "down": 5, "up": 6, "intervals": [[0, 1]]},
            {"lineageid": 6, "down": 6, "up": 11, "intervals": [[0, 0.9254180219413557]]},
            {"lineageid": 7, "down": 6, "up": 7, "intervals": [[0.9254180219413557, 1]]},
            {"lineageid": 8, "down": 7, "up": 8, "intervals": [[0, 1]]},
            {"lineageid": 9, "down": 8, "up": 10, "intervals": [[0, 1]]},
            {"lineageid": 10, "down": 9, "up": 10, "intervals": [[0, 0.1973898113750865]]},
            {"lineageid": 11, "down": 9, "up": 11, "intervals": [[0, 1]]},
            {"lineageid": 12, "down": 10, "up": 12, "intervals": [[0, 1]]},
            {"lineageid": 13, "down": 11, "up": 12, "intervals": [[0, 1]]},
            {"lineageid": 14, "down": 12, "up": None, "intervals": [[0, 1]]}
        ]
    }

    # Report the positions shipped with the fixture and their crossing count.
    print("Original positions:")
    print(" Leaves:", {leaf['nodeid']: leaf['xpos'] for leaf in arg_json['Leaf']})
    print(" Coalescent:", {c['nodeid']: c['xpos'] for c in arg_json['Coalescent']})
    print(" Recombination:", {r['nodeid']: r['xpos'] for r in arg_json['Recombination']})
    print(f" Crossings: {count_crossings(arg_json)}")

    # Compute new layout
    # compute_arg_xpos writes 'xpos' into the dict it receives, so a deep
    # copy is laid out and arg_json keeps its original positions.
    import copy
    arg_updated = compute_arg_xpos(copy.deepcopy(arg_json))

    # Same report for the recomputed layout, rounded to 3 decimals.
    print("\nComputed positions:")
    print(" Leaves:", {leaf['nodeid']: round(leaf['xpos'], 3) for leaf in arg_updated['Leaf']})
    print(" Coalescent:", {c['nodeid']: round(c['xpos'], 3) for c in arg_updated['Coalescent']})
    print(" Recombination:", {r['nodeid']: round(r['xpos'], 3) for r in arg_updated['Recombination']})
    print(f" Crossings: {count_crossings(arg_updated)}")

    # Print full updated JSON
    print("\nFull updated JSON:")
    print(json.dumps(arg_updated, indent=2))
|