lorax-arg 0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. lorax/buffer.py +43 -0
  2. lorax/cache/__init__.py +43 -0
  3. lorax/cache/csv_tree_graph.py +59 -0
  4. lorax/cache/disk.py +467 -0
  5. lorax/cache/file_cache.py +142 -0
  6. lorax/cache/file_context.py +72 -0
  7. lorax/cache/lru.py +90 -0
  8. lorax/cache/tree_graph.py +293 -0
  9. lorax/cli.py +312 -0
  10. lorax/cloud/__init__.py +0 -0
  11. lorax/cloud/gcs_utils.py +205 -0
  12. lorax/constants.py +66 -0
  13. lorax/context.py +80 -0
  14. lorax/csv/__init__.py +7 -0
  15. lorax/csv/config.py +250 -0
  16. lorax/csv/layout.py +182 -0
  17. lorax/csv/newick_tree.py +234 -0
  18. lorax/handlers.py +998 -0
  19. lorax/lineage.py +456 -0
  20. lorax/loaders/__init__.py +0 -0
  21. lorax/loaders/csv_loader.py +10 -0
  22. lorax/loaders/loader.py +31 -0
  23. lorax/loaders/tskit_loader.py +119 -0
  24. lorax/lorax_app.py +75 -0
  25. lorax/manager.py +58 -0
  26. lorax/metadata/__init__.py +0 -0
  27. lorax/metadata/loader.py +426 -0
  28. lorax/metadata/mutations.py +146 -0
  29. lorax/modes.py +190 -0
  30. lorax/pg.py +183 -0
  31. lorax/redis_utils.py +30 -0
  32. lorax/routes.py +137 -0
  33. lorax/session_manager.py +206 -0
  34. lorax/sockets/__init__.py +55 -0
  35. lorax/sockets/connection.py +99 -0
  36. lorax/sockets/debug.py +47 -0
  37. lorax/sockets/decorators.py +112 -0
  38. lorax/sockets/file_ops.py +200 -0
  39. lorax/sockets/lineage.py +307 -0
  40. lorax/sockets/metadata.py +232 -0
  41. lorax/sockets/mutations.py +154 -0
  42. lorax/sockets/node_search.py +535 -0
  43. lorax/sockets/tree_layout.py +117 -0
  44. lorax/sockets/utils.py +10 -0
  45. lorax/tree_graph/__init__.py +12 -0
  46. lorax/tree_graph/tree_graph.py +689 -0
  47. lorax/utils.py +124 -0
  48. lorax_app/__init__.py +4 -0
  49. lorax_app/app.py +159 -0
  50. lorax_app/cli.py +114 -0
  51. lorax_app/static/X.png +0 -0
  52. lorax_app/static/assets/index-BCEGlUFi.js +2361 -0
  53. lorax_app/static/assets/index-iKjzUpA9.css +1 -0
  54. lorax_app/static/assets/localBackendWorker-BaWwjSV_.js +2 -0
  55. lorax_app/static/assets/renderDataWorker-BKLdiU7J.js +2 -0
  56. lorax_app/static/gestures/gesture-flick.ogv +0 -0
  57. lorax_app/static/gestures/gesture-two-finger-scroll.ogv +0 -0
  58. lorax_app/static/index.html +14 -0
  59. lorax_app/static/logo.png +0 -0
  60. lorax_app/static/lorax-logo.png +0 -0
  61. lorax_app/static/vite.svg +1 -0
  62. lorax_arg-0.1.dist-info/METADATA +131 -0
  63. lorax_arg-0.1.dist-info/RECORD +66 -0
  64. lorax_arg-0.1.dist-info/WHEEL +5 -0
  65. lorax_arg-0.1.dist-info/entry_points.txt +4 -0
  66. lorax_arg-0.1.dist-info/top_level.txt +2 -0
lorax/manager.py ADDED
@@ -0,0 +1,58 @@
1
+ from fastapi.websockets import WebSocket, WebSocketState, WebSocketDisconnect
2
+
3
+
4
class WebSocketManager:
    """Track live WebSocket connections and route messages to the
    components each client has registered interest in."""

    def __init__(self):
        # All sockets that have been accepted and not yet disconnected.
        self.connected_clients = set()
        # websocket -> set(component names) registered by that client.
        self.client_component = {}

    async def connect(self, websocket: WebSocket):
        """Accept the handshake and start tracking the socket."""
        # NOTE: the original built an unused "host:port" string here; dropped.
        await websocket.accept()
        self.connected_clients.add(websocket)

    async def cleanup_disconnected_clients(self):
        """Drop every tracked socket whose client state is no longer CONNECTED."""
        # Materialize the dead list first so we don't mutate the set while
        # iterating it.
        dead = [ws for ws in self.connected_clients if not self.is_connected(ws)]
        for ws in dead:
            await self.disconnect(ws)

    def get_connected_clients(self):
        """Get list of currently connected clients"""
        return [client for client in self.connected_clients if self.is_connected(client)]

    def is_connected(self, ws: WebSocket) -> bool:
        """Return True while the client half of the socket reports CONNECTED."""
        try:
            return ws.client_state == WebSocketState.CONNECTED
        except Exception:
            # Reading state on a torn-down socket can raise; treat as gone.
            return False

    async def send_message(self, ws: WebSocket, message: dict):
        """Send *message* as JSON; on any failure, stop tracking the socket."""
        if not self.is_connected(ws):
            await self.disconnect(ws)
            return
        try:
            await ws.send_json(message)
        except Exception as e:
            print(f"send_message error: {e}")
            await self.disconnect(ws)

    async def disconnect(self, websocket: WebSocket):
        """Forget the socket and its component registrations (idempotent)."""
        # discard/pop avoid the check-then-remove race of the original.
        self.connected_clients.discard(websocket)
        self.client_component.pop(websocket, None)

    async def register_component(self, ws: WebSocket, component: str):
        """Record that *ws* wants messages addressed to *component*."""
        self.client_component.setdefault(ws, set()).add(component)

    async def send_to_component(self, component: str, message: dict):
        """Send only to sockets in *this* session that registered that component."""
        targets = [
            ws for ws, comps in self.client_component.items()
            if component in comps and self.is_connected(ws)
        ]
        if not targets:
            print(f"⚠️ No sockets registered for component: {component}")
            return
        for ws in targets:
            await self.send_message(ws, message)
lorax/metadata/loader.py ADDED
@@ -0,0 +1,426 @@
1
+ """
2
+ Metadata extraction and caching for tree sequences.
3
+
4
+ Functions accept FileContext and use its nested metadata cache.
5
+ When a FileContext is evicted, its metadata cache is evicted together.
6
+ """
7
+
8
+ import json
9
+ import numpy as np
10
+ import tskit
11
+ import pyarrow as pa
12
+ from collections import defaultdict
13
+ from lorax.utils import ensure_json_dict, make_json_safe, make_json_serializable
14
+
15
+
16
def get_metadata_for_key(
    ctx,
    key,
    sources=("individual", "node", "population"),
    sample_name_key="name"
):
    """
    Get sample-to-value mapping for a specific metadata key.
    Results are cached in the FileContext's nested metadata cache.

    Parameters
    ----------
    ctx : FileContext
        The file context containing tree_sequence and metadata cache
    key : str
        The metadata key to extract
    sources : tuple
        Any of ("individual", "node", "population")
    sample_name_key : str
        Key in node metadata used as sample name

    Returns
    -------
    dict
        {sample_name: value} for the specified key
    """
    # Check nested cache in FileContext
    cached = ctx.get_metadata(key)
    if cached is not None:
        print(f"✅ Using cached metadata for key: {key}")
        return cached

    ts = ctx.tree_sequence

    # Special handling for "sample" key - each sample's value is its own name
    if key == "sample":
        result = {}
        for node_id in ts.samples():
            node = ts.node(node_id)
            node_meta = node.metadata or {}
            try:
                node_meta = ensure_json_dict(node_meta)
            except (TypeError, json.JSONDecodeError):
                node_meta = {}
            sample_name = str(node_meta.get(sample_name_key, f"{node_id}"))
            result[sample_name] = sample_name
        ctx.set_metadata(key, result)
        return result

    result = {}

    for node_id in ts.samples():
        node = ts.node(node_id)
        node_meta = node.metadata or {}
        # Guard against malformed node metadata, consistent with the
        # "sample" branch above and get_metadata_schema: one bad record
        # must not abort extraction for every sample.
        try:
            node_meta = ensure_json_dict(node_meta)
        except (TypeError, json.JSONDecodeError):
            node_meta = {}
        sample_name = node_meta.get(sample_name_key, f"{node_id}")

        for source in sources:
            if source == "individual":
                if node.individual == tskit.NULL:
                    continue
                meta = ensure_json_dict(ts.individual(node.individual).metadata or {})

            elif source == "node":
                meta = node_meta

            elif source == "population":
                if node.population == tskit.NULL:
                    continue
                meta = ensure_json_dict(ts.population(node.population).metadata or {})

            else:
                continue

            if not meta:
                continue

            if key in meta:
                value = meta[key]
                if value is None:
                    break  # Skip None values
                if isinstance(value, (list, dict)):
                    value = repr(value)
                result[sample_name] = str(value)
                break  # Found the key, move to next sample

    ctx.set_metadata(key, result)
    return result
108
+
109
+
110
def search_samples_by_metadata(
    ctx,
    key,
    value,
    sources=("individual", "node", "population"),
    sample_name_key="name"
):
    """
    Find all sample names whose metadata entry for ``key`` equals ``value``.

    Comparison is string-based on both sides. When the key is already
    present in the FileContext's metadata cache, the cached mapping is
    used directly; otherwise the tree sequence is scanned on the fly
    (the on-the-fly result is not cached).

    Parameters
    ----------
    ctx : FileContext
        The file context containing tree_sequence and metadata cache
    key : str
        The metadata key to search
    value : str
        The value to match
    sources : tuple
        Any of ("individual", "node", "population")
    sample_name_key : str
        Key in node metadata used as sample name

    Returns
    -------
    list
        Sample names matching the criteria
    """
    ts = ctx.tree_sequence
    target = str(value)

    cached = ctx.get_metadata(key)
    if cached is not None:
        # Fast path: linear scan over the cached {sample: value} mapping.
        return [name for name, v in cached.items() if str(v) == target]

    matches = []

    for node_id in ts.samples():
        node = ts.node(node_id)
        node_meta = ensure_json_dict(node.metadata or {})
        sample_name = node_meta.get(sample_name_key, f"{node_id}")

        # Check each requested source in order; the first one carrying
        # the key decides the comparison for this sample.
        for source in sources:
            if source == "node":
                meta = node_meta
            elif source == "individual":
                if node.individual == tskit.NULL:
                    continue
                meta = ensure_json_dict(ts.individual(node.individual).metadata or {})
            elif source == "population":
                if node.population == tskit.NULL:
                    continue
                meta = ensure_json_dict(ts.population(node.population).metadata or {})
            else:
                continue

            if not meta or key not in meta:
                continue

            candidate = meta[key]
            if candidate is None:
                break  # explicit None: stop consulting further sources
            if isinstance(candidate, (list, dict)):
                candidate = repr(candidate)
            if str(candidate) == target:
                matches.append(sample_name)
            break  # key resolved for this sample either way

    return matches
191
+
192
+
193
def _get_sample_metadata_value(ts, node_id, key, sources, sample_name_key="name"):
    """
    Resolve the metadata value of ``key`` for a single sample node.

    Returns a ``(sample_name, value)`` tuple; ``value`` is ``None`` when
    no source carries the key for this node.
    """
    node = ts.node(node_id)
    node_meta = ensure_json_dict(node.metadata or {})
    sample_name = node_meta.get(sample_name_key, f"{node_id}")

    # "sample" is a synthetic key, not a real tskit metadata field: treat
    # it as identity so "sample" searches/highlights match by sample name.
    if key == "sample":
        name = str(sample_name)
        return (name, name)

    for source in sources:
        if source == "node":
            meta = node_meta
        elif source == "individual":
            if node.individual == tskit.NULL:
                continue
            meta = ensure_json_dict(ts.individual(node.individual).metadata or {})
        elif source == "population":
            if node.population == tskit.NULL:
                continue
            meta = ensure_json_dict(ts.population(node.population).metadata or {})
        else:
            continue

        if meta and key in meta:
            found = meta[key]
            if isinstance(found, (list, dict)):
                found = repr(found)
            return (sample_name, found)

    return (sample_name, None)
240
+
241
+
242
def _indices_to_arrow_ipc(indices):
    """Serialize a uint32 index array to Arrow IPC stream bytes."""
    table = pa.table({'idx': pa.array(indices, type=pa.uint32())})
    sink = pa.BufferOutputStream()
    writer = pa.ipc.new_stream(sink, table.schema)
    writer.write_table(table)
    writer.close()
    return sink.getvalue().to_pybytes()


def get_metadata_array_for_key(
    ctx,
    key,
    sources=("individual", "node", "population"),
    sample_name_key="name"
):
    """
    Build efficient array-based metadata for a key using PyArrow.

    Returns indices array where indices[i] is the index into unique_values
    for the i-th sample (ordered by node_id from ts.samples()).

    Parameters
    ----------
    ctx : FileContext
        The file context containing tree_sequence and metadata cache
    key : str
        The metadata key to extract
    sources : tuple
        Any of ("individual", "node", "population")
    sample_name_key : str
        Key in node metadata used as sample name

    Returns
    -------
    dict
        {
            'unique_values': [val0, val1, ...],  # Index i -> value string
            'sample_node_ids': [node_id0, node_id1, ...],  # Sample order
            'arrow_buffer': bytes  # PyArrow IPC serialized indices
        }
    """
    cache_key = f"{key}:array"
    cached = ctx.get_metadata(cache_key)
    if cached is not None:
        print(f"✅ Using cached metadata array for key: {key}")
        return cached

    ts = ctx.tree_sequence
    sample_ids = list(ts.samples())
    n_samples = len(sample_ids)

    # Intern each sample's value string into a shared unique-value table;
    # both branches below feed the same (unique_values, indices) pair.
    unique_values = []
    value_to_idx = {}
    indices = np.zeros(n_samples, dtype=np.uint32)

    if key == "sample":
        # "sample" key: each sample's name is its own unique value.
        for i, node_id in enumerate(sample_ids):
            node = ts.node(node_id)
            node_meta = node.metadata or {}
            try:
                node_meta = ensure_json_dict(node_meta)
            except (TypeError, json.JSONDecodeError):
                node_meta = {}
            value_str = str(node_meta.get(sample_name_key, f"{node_id}"))
            if value_str not in value_to_idx:
                value_to_idx[value_str] = len(unique_values)
                unique_values.append(value_str)
            indices[i] = value_to_idx[value_str]
    else:
        for i, node_id in enumerate(sample_ids):
            _, value = _get_sample_metadata_value(ts, node_id, key, sources, sample_name_key)
            if value is None:
                value = ""  # Handle missing values
            value_str = str(value)
            if value_str not in value_to_idx:
                value_to_idx[value_str] = len(unique_values)
                unique_values.append(value_str)
            indices[i] = value_to_idx[value_str]

    result = {
        'unique_values': unique_values,
        'sample_node_ids': [int(x) for x in sample_ids],  # Convert to Python int for JSON
        'arrow_buffer': _indices_to_arrow_ipc(indices)
    }
    ctx.set_metadata(cache_key, result)

    if key == "sample":
        print(f"✅ Built sample metadata array ({n_samples} samples, {len(unique_values)} unique values)")
    else:
        print(f"✅ Built metadata array for key: {key} ({n_samples} samples, {len(unique_values)} unique values)")
    return result
355
+
356
+
357
def get_metadata_schema(
    ts,
    sources=("individual", "node", "population"),
    sample_name_key="name"
):
    """
    Collect the set of metadata keys present across all sample nodes.

    Values are fetched on-demand via get_metadata_array_for_key; this only
    enumerates keys. The synthetic "sample" key is prepended so each sample
    can be colored by its own name/ID (and becomes the default colorBy).

    Parameters
    ----------
    ts : tskit.TreeSequence
        The tree sequence (not FileContext - this doesn't need caching)
    sources : tuple
        Any of ("individual", "node", "population")
    sample_name_key : str
        Key in node metadata used as sample name

    Returns
    -------
    dict
        {"metadata_keys": [key1, key2, ...]}

    Raises
    ------
    ValueError
        If ``sources`` contains an unknown source name.
    """
    discovered = set()

    for node_id in ts.samples():
        node = ts.node(node_id)

        # Parse node metadata once; tolerate malformed records.
        try:
            node_meta = ensure_json_dict(node.metadata or {})
        except (TypeError, json.JSONDecodeError):
            node_meta = {}

        for source in sources:
            if source == "node":
                meta = node_meta  # reuse already parsed node metadata
            elif source == "individual":
                if node.individual == tskit.NULL:
                    continue
                meta = ensure_json_dict(ts.individual(node.individual).metadata or {})
            elif source == "population":
                if node.population == tskit.NULL:
                    continue
                meta = ensure_json_dict(ts.population(node.population).metadata or {})
            else:
                raise ValueError(f"Unknown source: {source}")

            if meta:
                discovered.update(meta.keys())

    # Prepend "sample" to keys - this makes it the default colorBy option
    return {
        "metadata_keys": ["sample"] + sorted(discovered)
    }
@@ -0,0 +1,146 @@
1
+
2
+ import numpy as np
3
+ import tskit
4
+
5
def get_mutations_in_window(ts, start, end, offset=0, limit=1000):
    """
    Get mutations within a genomic interval [start, end) with pagination.

    Args:
        ts: tskit.TreeSequence
        start: Start genomic position (bp)
        end: End genomic position (bp)
        offset: Number of mutations to skip (for pagination)
        limit: Maximum number of mutations to return

    Returns:
        dict with:
            - 'mutations': list of mutation dicts
            - 'total_count': total mutations in window (for pagination)
            - 'has_more': whether there are more mutations
    """
    tables = ts.tables
    site_table = tables.sites
    mutation_table = tables.mutations

    # Each mutation's genomic position is the position of its site.
    mutation_positions = site_table.position[mutation_table.site]

    # Vectorized window filter over all mutations at once.
    in_window = np.where((mutation_positions >= start) & (mutation_positions < end))[0]
    total_count = len(in_window)

    records = []
    for idx in in_window[offset:offset + limit]:
        mut = mutation_table[idx]
        site = site_table[mut.site]
        records.append({
            'position': int(site.position),
            'mutation': f"{site.ancestral_state}->{mut.derived_state}",
            'node_id': int(mut.node),
            'site_id': int(mut.site),
            'ancestral_state': site.ancestral_state,
            'derived_state': mut.derived_state,
        })

    return {
        'mutations': records,
        'total_count': total_count,
        'has_more': offset + limit < total_count,
    }
65
+
66
+
67
def search_mutations_by_position(ts, position, range_bp=5000, offset=0, limit=1000):
    """
    Search for mutations around a specific position, nearest first.

    Args:
        ts: tskit.TreeSequence
        position: Center position to search around (bp)
        range_bp: Total range to search (searches +/- range_bp/2 around position)
        offset: Number of mutations to skip (for pagination)
        limit: Maximum number of mutations to return

    Returns:
        dict with:
            - 'mutations': list of mutation dicts sorted by distance from position
            - 'total_count': total mutations in search range
            - 'has_more': whether there are more mutations
            - 'search_start': actual start of search range
            - 'search_end': actual end of search range
    """
    # Clamp the search window to the sequence bounds.
    half = range_bp // 2
    search_start = max(0, position - half)
    search_end = min(ts.sequence_length, position + half)

    tables = ts.tables
    site_table = tables.sites
    mutation_table = tables.mutations

    # Each mutation's genomic position is the position of its site.
    all_positions = site_table.position[mutation_table.site]
    hit_idx = np.where((all_positions >= search_start) & (all_positions < search_end))[0]

    # Order hits by absolute distance from the query position.
    if len(hit_idx) > 0:
        distances = np.abs(all_positions[hit_idx] - position)
        order = np.argsort(distances)
        hit_idx = hit_idx[order]
        distances = distances[order]
    else:
        distances = np.array([])

    total_count = len(hit_idx)
    page_idx = hit_idx[offset:offset + limit]
    page_dist = distances[offset:offset + limit] if len(distances) > 0 else []

    records = []
    for i, idx in enumerate(page_idx):
        mut = mutation_table[idx]
        site = site_table[mut.site]
        records.append({
            'position': int(site.position),
            'mutation': f"{site.ancestral_state}->{mut.derived_state}",
            'node_id': int(mut.node),
            'site_id': int(mut.site),
            'ancestral_state': site.ancestral_state,
            'derived_state': mut.derived_state,
            'distance': int(page_dist[i]) if i < len(page_dist) else 0,
        })

    return {
        'mutations': records,
        'total_count': total_count,
        'has_more': offset + limit < total_count,
        'search_start': int(search_start),
        'search_end': int(search_end),
    }