lorax-arg 0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. lorax/buffer.py +43 -0
  2. lorax/cache/__init__.py +43 -0
  3. lorax/cache/csv_tree_graph.py +59 -0
  4. lorax/cache/disk.py +467 -0
  5. lorax/cache/file_cache.py +142 -0
  6. lorax/cache/file_context.py +72 -0
  7. lorax/cache/lru.py +90 -0
  8. lorax/cache/tree_graph.py +293 -0
  9. lorax/cli.py +312 -0
  10. lorax/cloud/__init__.py +0 -0
  11. lorax/cloud/gcs_utils.py +205 -0
  12. lorax/constants.py +66 -0
  13. lorax/context.py +80 -0
  14. lorax/csv/__init__.py +7 -0
  15. lorax/csv/config.py +250 -0
  16. lorax/csv/layout.py +182 -0
  17. lorax/csv/newick_tree.py +234 -0
  18. lorax/handlers.py +998 -0
  19. lorax/lineage.py +456 -0
  20. lorax/loaders/__init__.py +0 -0
  21. lorax/loaders/csv_loader.py +10 -0
  22. lorax/loaders/loader.py +31 -0
  23. lorax/loaders/tskit_loader.py +119 -0
  24. lorax/lorax_app.py +75 -0
  25. lorax/manager.py +58 -0
  26. lorax/metadata/__init__.py +0 -0
  27. lorax/metadata/loader.py +426 -0
  28. lorax/metadata/mutations.py +146 -0
  29. lorax/modes.py +190 -0
  30. lorax/pg.py +183 -0
  31. lorax/redis_utils.py +30 -0
  32. lorax/routes.py +137 -0
  33. lorax/session_manager.py +206 -0
  34. lorax/sockets/__init__.py +55 -0
  35. lorax/sockets/connection.py +99 -0
  36. lorax/sockets/debug.py +47 -0
  37. lorax/sockets/decorators.py +112 -0
  38. lorax/sockets/file_ops.py +200 -0
  39. lorax/sockets/lineage.py +307 -0
  40. lorax/sockets/metadata.py +232 -0
  41. lorax/sockets/mutations.py +154 -0
  42. lorax/sockets/node_search.py +535 -0
  43. lorax/sockets/tree_layout.py +117 -0
  44. lorax/sockets/utils.py +10 -0
  45. lorax/tree_graph/__init__.py +12 -0
  46. lorax/tree_graph/tree_graph.py +689 -0
  47. lorax/utils.py +124 -0
  48. lorax_app/__init__.py +4 -0
  49. lorax_app/app.py +159 -0
  50. lorax_app/cli.py +114 -0
  51. lorax_app/static/X.png +0 -0
  52. lorax_app/static/assets/index-BCEGlUFi.js +2361 -0
  53. lorax_app/static/assets/index-iKjzUpA9.css +1 -0
  54. lorax_app/static/assets/localBackendWorker-BaWwjSV_.js +2 -0
  55. lorax_app/static/assets/renderDataWorker-BKLdiU7J.js +2 -0
  56. lorax_app/static/gestures/gesture-flick.ogv +0 -0
  57. lorax_app/static/gestures/gesture-two-finger-scroll.ogv +0 -0
  58. lorax_app/static/index.html +14 -0
  59. lorax_app/static/logo.png +0 -0
  60. lorax_app/static/lorax-logo.png +0 -0
  61. lorax_app/static/vite.svg +1 -0
  62. lorax_arg-0.1.dist-info/METADATA +131 -0
  63. lorax_arg-0.1.dist-info/RECORD +66 -0
  64. lorax_arg-0.1.dist-info/WHEEL +5 -0
  65. lorax_arg-0.1.dist-info/entry_points.txt +4 -0
  66. lorax_arg-0.1.dist-info/top_level.txt +2 -0
@@ -0,0 +1,232 @@
1
+ """
2
+ Metadata event handlers for Lorax Socket.IO.
3
+
4
+ Handles fetch_metadata_for_key, search_metadata, and fetch_metadata_array events.
5
+ """
6
+
7
+ import asyncio
8
+
9
+ from lorax.constants import ERROR_NO_FILE_LOADED
10
+ from lorax.metadata.loader import (
11
+ get_metadata_for_key, search_samples_by_metadata, get_metadata_array_for_key
12
+ )
13
+ from lorax.cache import get_file_context
14
+ from lorax.sockets.decorators import require_session
15
+ from lorax.sockets.utils import is_csv_session_file
16
+
17
+
18
def register_metadata_events(sio):
    """Register metadata-related socket events on ``sio``.

    Three handlers are attached with ``@sio.event``:

    * ``fetch_metadata_for_key``  -> replies on ``metadata-key-result``
    * ``search_metadata``         -> replies on ``search-result``
    * ``fetch_metadata_array``    -> replies on ``metadata-array-result``

    Every reply is sent only to the requesting client (``to=sid``). Failures
    are reported as ``{"error": <message>}`` on the handler's own result
    event, except "no file loaded", which goes out on the generic ``error``
    event with ``ERROR_NO_FILE_LOADED``.

    Args:
        sio: Socket.IO server object providing the ``event`` decorator and an
            awaitable ``emit(event, payload, to=...)``.
    """
    bad_payload_error = "Invalid payload: expected an object"

    async def _reply_error(event, sid, message):
        """Send ``{"error": message}`` on ``event`` to the requester only."""
        await sio.emit(event, {"error": message}, to=sid)

    async def _loaded_session(event, sid, data):
        """Return the caller's session if it has a file loaded, else ``None``.

        ``None`` means the request must stop. Whenever this helper itself
        detects the problem (malformed payload, no file loaded) it has
        already notified the client; when ``require_session`` returns a falsy
        value nothing further is emitted here.
        """
        # Reject non-dict payloads explicitly instead of letting ``data.get``
        # raise and leak an internal AttributeError message to the client.
        if not isinstance(data, dict):
            await _reply_error(event, sid, bad_payload_error)
            return None

        lorax_sid = data.get("lorax_sid")
        session = await require_session(lorax_sid, sid, sio)
        if not session:
            return None

        if not session.file_path:
            print(f"⚠️ No file loaded for session {lorax_sid}")
            await sio.emit("error", {
                "code": ERROR_NO_FILE_LOADED,
                "message": "No file loaded. Please load a file first."
            }, to=sid)
            return None

        return session

    async def _load_context(event, sid, session, is_csv):
        """Load the file context for ``session``; emit an error and return
        ``None`` when ``get_file_context`` yields nothing."""
        ctx = await get_file_context(session.file_path)
        if ctx is None:
            message = "Failed to load CSV" if is_csv else "Failed to load tree sequence"
            await _reply_error(event, sid, message)
        return ctx

    async def _reject_unsupported_csv_key(event, sid, key):
        """For CSV files only the ``"sample"`` key is served.

        Returns ``True`` (after notifying the client) when ``key`` is
        anything else.
        """
        if key == "sample":
            return False
        await _reply_error(
            event, sid, f"Metadata key '{key}' is not supported for CSV files."
        )
        return True

    def _sequential_index_buffer(count):
        """Serialize ``0..count-1`` as a one-column (``idx``, uint32) Arrow
        IPC stream and return the raw bytes."""
        # Imported lazily, as in the original handler, so the heavy
        # dependencies are only loaded when the CSV array path is used.
        import numpy as np
        import pyarrow as pa

        indices = np.arange(count, dtype=np.uint32)
        table = pa.table({'idx': pa.array(indices, type=pa.uint32())})
        sink = pa.BufferOutputStream()
        # The context manager closes the writer even if write_table raises.
        with pa.ipc.new_stream(sink, table.schema) as writer:
            writer.write_table(table)
        return sink.getvalue().to_pybytes()

    @sio.event
    async def fetch_metadata_for_key(sid, data):
        """Socket event to fetch the metadata mapping for a specific key.

        Expects ``data`` to contain ``lorax_sid`` and ``key``. On success
        emits ``{"key": key, "data": mapping}``. For CSV files only the
        ``"sample"`` key is supported and the mapping is ``{name: name}``.
        """
        event = "metadata-key-result"
        try:
            session = await _loaded_session(event, sid, data)
            if session is None:
                return

            is_csv = is_csv_session_file(session.file_path)

            # Validate the key for both file kinds; previously the CSV branch
            # reported a missing key as "Metadata key 'None' is not supported".
            key = data.get("key")
            if not key:
                await _reply_error(event, sid, "Missing 'key' parameter")
                return

            if is_csv and await _reject_unsupported_csv_key(event, sid, key):
                return

            ctx = await _load_context(event, sid, session, is_csv)
            if ctx is None:
                return

            if is_csv:
                # sample_names is {name: {"sample_name": name}, ...}
                sample_names = ctx.config.get("sample_names", {})
                # Return {name: name} like tskit's "sample" key
                result = {name: name for name in sample_names}
            else:
                # Pass FileContext to metadata function, off the event loop
                result = await asyncio.to_thread(get_metadata_for_key, ctx, key)

            await sio.emit(event, {"key": key, "data": result}, to=sid)
        except Exception as e:
            # Top-level boundary of the handler: report to the client rather
            # than letting the exception escape into the Socket.IO server.
            print(f"❌ Metadata fetch error: {e}")
            await _reply_error(event, sid, str(e))

    @sio.event
    async def search_metadata(sid, data):
        """Socket event to search for samples matching a metadata value.

        Expects ``data`` to contain ``lorax_sid``, ``key`` and ``value``. On
        success emits ``{"key", "value", "samples"}``. For CSV files only the
        ``"sample"`` key is supported and matching is exact on sample name.
        """
        event = "search-result"
        try:
            session = await _loaded_session(event, sid, data)
            if session is None:
                return

            is_csv = is_csv_session_file(session.file_path)

            # Same parameter contract for CSV and tree-sequence files.
            key = data.get("key")
            value = data.get("value")
            if not key or value is None:
                await _reply_error(event, sid, "Missing 'key' or 'value' parameter")
                return

            if is_csv and await _reject_unsupported_csv_key(event, sid, key):
                return

            ctx = await _load_context(event, sid, session, is_csv)
            if ctx is None:
                return

            if is_csv:
                sample_names = ctx.config.get("sample_names", {})
                # Return matching sample names (exact match)
                samples = [name for name in sample_names if name == value]
            else:
                # Pass FileContext to metadata function, off the event loop
                samples = await asyncio.to_thread(
                    search_samples_by_metadata, ctx, key, value
                )

            await sio.emit(event, {
                "key": key,
                "value": value,
                "samples": samples
            }, to=sid)
        except Exception as e:
            print(f"❌ Search error: {e}")
            await _reply_error(event, sid, str(e))

    @sio.event
    async def fetch_metadata_array(sid, data):
        """Socket event to fetch metadata as efficient PyArrow array format.

        This is optimized for large tree sequences (1M+ samples) where JSON
        serialization would be too slow/large. Returns binary Arrow IPC data
        with indices that map node_id -> value index.

        Expects ``data`` to contain ``lorax_sid`` and ``key``. On success
        emits ``{"key", "unique_values", "sample_node_ids", "buffer"}``. For
        CSV files only the ``"sample"`` key is supported; each sample name is
        its own unique value and node ids are the sequential positions.
        """
        event = "metadata-array-result"
        try:
            session = await _loaded_session(event, sid, data)
            if session is None:
                return

            is_csv = is_csv_session_file(session.file_path)

            key = data.get("key")
            if not key:
                await _reply_error(event, sid, "Missing 'key' parameter")
                return

            if is_csv and await _reject_unsupported_csv_key(event, sid, key):
                return

            ctx = await _load_context(event, sid, session, is_csv)
            if ctx is None:
                return

            if is_csv:
                sample_names = ctx.config.get("sample_names", {})
                names_list = list(sample_names.keys())

                # Each sample maps to its own unique index. Build the buffer
                # in a worker thread, like the tree-sequence path, so a large
                # sample list does not stall the event loop.
                buffer = await asyncio.to_thread(
                    _sequential_index_buffer, len(names_list)
                )

                await sio.emit(event, {
                    "key": key,
                    "unique_values": names_list,
                    "sample_node_ids": list(range(len(names_list))),  # Sequential indices for CSV
                    "buffer": buffer
                }, to=sid)
                return

            # Pass FileContext to metadata function, off the event loop
            result = await asyncio.to_thread(
                get_metadata_array_for_key, ctx, key
            )

            # Send metadata with Arrow buffer as binary
            await sio.emit(event, {
                "key": key,
                "unique_values": result['unique_values'],
                "sample_node_ids": result['sample_node_ids'],
                "buffer": result['arrow_buffer']  # Binary data
            }, to=sid)

        except Exception as e:
            print(f"❌ Metadata array fetch error: {e}")
            await _reply_error(event, sid, str(e))
@@ -0,0 +1,154 @@
1
+ """
2
+ Mutation event handlers for Lorax Socket.IO.
3
+
4
+ Handles query_mutations_window and search_mutations events.
5
+ """
6
+
7
+ import asyncio
8
+
9
+ from lorax.constants import ERROR_NO_FILE_LOADED
10
+ from lorax.metadata.mutations import (
11
+ get_mutations_in_window, search_mutations_by_position
12
+ )
13
+ from lorax.buffer import mutations_to_arrow_buffer
14
+ from lorax.cache import get_file_context
15
+ from lorax.sockets.decorators import require_session
16
+ from lorax.sockets.utils import is_csv_session_file
17
+
18
+
19
def register_mutations_events(sio):
    """Register mutation-related socket events on ``sio``.

    Two handlers are attached with ``@sio.event``:

    * ``query_mutations_window`` -> replies on ``mutations-window-result``
    * ``search_mutations``       -> replies on ``mutations-search-result``

    Every reply is sent only to the requesting client (``to=sid``). Failures
    are reported as ``{"error": <message>}`` on the handler's own result
    event, except "no file loaded", which goes out on the generic ``error``
    event with ``ERROR_NO_FILE_LOADED``. CSV-backed sessions are rejected by
    both handlers.

    Args:
        sio: Socket.IO server object providing the ``event`` decorator and an
            awaitable ``emit(event, payload, to=...)``.
    """
    bad_payload_error = "Invalid payload: expected an object"

    async def _reply_error(event, sid, message):
        """Send ``{"error": message}`` on ``event`` to the requester only."""
        await sio.emit(event, {"error": message}, to=sid)

    async def _tree_session(event, sid, data, csv_message):
        """Return the caller's session if it has a non-CSV file loaded.

        Returns ``None`` when the request must stop. Whenever this helper
        itself detects the problem (malformed payload, no file loaded, CSV
        file) it has already notified the client; when ``require_session``
        returns a falsy value nothing further is emitted here.
        """
        # Reject non-dict payloads explicitly instead of letting ``data.get``
        # raise and leak an internal AttributeError message to the client.
        if not isinstance(data, dict):
            await _reply_error(event, sid, bad_payload_error)
            return None

        lorax_sid = data.get("lorax_sid")
        session = await require_session(lorax_sid, sid, sio)
        if not session:
            return None

        if not session.file_path:
            print(f"⚠️ No file loaded for session {lorax_sid}")
            await sio.emit("error", {
                "code": ERROR_NO_FILE_LOADED,
                "message": "No file loaded. Please load a file first."
            }, to=sid)
            return None

        if is_csv_session_file(session.file_path):
            await _reply_error(event, sid, csv_message)
            return None

        return session

    async def _load_context(event, sid, session):
        """Load the file context for ``session``; emit an error and return
        ``None`` when ``get_file_context`` yields nothing."""
        ctx = await get_file_context(session.file_path)
        if ctx is None:
            await _reply_error(event, sid, "Failed to load tree sequence")
        return ctx

    @sio.event
    async def query_mutations_window(sid, data):
        """Socket event to fetch mutations within a genomic window.

        Returns PyArrow IPC binary data with mutations in the specified range.
        Supports pagination via offset and limit parameters.

        Reads ``lorax_sid``, ``start`` (default 0), ``end`` (default 0),
        ``offset`` (default 0) and ``limit`` (default 1000) from ``data`` and
        echoes the four window/paging values back alongside ``buffer``,
        ``total_count`` and ``has_more``.
        """
        event = "mutations-window-result"
        try:
            session = await _tree_session(
                event, sid, data, "Mutations are not supported for CSV yet."
            )
            if session is None:
                return

            start = data.get("start", 0)
            end = data.get("end", 0)
            offset = data.get("offset", 0)
            limit = data.get("limit", 1000)

            ctx = await _load_context(event, sid, session)
            if ctx is None:
                return

            ts = ctx.tree_sequence

            # Get mutations in the window (worker thread keeps the loop free)
            result = await asyncio.to_thread(
                get_mutations_in_window, ts, start, end, offset, limit
            )

            # Convert to PyArrow buffer
            buffer = await asyncio.to_thread(mutations_to_arrow_buffer, result)

            await sio.emit(event, {
                "buffer": buffer,
                "total_count": result['total_count'],
                "has_more": result['has_more'],
                "start": start,
                "end": end,
                "offset": offset,
                "limit": limit
            }, to=sid)

        except Exception as e:
            # Top-level boundary of the handler: report to the client rather
            # than letting the exception escape into the Socket.IO server.
            print(f"❌ Mutations window query error: {e}")
            await _reply_error(event, sid, str(e))

    @sio.event
    async def search_mutations(sid, data):
        """Socket event to search mutations by position with configurable range.

        Returns PyArrow IPC binary data with mutations sorted by distance from position.
        Supports pagination via offset and limit parameters.

        Reads ``lorax_sid``, ``position`` (required), ``range_bp`` (default
        5000), ``offset`` (default 0) and ``limit`` (default 1000) from
        ``data`` and echoes them back alongside ``buffer``, ``total_count``,
        ``has_more``, ``search_start`` and ``search_end``.
        """
        event = "mutations-search-result"
        try:
            session = await _tree_session(
                event, sid, data, "Mutations search is not supported for CSV yet."
            )
            if session is None:
                return

            # ``is None`` rather than falsiness: position 0 is a valid request.
            position = data.get("position")
            if position is None:
                await _reply_error(event, sid, "Missing 'position' parameter")
                return

            range_bp = data.get("range_bp", 5000)
            offset = data.get("offset", 0)
            limit = data.get("limit", 1000)

            ctx = await _load_context(event, sid, session)
            if ctx is None:
                return

            ts = ctx.tree_sequence

            # Search mutations around the position
            result = await asyncio.to_thread(
                search_mutations_by_position, ts, position, range_bp, offset, limit
            )

            # Convert to PyArrow buffer
            buffer = await asyncio.to_thread(mutations_to_arrow_buffer, result)

            await sio.emit(event, {
                "buffer": buffer,
                "total_count": result['total_count'],
                "has_more": result['has_more'],
                "search_start": result['search_start'],
                "search_end": result['search_end'],
                "position": position,
                "range_bp": range_bp,
                "offset": offset,
                "limit": limit
            }, to=sid)

        except Exception as e:
            print(f"❌ Mutations search error: {e}")
            await _reply_error(event, sid, str(e))