lorax_arg-0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lorax/buffer.py +43 -0
- lorax/cache/__init__.py +43 -0
- lorax/cache/csv_tree_graph.py +59 -0
- lorax/cache/disk.py +467 -0
- lorax/cache/file_cache.py +142 -0
- lorax/cache/file_context.py +72 -0
- lorax/cache/lru.py +90 -0
- lorax/cache/tree_graph.py +293 -0
- lorax/cli.py +312 -0
- lorax/cloud/__init__.py +0 -0
- lorax/cloud/gcs_utils.py +205 -0
- lorax/constants.py +66 -0
- lorax/context.py +80 -0
- lorax/csv/__init__.py +7 -0
- lorax/csv/config.py +250 -0
- lorax/csv/layout.py +182 -0
- lorax/csv/newick_tree.py +234 -0
- lorax/handlers.py +998 -0
- lorax/lineage.py +456 -0
- lorax/loaders/__init__.py +0 -0
- lorax/loaders/csv_loader.py +10 -0
- lorax/loaders/loader.py +31 -0
- lorax/loaders/tskit_loader.py +119 -0
- lorax/lorax_app.py +75 -0
- lorax/manager.py +58 -0
- lorax/metadata/__init__.py +0 -0
- lorax/metadata/loader.py +426 -0
- lorax/metadata/mutations.py +146 -0
- lorax/modes.py +190 -0
- lorax/pg.py +183 -0
- lorax/redis_utils.py +30 -0
- lorax/routes.py +137 -0
- lorax/session_manager.py +206 -0
- lorax/sockets/__init__.py +55 -0
- lorax/sockets/connection.py +99 -0
- lorax/sockets/debug.py +47 -0
- lorax/sockets/decorators.py +112 -0
- lorax/sockets/file_ops.py +200 -0
- lorax/sockets/lineage.py +307 -0
- lorax/sockets/metadata.py +232 -0
- lorax/sockets/mutations.py +154 -0
- lorax/sockets/node_search.py +535 -0
- lorax/sockets/tree_layout.py +117 -0
- lorax/sockets/utils.py +10 -0
- lorax/tree_graph/__init__.py +12 -0
- lorax/tree_graph/tree_graph.py +689 -0
- lorax/utils.py +124 -0
- lorax_app/__init__.py +4 -0
- lorax_app/app.py +159 -0
- lorax_app/cli.py +114 -0
- lorax_app/static/X.png +0 -0
- lorax_app/static/assets/index-BCEGlUFi.js +2361 -0
- lorax_app/static/assets/index-iKjzUpA9.css +1 -0
- lorax_app/static/assets/localBackendWorker-BaWwjSV_.js +2 -0
- lorax_app/static/assets/renderDataWorker-BKLdiU7J.js +2 -0
- lorax_app/static/gestures/gesture-flick.ogv +0 -0
- lorax_app/static/gestures/gesture-two-finger-scroll.ogv +0 -0
- lorax_app/static/index.html +14 -0
- lorax_app/static/logo.png +0 -0
- lorax_app/static/lorax-logo.png +0 -0
- lorax_app/static/vite.svg +1 -0
- lorax_arg-0.1.dist-info/METADATA +131 -0
- lorax_arg-0.1.dist-info/RECORD +66 -0
- lorax_arg-0.1.dist-info/WHEEL +5 -0
- lorax_arg-0.1.dist-info/entry_points.txt +4 -0
- lorax_arg-0.1.dist-info/top_level.txt +2 -0
lorax/cli.py
ADDED
@@ -0,0 +1,312 @@
+"""
+Lorax CLI - Command-line interface for the Lorax backend server.
+
+Usage:
+    lorax serve --reload                     # Development mode with auto-reload
+    lorax serve --host 0.0.0.0 --port 8080 --reload
+    lorax serve --gunicorn --workers 4       # Production mode with gunicorn
+    lorax config show                        # Show current configuration
+    lorax config set cache.ts_cache_size 3   # Set a config value
+"""
+import os
+import sys
+import json
+from pathlib import Path
+import click
+
+
+# Default config path for local/conda mode
+DEFAULT_CONFIG_PATH = Path.home() / ".lorax" / "config.yaml"
+
+
+@click.group()
+@click.version_option(version="1.0.0", prog_name="lorax")
+def main():
+    """Lorax Backend CLI - Tree visualization and analysis server."""
+    pass
+
+
+@main.group()
+def config():
+    """Manage Lorax configuration."""
+    pass
+
+
+@config.command("show")
+def config_show():
+    """Show current configuration."""
+    from lorax.modes import CURRENT_MODE, CURRENT_CONFIG, get_cache_dir, get_uploads_dir
+
+    click.echo(f"Mode: {CURRENT_MODE}")
+    click.echo(f"")
+    click.echo("Cache Configuration:")
+    click.echo(f"  TS Cache Size: {CURRENT_CONFIG.ts_cache_size}")
+    click.echo(f"  Config Cache Size: {CURRENT_CONFIG.config_cache_size}")
+    click.echo(f"  Metadata Cache Size: {CURRENT_CONFIG.metadata_cache_size}")
+    click.echo(f"")
+    click.echo("Disk Cache:")
+    click.echo(f"  Enabled: {CURRENT_CONFIG.disk_cache_enabled}")
+    click.echo(f"  Max Size: {CURRENT_CONFIG.disk_cache_max_gb} GB")
+    click.echo(f"  Directory: {get_cache_dir(CURRENT_CONFIG)}")
+    click.echo(f"")
+    click.echo("Connection Limits:")
+    click.echo(f"  Max Sockets Per Session: {CURRENT_CONFIG.max_sockets_per_session}")
+    click.echo(f"  Enforce Limits: {CURRENT_CONFIG.enforce_connection_limits}")
+    click.echo(f"")
+    click.echo("Directories:")
+    click.echo(f"  Data Dir: {CURRENT_CONFIG.data_dir}")
+    click.echo(f"  Uploads Dir: {get_uploads_dir(CURRENT_CONFIG)}")
+    click.echo(f"")
+    click.echo("Requirements:")
+    click.echo(f"  Redis Required: {CURRENT_CONFIG.require_redis}")
+    click.echo(f"  GCS Required: {CURRENT_CONFIG.require_gcs}")
+
+
+@config.command("init")
+@click.option("--force", is_flag=True, help="Overwrite existing config file")
+def config_init(force: bool):
+    """Initialize a config file for local mode."""
+    config_path = DEFAULT_CONFIG_PATH
+
+    if config_path.exists() and not force:
+        click.echo(f"Config file already exists: {config_path}")
+        click.echo("Use --force to overwrite")
+        return
+
+    # Create default config
+    default_config = """# Lorax Local Configuration
+# This file is used when running in local/conda mode
+
+mode: local
+
+data_dir: ~/.lorax
+
+cache:
+  ts_cache_size: 5
+  config_cache_size: 2
+  metadata_cache_size: 10
+
+disk_cache:
+  enabled: false
+  max_gb: 10
+
+server:
+  host: 127.0.0.1
+  port: 8080
+  open_browser: true
+"""
+
+    config_path.parent.mkdir(parents=True, exist_ok=True)
+    config_path.write_text(default_config)
+    click.echo(f"Created config file: {config_path}")
+
+
+@config.command("path")
+def config_path():
+    """Show config file path."""
+    click.echo(DEFAULT_CONFIG_PATH)
+
+
+@main.command("cache-status")
+def cache_status():
+    """Show cache statistics."""
+    import asyncio
+    from lorax.context import disk_cache_manager
+
+    async def get_stats():
+        return await disk_cache_manager.get_stats()
+
+    stats = asyncio.run(get_stats())
+
+    click.echo("Disk Cache Status:")
+    if not stats.get("enabled"):
+        click.echo("  Disabled")
+    else:
+        click.echo(f"  Directory: {stats.get('cache_dir')}")
+        click.echo(f"  Size: {stats.get('total_size_mb', 0)} MB / {stats.get('max_size_mb', 0)} MB ({stats.get('usage_percent', 0)}%)")
+        click.echo(f"  Files: {stats.get('file_count', 0)}")
+
+
+@main.command("cache-clear")
+@click.confirmation_option(prompt="Are you sure you want to clear the disk cache?")
+def cache_clear():
+    """Clear the disk cache."""
+    import asyncio
+    from lorax.context import disk_cache_manager
+
+    async def clear():
+        await disk_cache_manager.clear()
+
+    asyncio.run(clear())
+    click.echo("Disk cache cleared")
+
+
+@main.command()
+@click.option(
+    "--host",
+    default="127.0.0.1",
+    help="Host to bind to (default: 127.0.0.1 for local, 0.0.0.0 for production)"
+)
+@click.option(
+    "--port",
+    default=8080,
+    type=int,
+    help="Port to bind to (default: 8080)"
+)
+@click.option(
+    "--reload",
+    is_flag=True,
+    help="Enable auto-reload for development"
+)
+@click.option(
+    "--gunicorn",
+    is_flag=True,
+    help="Use gunicorn for production (requires lorax[prod])"
+)
+@click.option(
+    "--workers",
+    default=4,
+    type=int,
+    help="Number of gunicorn workers (default: 4, only used with --gunicorn)"
+)
+@click.option(
+    "--config",
+    default=None,
+    type=click.Path(exists=True),
+    help="Path to gunicorn config file (optional)"
+)
+@click.option(
+    "--open-browser",
+    is_flag=True,
+    help="Open browser automatically (for local mode)"
+)
+def serve(host: str, port: int, reload: bool, gunicorn: bool, workers: int, config: str, open_browser: bool):
+    """Start the Lorax backend server.
+
+    Examples:
+        # Local mode (opens browser)
+        lorax serve --open-browser
+
+        # Development mode (with auto-reload)
+        lorax serve --reload
+
+        # Specify host/port
+        lorax serve --host 0.0.0.0 --port 8080 --reload
+
+        # Production mode (with gunicorn)
+        lorax serve --gunicorn --workers 4
+    """
+    if gunicorn:
+        _run_with_gunicorn(host, port, workers, config)
+    else:
+        _run_with_uvicorn(host, port, reload, open_browser)
+
+
+def _run_with_uvicorn(host: str, port: int, reload: bool, open_browser: bool = False):
+    """Run the server with uvicorn (development/local mode)."""
+    import uvicorn
+
+    click.echo(f"Starting Lorax server with uvicorn on {host}:{port}")
+    if reload:
+        click.echo("Auto-reload enabled")
+
+    # Open browser if requested (in a separate thread to not block server start)
+    if open_browser:
+        import threading
+        import webbrowser
+        import time
+
+        def open_browser_delayed():
+            time.sleep(1.5)  # Wait for server to start
+            url = f"http://{host}:{port}" if host != "0.0.0.0" else f"http://127.0.0.1:{port}"
+            click.echo(f"Opening browser: {url}")
+            webbrowser.open(url)
+
+        threading.Thread(target=open_browser_delayed, daemon=True).start()
+
+    uvicorn.run(
+        "lorax.lorax_app:sio_app",
+        host=host,
+        port=port,
+        reload=reload,
+        log_level="info"
+    )
+
+
+def _run_with_gunicorn(host: str, port: int, workers: int, config: str):
+    """Run the server with gunicorn (production mode)."""
+    try:
+        import gunicorn.app.base
+    except ImportError:
+        click.echo(
+            "Error: gunicorn is not installed. "
+            "Install with: pip install lorax[prod]",
+            err=True
+        )
+        sys.exit(1)
+
+    from gunicorn.app.base import BaseApplication
+
+    class LoraxApplication(BaseApplication):
+        def __init__(self, app_uri: str, options: dict = None):
+            self.app_uri = app_uri
+            self.options = options or {}
+            super().__init__()
+
+        def load_config(self):
+            for key, value in self.options.items():
+                if key in self.cfg.settings and value is not None:
+                    self.cfg.set(key.lower(), value)
+
+        def load(self):
+            # Return the app URI string for gunicorn to import
+            return None
+
+    bind = f"{host}:{port}"
+
+    options = {
+        "bind": bind,
+        "workers": workers,
+        "worker_class": "uvicorn.workers.UvicornWorker",
+        "timeout": 0,
+        "graceful_timeout": 0,
+        "keepalive": 0,
+        "accesslog": "-",
+        "errorlog": "-",
+        "loglevel": "info",
+    }
+
+    # Load custom config if provided
+    if config:
+        click.echo(f"Loading gunicorn config from: {config}")
+        # The config file will be loaded by gunicorn
+        options["config"] = config
+
+    click.echo(f"Starting Lorax server with gunicorn on {bind}")
+    click.echo(f"Workers: {workers}")
+
+    # Use subprocess to run gunicorn with the app
+    import subprocess
+
+    cmd = [
+        sys.executable, "-m", "gunicorn",
+        "-b", bind,
+        "-w", str(workers),
+        "-k", "uvicorn.workers.UvicornWorker",
+        "--timeout", "0",
+        "--graceful-timeout", "0",
+        "--access-logfile", "-",
+        "--error-logfile", "-",
+        "--log-level", "info",
+    ]
+
+    if config:
+        cmd.extend(["-c", config])
+
+    cmd.append("lorax.lorax_app:sio_app")
+
+    subprocess.run(cmd)
+
+
+if __name__ == "__main__":
+    main()
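For reference, a minimal sketch of driving this CLI in-process with Click's test runner (a hypothetical usage example, not part of the package; it assumes the wheel is installed so lorax.cli imports cleanly, and picks "config path" because it only echoes DEFAULT_CONFIG_PATH without touching the heavier server imports):

    from click.testing import CliRunner
    from lorax.cli import main

    # Invoke "lorax config path" without spawning a subprocess
    runner = CliRunner()
    result = runner.invoke(main, ["config", "path"])
    print(result.output)  # e.g. /home/<user>/.lorax/config.yaml

The same runner can exercise "config init --force" or "serve --help" without binding a port.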
lorax/cloud/__init__.py
ADDED
File without changes
lorax/cloud/gcs_utils.py
ADDED
@@ -0,0 +1,205 @@
+from google.cloud import storage
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Optional
+import zoneinfo
+import aiofiles
+import asyncio
+import requests
+import aiohttp
+import os
+
+BUCKET_NAME = os.getenv("GCS_BUCKET_NAME")
+
+def get_gcs_client():
+    return storage.Client()
+
+
+async def _download_gcs_file_direct(bucket_name: str, blob_path: str, local_path: str):
+    """
+    Internal: Download a public GCS blob to a local file.
+
+    Args:
+        bucket_name (str): Name of the public GCS bucket (e.g. "lorax_projects")
+        blob_path (str): Path inside the bucket (e.g. "1000Genomes/1kg_chr20.trees.tsz")
+        local_path (str): Local destination path (e.g. "uploads/1kg_chr20.trees.tsz")
+    """
+    url = f"https://storage.googleapis.com/{bucket_name}/{blob_path}"
+    Path(local_path).parent.mkdir(parents=True, exist_ok=True)
+
+    async with aiohttp.ClientSession() as session:
+        async with session.get(url) as resp:
+            if resp.status != 200:
+                raise RuntimeError(f"Failed to download {url} (HTTP {resp.status})")
+
+            async with aiofiles.open(local_path, "wb") as f:
+                while chunk := await resp.content.read(1024 * 1024):  # 1 MB chunks
+                    await f.write(chunk)
+
+    return Path(local_path)
+
+
+async def download_gcs_file(bucket_name: str, blob_path: str, local_path: str):
+    """
+    Asynchronously download a public GCS blob to a local file.
+    Uses disk cache if available for automatic caching and eviction.
+
+    Args:
+        bucket_name (str): Name of the public GCS bucket (e.g. "lorax_projects")
+        blob_path (str): Path inside the bucket (e.g. "1000Genomes/1kg_chr20.trees.tsz")
+        local_path (str): Local destination path (e.g. "uploads/1kg_chr20.trees.tsz")
+
+    Returns:
+        Path to the downloaded file
+    """
+    # Import here to avoid circular imports
+    from lorax.context import disk_cache_manager
+
+    if disk_cache_manager.enabled:
+        # Use disk cache for managed downloads
+        async def download_func(path: str):
+            await _download_gcs_file_direct(bucket_name, blob_path, path)
+
+        cached_path = await disk_cache_manager.get_or_download(
+            bucket_name, blob_path, download_func
+        )
+
+        # If local_path differs from cached path, create a symlink or copy
+        local_path_obj = Path(local_path)
+        if cached_path != local_path_obj:
+            local_path_obj.parent.mkdir(parents=True, exist_ok=True)
+            # Symlink to cached file (avoid duplicate storage)
+            if local_path_obj.exists() or local_path_obj.is_symlink():
+                local_path_obj.unlink()
+            try:
+                local_path_obj.symlink_to(cached_path)
+            except OSError:
+                # Fallback: just return cached path if symlink fails
+                return cached_path
+
+        return local_path_obj
+    else:
+        # Direct download without caching
+        return await _download_gcs_file_direct(bucket_name, blob_path, local_path)
+
+
+async def download_gcs_file_cached(
+    bucket_name: str,
+    blob_path: str,
+    disk_cache_manager=None
+) -> Optional[Path]:
+    """
+    Download a GCS file using the disk cache manager.
+
+    This is the preferred method for production use - it handles:
+    - Distributed locking (prevents duplicate downloads across workers)
+    - LRU eviction (manages disk space)
+    - Access time tracking
+
+    Args:
+        bucket_name: GCS bucket name
+        blob_path: Path within bucket
+        disk_cache_manager: Optional cache manager (uses global if not provided)
+
+    Returns:
+        Path to cached file, or None if download failed
+    """
+    if disk_cache_manager is None:
+        from lorax.context import disk_cache_manager
+
+    if not disk_cache_manager.enabled:
+        return None
+
+    async def download_func(local_path: str):
+        await _download_gcs_file_direct(bucket_name, blob_path, local_path)
+
+    try:
+        return await disk_cache_manager.get_or_download(
+            bucket_name, blob_path, download_func
+        )
+    except Exception as e:
+        print(f"Failed to download {blob_path} from GCS: {e}")
+        return None
+
+def get_public_gcs_dict(
+    bucket_name: str,
+    sid: str,
+    prefix: str = "",
+    projects=None,
+    include_uploads: bool = False,
+    uploads_sid: str | None = None,
+):
+    if projects is None:
+        projects = {}
+    api_url = f"https://storage.googleapis.com/storage/v1/b/{bucket_name}/o"
+    params = {"prefix": '', "fields": "items(name)"}
+    resp = requests.get(api_url, params=params)
+    resp.raise_for_status()
+    items = resp.json().get("items", [])
+
+    for item in items:
+        name = item['name']
+        path_parts = name.split("/")
+
+        # Must have at least a top-level directory (e.g., 'folder/')
+        if len(path_parts) < 2:
+            continue
+        name_first = path_parts[0]
+        second_part = path_parts[1]
+
+        # Handle Uploads filtering
+        if name_first == 'Uploads':
+            if not include_uploads:
+                continue
+            if not uploads_sid or second_part != uploads_sid:
+                continue
+            # Require a filename component
+            if len(path_parts) < 3:
+                continue
+
+        if name_first not in projects:
+            projects[name_first] = {'folder': name_first, 'files': [], 'description': ''}
+
+        if name_first != 'Uploads' and second_part:
+            if second_part not in projects[name_first]['files']:
+                projects[name_first]['files'].append(second_part)
+        elif name_first == 'Uploads':
+            filename = path_parts[2]
+            if filename not in projects[name_first]['files']:
+                projects[name_first]['files'].append(filename)
+    return projects
+
+async def upload_to_gcs(bucket_name: str, local_path: Path, sid: str):
+    """
+    Upload file to GCS under session-specific folder.
+
+    Args:
+        bucket_name (str): Target GCS bucket
+        local_path (Path): Local file to upload
+        sid (str): Session ID (used as prefix)
+    """
+    blob_path = f"Uploads/{sid}/{local_path.name}"
+
+    # Use executor to avoid blocking event loop
+    loop = asyncio.get_event_loop()
+    await loop.run_in_executor(None, _upload_file_sync, bucket_name, local_path, blob_path)
+    return f"https://storage.googleapis.com/{bucket_name}/{blob_path}"
+
+def _upload_file_sync(bucket_name: str, local_path: Path, blob_path: str):
+    """Synchronous helper called within a thread executor."""
+    client = get_gcs_client()
+    bucket = client.bucket(bucket_name)
+    blob = bucket.blob(blob_path)
+    blob.upload_from_filename(str(local_path))
+    blob.custom_time = datetime.utcnow()
+    pacific_tz = zoneinfo.ZoneInfo("America/Los_Angeles")
+    now_pacific = datetime.now(pacific_tz)
+
+    # Convert to UTC (GCS expects UTC timestamps)
+    now_utc = now_pacific.astimezone(timezone.utc)
+
+    # Assign custom_time so GCS lifecycle rule can delete it after 7 days
+    blob.custom_time = now_utc
+    blob.patch()
+    # blob.make_public()  # Optional: make public for browser access
+    print(f"Uploaded {local_path.name} to gs://{bucket_name}/{blob_path}")
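A short sketch of the anonymous-download path above, reusing the example bucket and blob names from the docstrings (assumptions: the package's third-party dependencies such as aiohttp and aiofiles are installed, and the disk cache is disabled so download_gcs_file takes the direct branch; calling it imports lorax.context as a side effect):

    import asyncio
    from lorax.cloud.gcs_utils import download_gcs_file

    async def demo():
        # Bucket and blob values copied from the docstring examples above
        path = await download_gcs_file(
            "lorax_projects",
            "1000Genomes/1kg_chr20.trees.tsz",
            "uploads/1kg_chr20.trees.tsz",
        )
        print(f"Downloaded to {path}")

    asyncio.run(demo())

No credentials are needed on this path: the blob is fetched over the public storage.googleapis.com URL, whereas upload_to_gcs goes through google.cloud.storage.Client and does require them.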
lorax/constants.py
ADDED
@@ -0,0 +1,66 @@
+"""
+Lorax Backend Constants
+
+Centralized configuration values to avoid hardcoding throughout the codebase.
+Mode-aware configuration based on deployment environment.
+"""
+
+import os
+from pathlib import Path
+
+# Import mode configuration
+from lorax.modes import (
+    get_mode_config,
+    get_uploads_dir,
+    get_cache_dir,
+    CURRENT_MODE,
+    CURRENT_CONFIG,
+)
+
+# Session Configuration
+SESSION_COOKIE = "lorax_sid"
+COOKIE_MAX_AGE = 7 * 24 * 60 * 60  # 7 days in seconds
+
+# Cache Configuration (mode-aware)
+TS_CACHE_SIZE = CURRENT_CONFIG.ts_cache_size
+CONFIG_CACHE_SIZE = CURRENT_CONFIG.config_cache_size
+METADATA_CACHE_SIZE = CURRENT_CONFIG.metadata_cache_size
+
+# Disk Cache Configuration (mode-aware)
+DISK_CACHE_ENABLED = CURRENT_CONFIG.disk_cache_enabled
+DISK_CACHE_DIR = get_cache_dir(CURRENT_CONFIG)
+DISK_CACHE_MAX_BYTES = CURRENT_CONFIG.disk_cache_max_gb * 1024 * 1024 * 1024
+
+# Connection Limits (mode-aware)
+MAX_SOCKETS_PER_SESSION = CURRENT_CONFIG.max_sockets_per_session
+ENFORCE_CONNECTION_LIMITS = CURRENT_CONFIG.enforce_connection_limits
+
+# File Types
+SUPPORTED_EXTENSIONS = {'.tsz', '.trees', '.csv'}
+
+# Directory Names (mode-aware)
+UPLOADS_DIR = str(get_uploads_dir(CURRENT_CONFIG))
+
+# Default Values
+DEFAULT_WINDOW_SIZE = 50000
+
+# Socket.IO Configuration
+SOCKET_PING_TIMEOUT = 60  # seconds
+SOCKET_PING_INTERVAL = 25  # seconds
+MAX_HTTP_BUFFER_SIZE = 50_000_000  # 50 MB
+
+# Error Codes
+ERROR_SESSION_NOT_FOUND = "SESSION_NOT_FOUND"
+ERROR_MISSING_SESSION = "MISSING_SESSION"
+ERROR_NO_FILE_LOADED = "NO_FILE_LOADED"
+ERROR_TOO_MANY_CONNECTIONS = "TOO_MANY_CONNECTIONS"
+ERROR_CONNECTION_REPLACED = "CONNECTION_REPLACED"
+
+
+def print_config():
+    """Print current configuration for debugging."""
+    print(f"Mode: {CURRENT_MODE}")
+    print(f"TS Cache Size: {TS_CACHE_SIZE}")
+    print(f"Disk Cache: {DISK_CACHE_ENABLED} ({CURRENT_CONFIG.disk_cache_max_gb}GB)")
+    print(f"Max Sockets/Session: {MAX_SOCKETS_PER_SESSION}")
+    print(f"Uploads Dir: {UPLOADS_DIR}")
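Since every value here is resolved once at import time from CURRENT_CONFIG, the module doubles as a quick diagnostic surface. A hedged sketch (assuming the package and its mode configuration import cleanly in your environment):

    from lorax import constants

    # Same one-shot summary the module exposes for debugging
    constants.print_config()

    # Derived value: disk_cache_max_gb * 1024**3; e.g. 10 GB -> 10737418240 bytes
    print(constants.DISK_CACHE_MAX_BYTES)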
lorax/context.py
ADDED
@@ -0,0 +1,80 @@
+import os
+from pathlib import Path
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Import mode configuration first (before other lorax imports that depend on it)
+from lorax.modes import (
+    CURRENT_MODE,
+    CURRENT_CONFIG,
+    get_cache_dir,
+    validate_mode_requirements,
+)
+from lorax.session_manager import SessionManager
+from lorax.redis_utils import create_redis_client, get_redis_config
+from lorax.cache import DiskCacheManager, TreeGraphCache, CsvTreeGraphCache
+from lorax.constants import (
+    DISK_CACHE_ENABLED,
+    DISK_CACHE_DIR,
+    DISK_CACHE_MAX_BYTES,
+)
+
+# Validate mode requirements
+validation_errors = validate_mode_requirements(CURRENT_CONFIG)
+if validation_errors:
+    for error in validation_errors:
+        print(f"Warning: {error}")
+
+# Shared Global State
+# We initialize these singletons here to ensure all modules share the same instances
+# This is critical for in-memory mode so routes and sockets share the same stores
+
+REDIS_CLUSTER_URL, REDIS_CLUSTER = get_redis_config()
+session_manager = SessionManager(redis_url=REDIS_CLUSTER_URL, redis_cluster=REDIS_CLUSTER)
+
+# Common Environment Variables
+BUCKET_NAME = os.getenv("BUCKET_NAME") or os.getenv("GCS_BUCKET_NAME") or 'lorax_projects'
+
+# Initialize Disk Cache Manager
+# Uses Redis for distributed locking if available, falls back to file locks
+_redis_client = None
+if REDIS_CLUSTER_URL and DISK_CACHE_ENABLED:
+    try:
+        _redis_client = create_redis_client(
+            REDIS_CLUSTER_URL,
+            decode_responses=True,
+            cluster=REDIS_CLUSTER,
+        )
+        print("DiskCacheManager using Redis for distributed locking")
+    except Exception as e:
+        print(f"Warning: Failed to connect Redis for disk cache: {e}")
+
+disk_cache_manager = DiskCacheManager(
+    cache_dir=Path(DISK_CACHE_DIR),
+    max_size_bytes=DISK_CACHE_MAX_BYTES,
+    redis_client=_redis_client,
+    enabled=DISK_CACHE_ENABLED,
+)
+
+# Initialize TreeGraph Cache for per-session tree caching
+# Uses Redis in production for distributed caching, in-memory for local mode
+_tree_graph_redis = None
+if REDIS_CLUSTER_URL:
+    try:
+        # Create a separate connection for binary data (decode_responses=False)
+        _tree_graph_redis = create_redis_client(
+            REDIS_CLUSTER_URL,
+            decode_responses=False,
+            cluster=REDIS_CLUSTER,
+        )
+        print(f"TreeGraphCache using Redis at {REDIS_CLUSTER_URL}")
+    except Exception as e:
+        print(f"Warning: Failed to connect Redis for TreeGraphCache: {e}")
+
+tree_graph_cache = TreeGraphCache(redis_client=None)
+
+# CSV mode: cache parsed Newick trees per session (in-memory only)
+csv_tree_graph_cache = CsvTreeGraphCache()
+
+print(f"Context initialized: mode={CURRENT_MODE}, disk_cache={DISK_CACHE_ENABLED}")
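context.py acts as the composition root: other modules import these singletons rather than constructing their own (note that in this release _tree_graph_redis is created but TreeGraphCache is instantiated with redis_client=None, so the tree-graph cache stays in-memory). A sketch of the intended consumption, mirroring what the cache-status command in lorax/cli.py awaits:

    import asyncio
    from lorax.context import disk_cache_manager

    # get_stats() is the same coroutine "lorax cache-status" runs
    stats = asyncio.run(disk_cache_manager.get_stats())
    print(stats.get("enabled"), stats.get("cache_dir"))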
lorax/csv/__init__.py
ADDED