lorax-arg 0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lorax/buffer.py +43 -0
- lorax/cache/__init__.py +43 -0
- lorax/cache/csv_tree_graph.py +59 -0
- lorax/cache/disk.py +467 -0
- lorax/cache/file_cache.py +142 -0
- lorax/cache/file_context.py +72 -0
- lorax/cache/lru.py +90 -0
- lorax/cache/tree_graph.py +293 -0
- lorax/cli.py +312 -0
- lorax/cloud/__init__.py +0 -0
- lorax/cloud/gcs_utils.py +205 -0
- lorax/constants.py +66 -0
- lorax/context.py +80 -0
- lorax/csv/__init__.py +7 -0
- lorax/csv/config.py +250 -0
- lorax/csv/layout.py +182 -0
- lorax/csv/newick_tree.py +234 -0
- lorax/handlers.py +998 -0
- lorax/lineage.py +456 -0
- lorax/loaders/__init__.py +0 -0
- lorax/loaders/csv_loader.py +10 -0
- lorax/loaders/loader.py +31 -0
- lorax/loaders/tskit_loader.py +119 -0
- lorax/lorax_app.py +75 -0
- lorax/manager.py +58 -0
- lorax/metadata/__init__.py +0 -0
- lorax/metadata/loader.py +426 -0
- lorax/metadata/mutations.py +146 -0
- lorax/modes.py +190 -0
- lorax/pg.py +183 -0
- lorax/redis_utils.py +30 -0
- lorax/routes.py +137 -0
- lorax/session_manager.py +206 -0
- lorax/sockets/__init__.py +55 -0
- lorax/sockets/connection.py +99 -0
- lorax/sockets/debug.py +47 -0
- lorax/sockets/decorators.py +112 -0
- lorax/sockets/file_ops.py +200 -0
- lorax/sockets/lineage.py +307 -0
- lorax/sockets/metadata.py +232 -0
- lorax/sockets/mutations.py +154 -0
- lorax/sockets/node_search.py +535 -0
- lorax/sockets/tree_layout.py +117 -0
- lorax/sockets/utils.py +10 -0
- lorax/tree_graph/__init__.py +12 -0
- lorax/tree_graph/tree_graph.py +689 -0
- lorax/utils.py +124 -0
- lorax_app/__init__.py +4 -0
- lorax_app/app.py +159 -0
- lorax_app/cli.py +114 -0
- lorax_app/static/X.png +0 -0
- lorax_app/static/assets/index-BCEGlUFi.js +2361 -0
- lorax_app/static/assets/index-iKjzUpA9.css +1 -0
- lorax_app/static/assets/localBackendWorker-BaWwjSV_.js +2 -0
- lorax_app/static/assets/renderDataWorker-BKLdiU7J.js +2 -0
- lorax_app/static/gestures/gesture-flick.ogv +0 -0
- lorax_app/static/gestures/gesture-two-finger-scroll.ogv +0 -0
- lorax_app/static/index.html +14 -0
- lorax_app/static/logo.png +0 -0
- lorax_app/static/lorax-logo.png +0 -0
- lorax_app/static/vite.svg +1 -0
- lorax_arg-0.1.dist-info/METADATA +131 -0
- lorax_arg-0.1.dist-info/RECORD +66 -0
- lorax_arg-0.1.dist-info/WHEEL +5 -0
- lorax_arg-0.1.dist-info/entry_points.txt +4 -0
- lorax_arg-0.1.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unified File Cache for Lorax.
|
|
3
|
+
|
|
4
|
+
Provides cached loading of FileContext objects which combine tree sequences,
|
|
5
|
+
config, and metadata into a single eviction unit. When a file is evicted,
|
|
6
|
+
all related data is evicted together.
|
|
7
|
+
|
|
8
|
+
This replaces the separate tree_sequence, config, and metadata caches.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Optional
|
|
14
|
+
|
|
15
|
+
import pandas as pd
|
|
16
|
+
import tskit
|
|
17
|
+
import tszip
|
|
18
|
+
|
|
19
|
+
from lorax.cache.lru import LRUCacheWithMeta
|
|
20
|
+
from lorax.cache.file_context import FileContext
|
|
21
|
+
from lorax.constants import TS_CACHE_SIZE
|
|
22
|
+
|
|
23
|
+
# Global asyncio lock serializing cache loads between coroutines (it does not guard OS threads)
|
|
24
|
+
_cache_lock = asyncio.Lock()  # held by get_file_context for the whole duration of a load
|
|
25
|
+
|
|
26
|
+
# Global cache for FileContext objects
|
|
27
|
+
# Uses LRUCacheWithMeta to track file mtime for cache validation
|
|
28
|
+
_file_cache = LRUCacheWithMeta(max_size=TS_CACHE_SIZE)  # key: file path, value: FileContext, meta: mtime at load
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _get_file_mtime(file_path: str) -> float:
|
|
32
|
+
"""Get file modification time, or 0 if file doesn't exist."""
|
|
33
|
+
try:
|
|
34
|
+
return Path(file_path).stat().st_mtime
|
|
35
|
+
except (OSError, FileNotFoundError):
|
|
36
|
+
return 0.0
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _load_tree_sequence(file_path: str):
|
|
40
|
+
"""Load tree sequence from file based on extension."""
|
|
41
|
+
if file_path.endswith('.tsz'):
|
|
42
|
+
return tszip.load(file_path)
|
|
43
|
+
elif file_path.endswith('.trees'):
|
|
44
|
+
return tskit.load(file_path)
|
|
45
|
+
elif file_path.endswith('.csv'):
|
|
46
|
+
return pd.read_csv(file_path)
|
|
47
|
+
else:
|
|
48
|
+
raise ValueError(f"Unsupported file type: {file_path}")
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
async def get_file_context(file_path: str, root_dir: Optional[str] = None) -> Optional[FileContext]:
    """
    Get or load a FileContext for the given file path.

    A cached context is returned only when the mtime stored with it equals
    the file's current mtime. Otherwise the file is loaded again and the
    new context replaces the cached one.

    Args:
        file_path: Path to the tree sequence file
        root_dir: Root directory for relative paths (defaults to file's parent)

    Returns:
        FileContext object with tree_sequence, config, and metadata cache,
        or None if file doesn't exist or fails to load.
    """
    # Import here to avoid circular dependency
    from lorax.loaders.loader import compute_config

    file_path_obj = Path(file_path)
    if not file_path_obj.exists():
        # Drop any entry left over from when the file still existed.
        _file_cache.remove(file_path)
        print(f"❌ File not found: {file_path}")
        return None

    current_mtime = _get_file_mtime(file_path)

    # 1. Optimistic check without taking the lock.
    ctx, cached_mtime = _file_cache.get_with_meta(file_path)
    if ctx is not None:
        if cached_mtime == current_mtime:
            print(f"✅ Using cached FileContext: {file_path}")
            return ctx
        print(f"🔄 File changed, reloading: {file_path}")
        _file_cache.remove(file_path)

    # Need to load - acquire lock
    async with _cache_lock:
        # The wait for the lock can be long (another load may be running), so
        # the mtime sampled above may be stale by now. Sample it again so the
        # comparison below and the value stored with the new entry describe
        # the file as it is when this load starts.
        current_mtime = _get_file_mtime(file_path)

        # 2. Check again under lock (in case another task loaded it while we waited)
        ctx, cached_mtime = _file_cache.get_with_meta(file_path)
        if ctx is not None and cached_mtime == current_mtime:
            print(f"✅ Using cached FileContext (after lock): {file_path}")
            return ctx

        print(f"📂 Loading FileContext: {file_path}")

        try:
            # Loading is blocking work, so it runs in a worker thread.
            ts = await asyncio.to_thread(_load_tree_sequence, file_path)

            # Compute config immediately (it's derived from ts)
            effective_root_dir = root_dir or str(file_path_obj.parent)
            config = await asyncio.to_thread(
                compute_config, ts, file_path, effective_root_dir
            )

            # Create FileContext with empty metadata cache
            ctx = FileContext(
                file_path=file_path,
                tree_sequence=ts,
                config=config,
                mtime=current_mtime
            )

            _file_cache.set(file_path, ctx, meta=current_mtime)
            return ctx

        except Exception as e:
            # Deliberate best-effort: any load failure is reported and
            # surfaced to the caller as None.
            print(f"❌ Failed to load {file_path}: {e}")
            return None
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def get_file_cache_size() -> int:
    """Report how many entries the module-level file cache currently holds."""
    cached_entries = _file_cache.cache
    return len(cached_entries)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
# Backwards compatibility: provide get_or_load_ts that returns just the tree_sequence
|
|
130
|
+
async def get_or_load_ts(file_path: str, root_dir: str = None):
    """
    Legacy entry point that yields only the loaded tree sequence.

    Delegates to get_file_context() and unwraps its ``tree_sequence``;
    returns None when no context could be obtained. New code should call
    get_file_context() directly.
    """
    file_ctx = await get_file_context(file_path, root_dir)
    if not file_ctx:
        return None
    return file_ctx.tree_sequence
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def get_ts_cache_size() -> int:
    """Legacy name kept for older callers; forwards to get_file_cache_size()."""
    entry_count = get_file_cache_size()
    return entry_count
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""
|
|
2
|
+
FileContext: Unified cache entry for loaded files.
|
|
3
|
+
|
|
4
|
+
Combines tree sequence, config, and metadata into a single eviction unit.
|
|
5
|
+
When a file is evicted from cache, all related data is evicted together,
|
|
6
|
+
preventing orphan metadata and ensuring atomic invalidation.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Any, Optional, Union
|
|
11
|
+
|
|
12
|
+
import pandas as pd
|
|
13
|
+
import tskit
|
|
14
|
+
|
|
15
|
+
from lorax.cache.lru import LRUCache
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class FileContext:
    """
    One cache entry bundling everything held for a loaded file.

    The loaded object, its config and a small per-key metadata cache live
    on the same instance, so dropping the instance drops all of them.

    Attributes:
        file_path: Path of the loaded file
        tree_sequence: The loaded tskit.TreeSequence or pandas.DataFrame (for CSV)
        config: Configuration dict computed from the tree sequence
        mtime: File modification time at load (used for cache validation)

    Example keys stored in the nested _metadata cache:
        - "population" -> {sample_name: value}
        - "region" -> {sample_name: value}
        - "population:array" -> {arrow_buffer, unique_values, sample_node_ids}
    """
    file_path: str
    tree_sequence: Union[tskit.TreeSequence, pd.DataFrame]
    config: dict
    mtime: float

    # Nested LRU cache of metadata lookups, capped at 10 keys.
    # Each instance gets its own cache via the default factory.
    _metadata: LRUCache = field(default_factory=lambda: LRUCache(max_size=10))

    @property
    def ts(self) -> Union[tskit.TreeSequence, pd.DataFrame]:
        """Short legacy alias that returns ``tree_sequence``."""
        return self.tree_sequence

    @property
    def is_tree_sequence(self) -> bool:
        """True when the loaded object is a tskit.TreeSequence."""
        return isinstance(self.tree_sequence, tskit.TreeSequence)

    @property
    def is_csv(self) -> bool:
        """True when the loaded object is a pandas DataFrame (CSV input)."""
        return isinstance(self.tree_sequence, pd.DataFrame)

    def get_metadata(self, key: str) -> Optional[Any]:
        """Look up ``key`` in the nested metadata cache."""
        metadata_cache = self._metadata
        return metadata_cache.get(key)

    def set_metadata(self, key: str, value: Any) -> None:
        """Store ``value`` under ``key`` in the nested metadata cache."""
        metadata_cache = self._metadata
        metadata_cache.set(key, value)

    def clear_metadata(self) -> None:
        """Empty the nested metadata cache (e.g., when file reloaded)."""
        metadata_cache = self._metadata
        metadata_cache.clear()
|
lorax/cache/lru.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LRU Cache implementations for Lorax.
|
|
3
|
+
|
|
4
|
+
Provides in-memory caching with eviction for:
|
|
5
|
+
- Tree sequences (with mtime validation)
|
|
6
|
+
- Configuration data
|
|
7
|
+
- Metadata lookups
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from collections import OrderedDict
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class LRUCache:
    """Bounded least-recently-used cache built on an OrderedDict.

    The last position of ``self.cache`` is the most recently used entry;
    the first position is the next eviction candidate.
    """

    def __init__(self, max_size=5):
        self.max_size = max_size
        self.cache = OrderedDict()

    def get(self, key):
        """Return the value for ``key`` and mark it recently used, or None if absent."""
        try:
            # move_to_end raises KeyError for an unknown key.
            self.cache.move_to_end(key)
        except KeyError:
            return None
        return self.cache[key]

    def set(self, key, value):
        """Store ``value`` under ``key`` as the most recently used entry."""
        self.cache[key] = value
        # A new key already lands at the end; an updated one is moved there.
        self.cache.move_to_end(key)
        # At most one entry is dropped per call.
        if len(self.cache) > self.max_size:
            evicted_key, _evicted_value = self.cache.popitem(last=False)
            print(f"🧹 Evicted {evicted_key} from LRU cache to free memory")

    def remove(self, key):
        """Drop ``key`` if it is cached; unknown keys are ignored."""
        self.cache.pop(key, None)

    def clear(self):
        """Drop every entry."""
        self.cache.clear()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class LRUCacheWithMeta:
    """
    Bounded LRU cache whose entries carry a piece of metadata.

    Each key maps to a ``(value, meta)`` pair. Callers can compare ``meta``
    with external state (e.g., file mtime) to decide whether the cached
    value is still usable.
    """

    def __init__(self, max_size=5):
        self.max_size = max_size
        self.cache = OrderedDict()  # key -> (value, meta)

    def get(self, key):
        """Return only the value for ``key`` (None if absent), marking it recently used."""
        try:
            self.cache.move_to_end(key)
        except KeyError:
            return None
        return self.cache[key][0]

    def get_with_meta(self, key):
        """Return ``(value, meta)`` for ``key``, or ``(None, None)`` if absent."""
        try:
            self.cache.move_to_end(key)
        except KeyError:
            return None, None
        cached_value, cached_meta = self.cache[key]
        return cached_value, cached_meta

    def set(self, key, value, meta=None):
        """Store ``(value, meta)`` under ``key`` as the most recently used entry."""
        self.cache[key] = (value, meta)
        # A new key already lands at the end; an updated one is moved there.
        self.cache.move_to_end(key)
        # At most one entry is dropped per call.
        if len(self.cache) > self.max_size:
            evicted_key, (_evicted_value, _evicted_meta) = self.cache.popitem(last=False)
            print(f"🧹 Evicted {evicted_key} from LRU cache to free memory")

    def remove(self, key):
        """Drop ``key`` if it is cached; unknown keys are ignored."""
        self.cache.pop(key, None)

    def clear(self):
        """Drop every entry."""
        self.cache.clear()

    def __len__(self):
        """Number of cached entries."""
        return len(self.cache)
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TreeGraph Cache for per-session caching of TreeGraph objects.
|
|
3
|
+
|
|
4
|
+
Supports Redis for production (distributed) and in-memory for local mode.
|
|
5
|
+
Enables efficient lineage and search operations by reusing constructed trees.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import pickle
|
|
9
|
+
import asyncio
|
|
10
|
+
from collections import OrderedDict
|
|
11
|
+
from typing import Dict, Optional, TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from lorax.tree_graph import TreeGraph
|
|
15
|
+
|
|
16
|
+
# TTL for Redis entries (matches session lifetime)
|
|
17
|
+
REDIS_TTL_SECONDS = 7 * 24 * 60 * 60 # 7 days, in seconds; used as the expiry for setex in TreeGraphCache._redis_set
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class TreeGraphCache:
    """
    Per-session cache for TreeGraph objects with Redis/local mode switching.

    When a Redis client is supplied, TreeGraph objects are serialized via
    pickle and stored under per-session keys. Without a client, a
    per-session in-memory OrderedDict is used.

    Features:
    - Visibility-based eviction: trees not in visible set are evicted
    - Local-mode mutations are serialized with an asyncio.Lock
    - Mode is chosen once, from whether a Redis client was passed in

    Redis errors are caught, printed and turned into a "nothing cached"
    result (None, an empty dict or 0), so callers never see them.
    """

    def __init__(self, redis_client=None):
        """
        Initialize the cache.

        Args:
            redis_client: Optional async Redis client. If None, uses in-memory mode.
        """
        self.redis = redis_client
        # Local cache: session_id -> OrderedDict{tree_index -> TreeGraph}
        # Reads move the accessed entry to the end of the OrderedDict.
        self._local_cache: Dict[str, OrderedDict] = {}
        self._lock = asyncio.Lock()

        mode = "Redis" if redis_client else "in-memory"
        print(f"TreeGraphCache initialized in {mode} mode")

    def _redis_key(self, session_id: str, tree_index: int) -> str:
        """Generate Redis key for a cached TreeGraph."""
        # The tripled braces emit literal "{" and "}" around the session id.
        return f"treegraph:{{{session_id}}}:{tree_index}"

    def _redis_session_pattern(self, session_id: str) -> str:
        """Generate Redis pattern to match all trees for a session."""
        return f"treegraph:{{{session_id}}}:*"

    @staticmethod
    def _tree_index_from_key(key) -> Optional[int]:
        """
        Extract the tree index from a key of the form treegraph:{session_id}:{tree_index}.

        Args:
            key: Key as yielded by ``scan_iter``; may be ``str`` or ``bytes``
                depending on how the Redis client is configured.

        Returns:
            The tree index, or None when the key does not have exactly three
            colon-separated parts.

        Raises:
            ValueError: if the last part is not an integer.
        """
        text = key.decode() if isinstance(key, bytes) else key
        parts = text.split(":")
        if len(parts) != 3:
            return None
        return int(parts[2])

    async def get(self, session_id: str, tree_index: int) -> Optional["TreeGraph"]:
        """
        Retrieve a cached TreeGraph.

        Args:
            session_id: Session identifier
            tree_index: Tree index in the tree sequence

        Returns:
            TreeGraph if cached, None otherwise
        """
        if self.redis:
            return await self._redis_get(session_id, tree_index)
        else:
            return self._local_get(session_id, tree_index)

    async def set(
        self,
        session_id: str,
        tree_index: int,
        tree_graph: "TreeGraph"
    ) -> None:
        """
        Cache a TreeGraph object.

        Args:
            session_id: Session identifier
            tree_index: Tree index in the tree sequence
            tree_graph: TreeGraph object to cache
        """
        if self.redis:
            await self._redis_set(session_id, tree_index, tree_graph)
        else:
            await self._local_set(session_id, tree_index, tree_graph)

    async def get_all_for_session(self, session_id: str) -> Dict[int, "TreeGraph"]:
        """
        Retrieve all cached TreeGraphs for a session.

        Args:
            session_id: Session identifier

        Returns:
            Dict mapping tree_index to TreeGraph
        """
        if self.redis:
            return await self._redis_get_all(session_id)
        else:
            return self._local_get_all(session_id)

    async def clear_session(self, session_id: str) -> None:
        """
        Clear all cached TreeGraphs for a session.

        Call this when:
        - Session loads a new file
        - Session is deleted/expired

        Args:
            session_id: Session identifier
        """
        if self.redis:
            await self._redis_clear_session(session_id)
        else:
            await self._local_clear_session(session_id)

    async def evict_not_visible(self, session_id: str, visible_indices: set) -> int:
        """
        Evict all cached TreeGraphs NOT in the visible set.

        This implements visibility-based eviction: trees that are no longer
        visible in the viewport are removed from cache.

        Args:
            session_id: Session identifier
            visible_indices: Set of tree indices currently visible

        Returns:
            Number of trees evicted
        """
        if self.redis:
            return await self._redis_evict_not_visible(session_id, visible_indices)
        else:
            return await self._local_evict_not_visible(session_id, visible_indices)

    # ==================== Redis Implementation ====================

    async def _redis_get(self, session_id: str, tree_index: int) -> Optional["TreeGraph"]:
        """Get TreeGraph from Redis; returns None on a miss or on any error."""
        try:
            key = self._redis_key(session_id, tree_index)
            data = await self.redis.get(key)
            if data:
                # SECURITY NOTE(review): pickle.loads executes code embedded in
                # the payload. This is only safe while the Redis instance is
                # trusted and not writable by untrusted parties.
                return pickle.loads(data)
            return None
        except Exception as e:
            print(f"TreeGraphCache Redis get error: {e}")
            return None

    async def _redis_set(
        self,
        session_id: str,
        tree_index: int,
        tree_graph: "TreeGraph"
    ) -> None:
        """Pickle the TreeGraph and store it in Redis with the module TTL."""
        try:
            # Serialize and store
            key = self._redis_key(session_id, tree_index)
            data = pickle.dumps(tree_graph)
            await self.redis.setex(key, REDIS_TTL_SECONDS, data)
        except Exception as e:
            print(f"TreeGraphCache Redis set error: {e}")

    async def _redis_get_all(self, session_id: str) -> Dict[int, "TreeGraph"]:
        """
        Get all TreeGraphs for a session from Redis.

        On error, whatever was collected before the failure is returned.
        """
        result = {}
        try:
            pattern = self._redis_session_pattern(session_id)
            async for key in self.redis.scan_iter(pattern):
                # Keys may arrive as bytes; the shared parser decodes them,
                # matching what _redis_evict_not_visible does.
                tree_index = self._tree_index_from_key(key)
                if tree_index is not None:
                    data = await self.redis.get(key)
                    if data:
                        # SECURITY NOTE(review): see _redis_get; pickle.loads
                        # requires a trusted Redis instance.
                        result[tree_index] = pickle.loads(data)
        except Exception as e:
            print(f"TreeGraphCache Redis get_all error: {e}")
        return result

    async def _redis_clear_session(self, session_id: str) -> None:
        """Clear all TreeGraphs for a session from Redis."""
        try:
            pattern = self._redis_session_pattern(session_id)
            keys = [key async for key in self.redis.scan_iter(pattern)]
            if keys:
                await self.redis.delete(*keys)
                print(f"TreeGraphCache cleared {len(keys)} trees for session {session_id[:8]}...")
        except Exception as e:
            print(f"TreeGraphCache Redis clear error: {e}")

    async def _redis_evict_not_visible(self, session_id: str, visible_indices: set) -> int:
        """Evict non-visible trees from Redis; returns how many keys were deleted."""
        evicted = 0
        try:
            pattern = self._redis_session_pattern(session_id)
            keys_to_delete = []
            async for key in self.redis.scan_iter(pattern):
                tree_index = self._tree_index_from_key(key)
                if tree_index is not None and tree_index not in visible_indices:
                    keys_to_delete.append(key)
            if keys_to_delete:
                await self.redis.delete(*keys_to_delete)
                evicted = len(keys_to_delete)
                if evicted > 0:
                    print(f"TreeGraphCache evicted {evicted} non-visible trees for session {session_id[:8]}...")
        except Exception as e:
            print(f"TreeGraphCache Redis evict error: {e}")
        return evicted

    # ==================== Local (In-Memory) Implementation ====================

    def _local_get(self, session_id: str, tree_index: int) -> Optional["TreeGraph"]:
        """Get TreeGraph from local cache."""
        session_cache = self._local_cache.get(session_id)
        if session_cache and tree_index in session_cache:
            # Move to end for LRU (most recently used)
            session_cache.move_to_end(tree_index)
            return session_cache[tree_index]
        return None

    async def _local_set(
        self,
        session_id: str,
        tree_index: int,
        tree_graph: "TreeGraph"
    ) -> None:
        """Set TreeGraph in local cache, creating the session's dict on first use."""
        async with self._lock:
            if session_id not in self._local_cache:
                self._local_cache[session_id] = OrderedDict()

            session_cache = self._local_cache[session_id]
            session_cache[tree_index] = tree_graph

    def _local_get_all(self, session_id: str) -> Dict[int, "TreeGraph"]:
        """Get all TreeGraphs for a session from local cache (as a shallow copy)."""
        session_cache = self._local_cache.get(session_id)
        if session_cache:
            return dict(session_cache)
        return {}

    async def _local_clear_session(self, session_id: str) -> None:
        """Clear all TreeGraphs for a session from local cache."""
        async with self._lock:
            if session_id in self._local_cache:
                count = len(self._local_cache[session_id])
                del self._local_cache[session_id]
                print(f"TreeGraphCache cleared {count} trees for session {session_id[:8]}...")

    async def _local_evict_not_visible(self, session_id: str, visible_indices: set) -> int:
        """Evict non-visible trees from local cache; returns how many were removed."""
        evicted = 0
        async with self._lock:
            session_cache = self._local_cache.get(session_id)
            if session_cache:
                # Collect first so the dict is not mutated while being iterated.
                keys_to_remove = [idx for idx in session_cache if idx not in visible_indices]
                for idx in keys_to_remove:
                    del session_cache[idx]
                    evicted += 1
                if evicted > 0:
                    print(f"TreeGraphCache evicted {evicted} non-visible trees for session {session_id[:8]}...")
        return evicted

    # ==================== Utility Methods ====================

    def get_stats(self) -> Dict:
        """Get cache statistics (entry counts are only available in in-memory mode)."""
        if self.redis:
            return {
                "mode": "redis",
                "sessions": "N/A (use Redis commands to inspect)"
            }
        else:
            total_trees = sum(len(cache) for cache in self._local_cache.values())
            return {
                "mode": "in-memory",
                "sessions": len(self._local_cache),
                "total_trees": total_trees,
                "eviction_strategy": "visibility-based"
            }
|