lorax_arg-0.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lorax/buffer.py +43 -0
- lorax/cache/__init__.py +43 -0
- lorax/cache/csv_tree_graph.py +59 -0
- lorax/cache/disk.py +467 -0
- lorax/cache/file_cache.py +142 -0
- lorax/cache/file_context.py +72 -0
- lorax/cache/lru.py +90 -0
- lorax/cache/tree_graph.py +293 -0
- lorax/cli.py +312 -0
- lorax/cloud/__init__.py +0 -0
- lorax/cloud/gcs_utils.py +205 -0
- lorax/constants.py +66 -0
- lorax/context.py +80 -0
- lorax/csv/__init__.py +7 -0
- lorax/csv/config.py +250 -0
- lorax/csv/layout.py +182 -0
- lorax/csv/newick_tree.py +234 -0
- lorax/handlers.py +998 -0
- lorax/lineage.py +456 -0
- lorax/loaders/__init__.py +0 -0
- lorax/loaders/csv_loader.py +10 -0
- lorax/loaders/loader.py +31 -0
- lorax/loaders/tskit_loader.py +119 -0
- lorax/lorax_app.py +75 -0
- lorax/manager.py +58 -0
- lorax/metadata/__init__.py +0 -0
- lorax/metadata/loader.py +426 -0
- lorax/metadata/mutations.py +146 -0
- lorax/modes.py +190 -0
- lorax/pg.py +183 -0
- lorax/redis_utils.py +30 -0
- lorax/routes.py +137 -0
- lorax/session_manager.py +206 -0
- lorax/sockets/__init__.py +55 -0
- lorax/sockets/connection.py +99 -0
- lorax/sockets/debug.py +47 -0
- lorax/sockets/decorators.py +112 -0
- lorax/sockets/file_ops.py +200 -0
- lorax/sockets/lineage.py +307 -0
- lorax/sockets/metadata.py +232 -0
- lorax/sockets/mutations.py +154 -0
- lorax/sockets/node_search.py +535 -0
- lorax/sockets/tree_layout.py +117 -0
- lorax/sockets/utils.py +10 -0
- lorax/tree_graph/__init__.py +12 -0
- lorax/tree_graph/tree_graph.py +689 -0
- lorax/utils.py +124 -0
- lorax_app/__init__.py +4 -0
- lorax_app/app.py +159 -0
- lorax_app/cli.py +114 -0
- lorax_app/static/X.png +0 -0
- lorax_app/static/assets/index-BCEGlUFi.js +2361 -0
- lorax_app/static/assets/index-iKjzUpA9.css +1 -0
- lorax_app/static/assets/localBackendWorker-BaWwjSV_.js +2 -0
- lorax_app/static/assets/renderDataWorker-BKLdiU7J.js +2 -0
- lorax_app/static/gestures/gesture-flick.ogv +0 -0
- lorax_app/static/gestures/gesture-two-finger-scroll.ogv +0 -0
- lorax_app/static/index.html +14 -0
- lorax_app/static/logo.png +0 -0
- lorax_app/static/lorax-logo.png +0 -0
- lorax_app/static/vite.svg +1 -0
- lorax_arg-0.1.dist-info/METADATA +131 -0
- lorax_arg-0.1.dist-info/RECORD +66 -0
- lorax_arg-0.1.dist-info/WHEEL +5 -0
- lorax_arg-0.1.dist-info/entry_points.txt +4 -0
- lorax_arg-0.1.dist-info/top_level.txt +2 -0
lorax/lineage.py
ADDED
@@ -0,0 +1,456 @@
"""
Lineage Operations for TreeGraph-based ancestry and descendant tracing.

Provides efficient operations using cached TreeGraph objects:
- Ancestor tracing: Path from node to root
- Descendant finding: All nodes below a given node
- Node search: Filter nodes by metadata/attributes
"""

import numpy as np
from typing import List, Dict, Optional, Any, TYPE_CHECKING
from collections import deque

if TYPE_CHECKING:
    from lorax.tree_graph import TreeGraph
    from lorax.tree_graph_cache import TreeGraphCache


async def get_ancestors(
    tree_graph_cache: "TreeGraphCache",
    session_id: str,
    tree_index: int,
    node_id: int
) -> Dict[str, Any]:
    """
    Get all ancestors of a node (path from node to root).

    Uses the parent array from the cached TreeGraph to trace the ancestry path.

    Args:
        tree_graph_cache: The TreeGraph cache instance
        session_id: Session identifier
        tree_index: Tree index to query
        node_id: Node ID to trace ancestors for

    Returns:
        Dict with:
        - ancestors: List of node IDs from node to root (excluding the query node)
        - path: List of {node_id, time, x, y} dicts for visualization
        - error: Error message if tree not cached
    """
    tg = await tree_graph_cache.get(session_id, tree_index)
    if tg is None:
        return {
            "error": f"Tree {tree_index} not cached. Request tree layout first.",
            "ancestors": [],
            "path": []
        }

    # Validate node_id
    if node_id < 0 or node_id >= len(tg.parent):
        return {
            "error": f"Invalid node_id {node_id}",
            "ancestors": [],
            "path": []
        }

    # Check if node is in this tree
    if not tg.in_tree[node_id]:
        return {
            "error": f"Node {node_id} is not in tree {tree_index}",
            "ancestors": [],
            "path": []
        }

    ancestors = []
    path = []

    # Include starting node in path
    path.append({
        "node_id": int(node_id),
        "time": float(tg.time[node_id]),
        "x": float(tg.y[node_id]),  # Backend convention: y = time -> frontend x
        "y": float(tg.x[node_id])   # Backend convention: x = layout -> frontend y
    })

    current = node_id
    while True:
        parent = tg.parent[current]
        if parent == -1:
            # Reached root
            break
        ancestors.append(int(parent))
        path.append({
            "node_id": int(parent),
            "time": float(tg.time[parent]),
            "x": float(tg.y[parent]),
            "y": float(tg.x[parent])
        })
        current = parent

    return {
        "ancestors": ancestors,
        "path": path,
        "tree_index": tree_index,
        "query_node": node_id
    }


async def get_descendants(
    tree_graph_cache: "TreeGraphCache",
    session_id: str,
    tree_index: int,
    node_id: int,
    include_tips_only: bool = False
) -> Dict[str, Any]:
    """
    Get all descendants of a node (BFS traversal down the tree).

    Uses the CSR children structure from the cached TreeGraph.

    Args:
        tree_graph_cache: The TreeGraph cache instance
        session_id: Session identifier
        tree_index: Tree index to query
        node_id: Node ID to find descendants for
        include_tips_only: If True, only return tip (leaf) nodes

    Returns:
        Dict with:
        - descendants: List of node IDs that are descendants
        - tips: List of tip node IDs (always included for convenience)
        - error: Error message if tree not cached
    """
    tg = await tree_graph_cache.get(session_id, tree_index)
    if tg is None:
        return {
            "error": f"Tree {tree_index} not cached. Request tree layout first.",
            "descendants": [],
            "tips": []
        }

    # Validate node_id
    if node_id < 0 or node_id >= len(tg.parent):
        return {
            "error": f"Invalid node_id {node_id}",
            "descendants": [],
            "tips": []
        }

    # Check if node is in this tree
    if not tg.in_tree[node_id]:
        return {
            "error": f"Node {node_id} is not in tree {tree_index}",
            "descendants": [],
            "tips": []
        }

    descendants = []
    tips = []

    # BFS traversal using deque for efficiency
    queue = deque(tg.children(node_id).tolist())

    while queue:
        child = queue.popleft()
        descendants.append(int(child))

        # Check if tip (no children)
        if tg.is_tip(child):
            tips.append(int(child))
        else:
            # Add children to queue
            queue.extend(tg.children(child).tolist())

    result = {
        "tips": tips,
        "tree_index": tree_index,
        "query_node": node_id,
        "total_descendants": len(descendants)
    }

    if include_tips_only:
        result["descendants"] = tips
    else:
        result["descendants"] = descendants

    return result


async def search_nodes_by_criteria(
    tree_graph_cache: "TreeGraphCache",
    session_id: str,
    tree_index: int,
    criteria: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Search for nodes matching specified criteria.

    Supported criteria:
    - min_time: Minimum node time
    - max_time: Maximum node time
    - is_tip: True for tips only, False for internal nodes only
    - has_children: True for nodes with children, False for tips
    - node_ids: List of specific node IDs to filter to

    Args:
        tree_graph_cache: The TreeGraph cache instance
        session_id: Session identifier
        tree_index: Tree index to search
        criteria: Dict of filter criteria

    Returns:
        Dict with:
        - matches: List of matching node IDs
        - positions: List of {node_id, x, y, time} for each match
        - error: Error message if tree not cached
    """
    tg = await tree_graph_cache.get(session_id, tree_index)
    if tg is None:
        return {
            "error": f"Tree {tree_index} not cached. Request tree layout first.",
            "matches": [],
            "positions": []
        }

    matches = []
    positions = []

    # Get all nodes in this tree
    in_tree_indices = np.where(tg.in_tree)[0]

    # Optional: filter to specific node IDs first
    node_id_filter = criteria.get("node_ids")
    if node_id_filter is not None:
        node_id_set = set(node_id_filter)
        in_tree_indices = [n for n in in_tree_indices if n in node_id_set]

    for node_id in in_tree_indices:
        if _matches_criteria(tg, node_id, criteria):
            matches.append(int(node_id))
            positions.append({
                "node_id": int(node_id),
                "x": float(tg.y[node_id]),  # time -> frontend x
                "y": float(tg.x[node_id]),  # layout -> frontend y
                "time": float(tg.time[node_id])
            })

    return {
        "matches": matches,
        "positions": positions,
        "tree_index": tree_index,
        "criteria": criteria,
        "total_matches": len(matches)
    }


def _matches_criteria(tg: "TreeGraph", node_id: int, criteria: Dict[str, Any]) -> bool:
    """
    Check if a node matches all specified criteria.

    Args:
        tg: TreeGraph object
        node_id: Node ID to check
        criteria: Dict of filter criteria

    Returns:
        True if node matches all criteria
    """
    # Time range filters
    if "min_time" in criteria:
        if tg.time[node_id] < criteria["min_time"]:
            return False

    if "max_time" in criteria:
        if tg.time[node_id] > criteria["max_time"]:
            return False

    # Tip/internal filter
    if "is_tip" in criteria:
        is_tip = tg.is_tip(node_id)
        if criteria["is_tip"] != is_tip:
            return False

    # Has children filter (inverse of is_tip)
    if "has_children" in criteria:
        has_children = not tg.is_tip(node_id)
        if criteria["has_children"] != has_children:
            return False

    # Y (layout) position range
    if "min_y" in criteria:
        if tg.x[node_id] < criteria["min_y"]:  # x in backend = y in frontend
            return False

    if "max_y" in criteria:
        if tg.x[node_id] > criteria["max_y"]:
            return False

    return True


async def get_subtree(
    tree_graph_cache: "TreeGraphCache",
    session_id: str,
    tree_index: int,
    root_node_id: int
) -> Dict[str, Any]:
    """
    Get the complete subtree rooted at a given node.

    Returns all nodes in the subtree with their structure preserved.

    Args:
        tree_graph_cache: The TreeGraph cache instance
        session_id: Session identifier
        tree_index: Tree index to query
        root_node_id: Root of the subtree

    Returns:
        Dict with:
        - nodes: List of {node_id, parent_id, x, y, time, is_tip}
        - edges: List of {parent, child} pairs
        - error: Error message if tree not cached
    """
    tg = await tree_graph_cache.get(session_id, tree_index)
    if tg is None:
        return {
            "error": f"Tree {tree_index} not cached. Request tree layout first.",
            "nodes": [],
            "edges": []
        }

    # Validate node_id
    if root_node_id < 0 or root_node_id >= len(tg.parent):
        return {
            "error": f"Invalid node_id {root_node_id}",
            "nodes": [],
            "edges": []
        }

    if not tg.in_tree[root_node_id]:
        return {
            "error": f"Node {root_node_id} is not in tree {tree_index}",
            "nodes": [],
            "edges": []
        }

    nodes = []
    edges = []

    # BFS to collect all nodes and edges
    queue = deque([root_node_id])
    visited = set()

    while queue:
        node_id = queue.popleft()
        if node_id in visited:
            continue
        visited.add(node_id)

        nodes.append({
            "node_id": int(node_id),
            "parent_id": int(tg.parent[node_id]),
            "x": float(tg.y[node_id]),
            "y": float(tg.x[node_id]),
            "time": float(tg.time[node_id]),
            "is_tip": tg.is_tip(node_id)
        })

        children = tg.children(node_id)
        for child in children:
            edges.append({
                "parent": int(node_id),
                "child": int(child)
            })
            queue.append(child)

    return {
        "nodes": nodes,
        "edges": edges,
        "tree_index": tree_index,
        "root_node": root_node_id,
        "total_nodes": len(nodes)
    }


async def get_mrca(
    tree_graph_cache: "TreeGraphCache",
    session_id: str,
    tree_index: int,
    node_ids: List[int]
) -> Dict[str, Any]:
    """
    Find the Most Recent Common Ancestor (MRCA) of a set of nodes.

    Uses ancestor tracing and intersection to find the MRCA.

    Args:
        tree_graph_cache: The TreeGraph cache instance
        session_id: Session identifier
        tree_index: Tree index to query
        node_ids: List of node IDs to find MRCA for

    Returns:
        Dict with:
        - mrca: Node ID of the MRCA, or None if not found
        - mrca_time: Time of the MRCA node
        - mrca_position: {x, y} of the MRCA
        - error: Error message if tree not cached
    """
    if not node_ids or len(node_ids) < 2:
        return {
            "error": "Need at least 2 nodes to find MRCA",
            "mrca": None
        }

    tg = await tree_graph_cache.get(session_id, tree_index)
    if tg is None:
        return {
            "error": f"Tree {tree_index} not cached. Request tree layout first.",
            "mrca": None
        }

    # Validate all nodes
    for node_id in node_ids:
        if node_id < 0 or node_id >= len(tg.parent):
            return {"error": f"Invalid node_id {node_id}", "mrca": None}
        if not tg.in_tree[node_id]:
            return {"error": f"Node {node_id} not in tree {tree_index}", "mrca": None}

    # Get ancestor sets for each node
    ancestor_sets = []
    for node_id in node_ids:
        ancestors = set()
        current = node_id
        while current != -1:
            ancestors.add(current)
            current = tg.parent[current]
        ancestor_sets.append(ancestors)

    # Find intersection (common ancestors)
    common_ancestors = ancestor_sets[0]
    for ancestor_set in ancestor_sets[1:]:
        common_ancestors = common_ancestors.intersection(ancestor_set)

    if not common_ancestors:
        return {
            "error": "No common ancestor found",
            "mrca": None,
            "tree_index": tree_index
        }

    # MRCA is the common ancestor with the highest time (most recent)
    mrca = max(common_ancestors, key=lambda n: tg.time[n])

    return {
        "mrca": int(mrca),
        "mrca_time": float(tg.time[mrca]),
        "mrca_position": {
            "x": float(tg.y[mrca]),
            "y": float(tg.x[mrca])
        },
        "tree_index": tree_index,
        "query_nodes": node_ids
    }
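For orientation, here is a minimal sketch of how these coroutines might be driven. The session id, tree index, and node ids are hypothetical, and it assumes `cache` is a `TreeGraphCache` (see `lorax/cache/`) already populated by a prior tree-layout request; note that returned positions carry the coordinate swap described in the comments above (backend time maps to frontend x).

    import asyncio

    from lorax.lineage import get_ancestors, get_mrca

    async def demo(cache):
        # `cache` is assumed to be a populated TreeGraphCache for session "s1".
        # Trace node 7 back to the root of tree 0 (ids are illustrative).
        anc = await get_ancestors(cache, session_id="s1", tree_index=0, node_id=7)
        print(anc["ancestors"])  # parent chain from node 7 up to the root

        # Intersect the ancestor chains of nodes 3 and 7 to find their MRCA.
        mrca = await get_mrca(cache, session_id="s1", tree_index=0, node_ids=[3, 7])
        print(mrca["mrca"], mrca["mrca_time"])

    # asyncio.run(demo(cache))  # run once a cache exists for session "s1"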
lorax/loaders/__init__.py
File without changes

lorax/loaders/csv_loader.py
ADDED

@@ -0,0 +1,10 @@
from lorax.csv.config import CsvConfigOptions, build_csv_config


def get_config_csv(df, file_path, root_dir, window_size=50000):
    """Extract configuration from a Newick-per-row CSV file.

    Kept as a thin wrapper for backwards compatibility; real logic lives in
    `lorax.csv.config` for encapsulation and re-use.
    """
    return build_csv_config(df, str(file_path), options=CsvConfigOptions(window_size=window_size))
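A minimal usage sketch of this wrapper, assuming the CSV has already been read into a pandas DataFrame; the file names here are hypothetical, and the expected column layout is defined by `lorax.csv.config`, which is not shown in this section.

    import pandas as pd

    from lorax.loaders.csv_loader import get_config_csv

    # Column layout is whatever lorax.csv.config expects; treated as opaque here.
    df = pd.read_csv("trees.csv")
    config = get_config_csv(df, "trees.csv", root_dir="/data", window_size=50_000)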
lorax/loaders/loader.py
ADDED
@@ -0,0 +1,31 @@
"""
Config computation for loaded files.

Provides compute_config() which creates configuration from a tree sequence.
The config is cached within FileContext (see cache/file_cache.py), not here.
"""

from lorax.loaders.csv_loader import get_config_csv
from lorax.loaders.tskit_loader import get_config_tskit


def compute_config(ts, file_path, root_dir):
    """
    Compute config for a tree sequence.

    This function is called by file_cache.py when loading a new FileContext.
    The config is cached within the FileContext, so this function does
    not maintain its own cache.

    Args:
        ts: tskit.TreeSequence or pandas.DataFrame (for CSV)
        file_path: Path to the source file
        root_dir: Root directory for relative paths

    Returns:
        dict: Configuration including intervals, sample counts, etc.
    """
    if file_path.endswith('.tsz') or file_path.endswith('.trees'):
        return get_config_tskit(ts, file_path, root_dir)
    else:
        return get_config_csv(ts, file_path, root_dir)
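A sketch of the extension-based dispatch in practice, assuming a local `.trees` file loadable with `tskit.load` (the path is hypothetical):

    import tskit

    from lorax.loaders.loader import compute_config

    ts = tskit.load("example.trees")  # tskit.TreeSequence
    config = compute_config(ts, "example.trees", root_dir="/data")
    # .tsz/.trees -> get_config_tskit; anything else -> get_config_csv
    print(config["genome_length"], config["initial_position"])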
lorax/loaders/tskit_loader.py
ADDED

@@ -0,0 +1,119 @@
import os
import tskit
from lorax.metadata.loader import get_metadata_schema


def _get_project_name(file_path, root_dir):
    """Return the immediate parent directory name for the file."""
    if not file_path:
        return None
    try:
        parent_dir = os.path.basename(os.path.dirname(str(file_path)))
    except Exception:
        parent_dir = None
    if parent_dir:
        return parent_dir
    if root_dir:
        return os.path.basename(os.path.normpath(str(root_dir)))
    return None


def get_config_tskit(ts, file_path, root_dir):
    """Extract configuration and metadata from a tree sequence file.

    Note: Uses get_metadata_schema() for lightweight initial load.
    Full metadata mappings are fetched on-demand via fetch_metadata_for_key.
    """
    try:
        intervals = list(ts.breakpoints())
        times = [ts.min_time, ts.max_time]
        genome_length = ts.sequence_length

        # Timeline unit label for UI: normalize unknown -> "Time"
        time_units = getattr(ts, "time_units", None)
        time_units_str = str(time_units) if time_units is not None else "unknown"
        timeline_type = "Coalescent Time" if time_units_str.strip().lower() == "unknown" else time_units_str

        # Compute centered initial position (10% of genome, minimum 1kb)
        window_size = max(genome_length * 0.1, 1000)
        midpoint = genome_length / 2.0
        start = max(0, midpoint - window_size / 2.0)
        end = min(genome_length, midpoint + window_size / 2.0)

        sample_names = {}
        # Use schema-only extraction for lightweight initial load
        metadata_schema = get_metadata_schema(ts, sources=("individual", "node", "population"))

        filename = os.path.basename(file_path)
        project_name = _get_project_name(file_path, root_dir)

        config = {
            'genome_length': genome_length,
            'initial_position': [int(start), int(end)],
            'times': {'type': timeline_type, 'values': times},
            'intervals': intervals,
            'filename': str(filename),
            'project': project_name,
            # node_times removed - now sent per-query from handle_layout_query for efficiency
            # 'mutations': extract_node_mutations_tables(ts),
            # 'mutations_by_node': extract_mutations_by_node(ts),
            'sample_names': sample_names,
            # Send schema only - full mappings fetched on-demand
            'metadata_schema': metadata_schema
        }
        return config
    except Exception as e:
        print("Error in get_config", e)
        return None


def extract_node_mutations_tables(ts):
    """Extract mutations keyed by position for UI display."""
    t = ts.tables
    s, m = t.sites, t.mutations

    pos = s.position[m.site]
    # Index per-site ancestral states by each mutation's site so all four
    # arrays below are aligned per mutation.
    anc = ts.sites_ancestral_state[m.site]
    der = ts.mutations_derived_state
    nodes = m.node  # Node IDs for each mutation

    out = {}

    for p, a, d, node_id in zip(pos, anc, der, nodes):
        if a == d:
            continue

        out[str(int(p))] = {
            "mutation": f"{a}->{d}",
            "node": int(node_id)
        }

    return out


def extract_mutations_by_node(ts):
    """Extract mutations grouped by node ID for tree building.

    Returns:
        dict: {node_id (int): [{position, mutation_str}, ...]}
    """
    t = ts.tables
    s, m = t.sites, t.mutations

    pos = s.position[m.site]
    # Per-mutation ancestral states, aligned with der/pos/nodes (see above).
    anc = ts.sites_ancestral_state[m.site]
    der = ts.mutations_derived_state
    nodes = m.node

    out = {}

    for p, a, d, node_id in zip(pos, anc, der, nodes):
        if a == d:
            continue
        node_id = int(node_id)
        if node_id not in out:
            out[node_id] = []
        out[node_id].append({
            "position": int(p),
            "mutation": f"{a}{int(p)}{d}"
        })

    return out
lorax/lorax_app.py
ADDED
@@ -0,0 +1,75 @@
"""
Socket.IO version of the Lorax backend (single-process, no Gunicorn).

Run with:
    uvicorn lorax.lorax_app:sio_app --host 0.0.0.0 --port 8080 --reload
"""
import os
import socketio
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.gzip import GZipMiddleware
from dotenv import load_dotenv

from lorax.context import REDIS_CLUSTER_URL, REDIS_CLUSTER
from lorax.constants import (
    SOCKET_PING_TIMEOUT, SOCKET_PING_INTERVAL, MAX_HTTP_BUFFER_SIZE
)
from lorax.routes import router
from lorax.sockets import register_socket_events

load_dotenv()

# Setup

app = FastAPI(title="Lorax Backend", version="1.0.0")
app.add_middleware(GZipMiddleware, minimum_size=1000)


ALLOWED_ORIGINS = [
    o.strip() for o in os.getenv("ALLOWED_ORIGINS", "http://localhost:5173,http://localhost:3001,http://localhost:3000").split(",")
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=ALLOWED_ORIGINS,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

_client_manager = None
if REDIS_CLUSTER_URL and not REDIS_CLUSTER:
    _client_manager = socketio.AsyncRedisManager(REDIS_CLUSTER_URL)
elif REDIS_CLUSTER_URL and REDIS_CLUSTER:
    print("Warning: Socket.IO Redis manager does not support Redis Cluster; running without shared manager.")

if _client_manager:
    sio = socketio.AsyncServer(
        async_mode="asgi",
        cors_allowed_origins="*",
        client_manager=_client_manager,
        logger=False,
        engineio_logger=False,
        ping_timeout=SOCKET_PING_TIMEOUT,
        ping_interval=SOCKET_PING_INTERVAL,
        max_http_buffer_size=MAX_HTTP_BUFFER_SIZE
    )
else:
    sio = socketio.AsyncServer(
        async_mode="asgi",
        cors_allowed_origins="*",
        logger=False,
        engineio_logger=False,
        ping_timeout=SOCKET_PING_TIMEOUT,
        ping_interval=SOCKET_PING_INTERVAL,
        max_http_buffer_size=MAX_HTTP_BUFFER_SIZE
    )

sio_app = socketio.ASGIApp(sio, other_asgi_app=app)

# Include Routes
app.include_router(router)

# Register Socket Events
register_socket_events(sio)