bella_companion-0.0.0-py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects changes between package versions as they appear in the public registry.

This release of bella-companion has been flagged as potentially problematic.
Files changed (34)
  1. bella_companion/__init__.py +0 -0
  2. bella_companion/cli.py +24 -0
  3. bella_companion/fbd_empirical/data/body_mass.csv +1378 -0
  4. bella_companion/fbd_empirical/data/change_times.csv +22 -0
  5. bella_companion/fbd_empirical/data/sampling_change_times.csv +6 -0
  6. bella_companion/fbd_empirical/data/trees.nwk +100 -0
  7. bella_companion/fbd_empirical/figure.py +37 -0
  8. bella_companion/fbd_empirical/notbooks.ipynb +359 -0
  9. bella_companion/fbd_empirical/params.json +11 -0
  10. bella_companion/fbd_empirical/run_beast.py +54 -0
  11. bella_companion/fbd_empirical/summarize_logs.py +50 -0
  12. bella_companion/simulations/__init__.py +0 -0
  13. bella_companion/simulations/features.py +7 -0
  14. bella_companion/simulations/figures/__init__.py +0 -0
  15. bella_companion/simulations/figures/epi_explainations.py +101 -0
  16. bella_companion/simulations/figures/epi_predictions.py +58 -0
  17. bella_companion/simulations/figures/fbd_explainations.py +99 -0
  18. bella_companion/simulations/figures/fbd_predictions.py +66 -0
  19. bella_companion/simulations/figures/scenarios.py +87 -0
  20. bella_companion/simulations/figures/utils.py +250 -0
  21. bella_companion/simulations/generate_data.py +25 -0
  22. bella_companion/simulations/run_beast.py +92 -0
  23. bella_companion/simulations/scenarios/__init__.py +20 -0
  24. bella_companion/simulations/scenarios/common.py +29 -0
  25. bella_companion/simulations/scenarios/epi_multitype.py +68 -0
  26. bella_companion/simulations/scenarios/epi_skyline.py +65 -0
  27. bella_companion/simulations/scenarios/fbd_2traits.py +101 -0
  28. bella_companion/simulations/scenarios/fbd_no_traits.py +71 -0
  29. bella_companion/simulations/scenarios/scenario.py +26 -0
  30. bella_companion/simulations/summarize_logs.py +39 -0
  31. bella_companion/utils.py +164 -0
  32. bella_companion-0.0.0.dist-info/METADATA +13 -0
  33. bella_companion-0.0.0.dist-info/RECORD +34 -0
  34. bella_companion-0.0.0.dist-info/WHEEL +4 -0
bella_companion/simulations/scenarios/fbd_no_traits.py
@@ -0,0 +1,71 @@
+ from functools import partial
+
+ import numpy as np
+ from phylogenie import SkylineParameter, get_canonical_events
+
+ from bella_companion.simulations.features import Feature
+ from bella_companion.simulations.scenarios.common import (
+     FBD_MAX_TIME,
+     FBD_RATE_UPPER,
+     FBD_SAMPLING_RATE,
+     get_prior_params,
+     get_random_time_series_predictor,
+ )
+ from bella_companion.simulations.scenarios.scenario import Scenario, ScenarioType
+
+
+ def _get_scenario(rates: dict[str, list[float]]) -> Scenario:
+     if len(rates["birth"]) != len(rates["death"]):
+         raise ValueError("Birth rate and death rate lists must have the same length.")
+     n_time_bins = len(rates["birth"])
+     change_times = np.linspace(0, FBD_MAX_TIME, n_time_bins + 1)[1:-1].tolist()
+
+     return Scenario(
+         type=ScenarioType.FBD,
+         max_time=FBD_MAX_TIME,
+         events=get_canonical_events(
+             states=["X"],
+             sampling_rates=FBD_SAMPLING_RATE,
+             remove_after_sampling=False,
+             birth_rates=SkylineParameter(rates["birth"], change_times),
+             death_rates=SkylineParameter(rates["death"], change_times),
+         ),
+         get_random_predictor=partial(
+             get_random_time_series_predictor, n_time_bins=n_time_bins
+         ),
+         beast_args={
+             "processLength": FBD_MAX_TIME,
+             "changeTimes": " ".join(map(str, change_times)),
+             **get_prior_params("birthRate", FBD_RATE_UPPER, n_time_bins),
+             **get_prior_params("deathRate", FBD_RATE_UPPER, n_time_bins),
+             "samplingRate": FBD_SAMPLING_RATE,
+             "timePredictor": " ".join(map(str, np.linspace(0, 1, n_time_bins))),
+         },
+         targets={
+             f"{rate}Rate": {f"{rate}RateSPi{i}": values[i] for i in range(n_time_bins)}
+             for rate, values in rates.items()
+         },
+         features={
+             f"{rate}Rate": {
+                 "timePredictor": Feature(
+                     is_binary=False, is_relevant=len(set(values)) > 1
+                 ),
+                 "randomPredictor": Feature(is_binary=False, is_relevant=False),
+             }
+             for rate, values in rates.items()
+         },
+     )
+
+
+ RATES = [
+     {"birth": [0.2] * 10, "death": [0.1] * 10},
+     {
+         "birth": np.linspace(0.4, 0.1, 10).tolist(),
+         "death": np.linspace(0.1, 0.2, 10).tolist(),
+     },
+     {
+         "birth": [0.4] * 5 + [0.1] * 3 + [0.01] * 2,
+         "death": [0.05] * 7 + [0.3] * 1 + [0.01] * 2,
+     },
+ ]
+ SCENARIOS = [_get_scenario(r) for r in RATES]
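
For orientation, here is a minimal standalone sketch of how `_get_scenario` partitions the process into equal-width time bins. `FBD_MAX_TIME` is defined in `common.py`, which this diff does not show, so the value below is a placeholder assumption:

    # Sketch of the change-time computation in _get_scenario.
    import numpy as np

    FBD_MAX_TIME = 100.0  # hypothetical value; the real constant lives in common.py
    rates = {"birth": [0.4] * 5 + [0.1] * 3 + [0.01] * 2}
    n_time_bins = len(rates["birth"])  # 10 bins
    # linspace returns n_time_bins + 1 boundaries; slicing [1:-1] keeps only the
    # interior boundaries, i.e. the 9 change times separating 10 bins.
    change_times = np.linspace(0, FBD_MAX_TIME, n_time_bins + 1)[1:-1].tolist()
    assert len(change_times) == n_time_bins - 1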
bella_companion/simulations/scenarios/scenario.py
@@ -0,0 +1,26 @@
+ from collections.abc import Callable
+ from dataclasses import dataclass
+ from enum import Enum
+ from typing import Any
+
+ from numpy.random import Generator
+ from phylogenie.treesimulator import Event
+
+ from bella_companion.simulations.features import Feature
+
+
+ class ScenarioType(Enum):
+     EPI = "epi"
+     FBD = "fbd"
+
+
+ @dataclass
+ class Scenario:
+     type: ScenarioType
+     max_time: float
+     events: list[Event]
+     get_random_predictor: Callable[[Generator], list[float]]
+     beast_args: dict[str, Any]
+     targets: dict[str, dict[str, float]]
+     features: dict[str, dict[str, Feature]]
+     init_state: str | None = None
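
A minimal instantiation sketch of the dataclass above; the empty events list, the dummy predictor, and all field values are illustrative stand-ins, not values used anywhere in the package:

    from numpy.random import Generator

    from bella_companion.simulations.scenarios.scenario import Scenario, ScenarioType

    def dummy_predictor(rng: Generator) -> list[float]:
        # Matches the Callable[[Generator], list[float]] field type.
        return rng.uniform(0, 1, size=10).tolist()

    scenario = Scenario(
        type=ScenarioType.FBD,
        max_time=100.0,
        events=[],  # real scenarios pass phylogenie treesimulator Events
        get_random_predictor=dummy_predictor,
        beast_args={"processLength": 100.0},
        targets={"birthRate": {"birthRateSPi0": 0.2}},
        features={"birthRate": {}},
    )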
bella_companion/simulations/summarize_logs.py
@@ -0,0 +1,39 @@
+ import json
+ import os
+
+ import joblib
+
+ from src.config import BEAST_LOGS_SUMMARIES_DIR, BEAST_OUTPUTS_DIR
+ from src.simulations.scenarios import SCENARIOS
+ from src.utils import summarize_logs
+
+
+ def main():
+     with open(BEAST_OUTPUTS_DIR / "simulations_job_ids.json", "r") as f:
+         job_ids: dict[str, dict[str, dict[str, str]]] = json.load(f)
+
+     for scenario_name, scenario in SCENARIOS.items():
+         summaries_dir = BEAST_LOGS_SUMMARIES_DIR / scenario_name
+         os.makedirs(summaries_dir, exist_ok=True)
+         for model in job_ids[scenario_name]:
+             hidden_nodes = (
+                 list(map(int, model.split("-")[1].split("_")))
+                 if model.startswith("MLP")
+                 else None
+             )
+             logs_dir = BEAST_OUTPUTS_DIR / scenario_name / model
+             print(f"Summarizing {scenario_name} - {model}")
+             logs_summary, weights = summarize_logs(
+                 logs_dir,
+                 target_columns=[c for t in scenario.targets.values() for c in t],
+                 hidden_nodes=hidden_nodes,
+                 n_features={t: len(fs) for t, fs in scenario.features.items()},
+                 job_ids=job_ids[scenario_name][model],
+             )
+             logs_summary.write_csv(summaries_dir / f"{model}.csv")
+             if weights is not None:
+                 joblib.dump(weights, summaries_dir / f"{model}.weights.pkl")
+
+
+ if __name__ == "__main__":
+     main()
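
Note that this script imports from a top-level `src` package that is not shipped in this wheel (the file list above contains no `src/` modules), so it appears to target the project's development layout rather than the installed package. The JSON it reads maps scenario to model to log id to job id; the concrete names in this sketch are assumptions, only the nesting follows from the `dict[str, dict[str, dict[str, str]]]` annotation in `main()`:

    example_job_ids = {
        "fbd_no_traits": {
            "GLM": {"0": "1234567", "1": "1234568"},
            "MLP-16_8": {"0": "1234569"},  # "MLP-16_8" parses to hidden_nodes [16, 8]
        }
    }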
bella_companion/utils.py
@@ -0,0 +1,164 @@
+ import os
+ import re
+ import subprocess
+ from glob import glob
+ from pathlib import Path
+ from typing import Any
+
+ import arviz as az
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import polars as pl
+ from joblib import Parallel, delayed
+ from lumiere.backend.typings import Weights
+ from tqdm import tqdm
+
+
+ def run_sbatch(
+     command: str,
+     log_dir: Path,
+     time: str = "240:00:00",
+     mem_per_cpu: str = "2000",
+     overwrite: bool = False,
+ ) -> str | None:
+     if not overwrite and log_dir.exists():
+         print(f"Log directory {log_dir} already exists. Skipping.")
+         return
+     cmd = " ".join(
+         [
+             "sbatch",
+             f"-J {log_dir}",
+             f"-o {log_dir / 'output.out'}",
+             f"-e {log_dir / 'error.err'}",
+             f"--time {time}",
+             f"--mem-per-cpu={mem_per_cpu}",
+             f"--wrap='{command}'",
+         ]
+     )
+     output = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+     job_id = re.search(r"Submitted batch job (\d+)", output.stdout)
+     if job_id is None:
+         raise RuntimeError(
+             f"Failed to submit job.\nCommand: {cmd}\nOutput: {output.stdout}\nError: {output.stderr}"
+         )
+     return job_id.group(1)
+
+
+ def get_job_metadata(job_id: str):
+     output = subprocess.run(
+         f"myjobs -j {job_id}", shell=True, capture_output=True, text=True
+     ).stdout
+
+     status = re.search(r"Status\s+:\s+(\w+)", output)
+     if status is None:
+         raise RuntimeError(f"Failed to get job status for job {job_id}")
+     status = status.group(1)
+
+     wall_clock = re.search(r"Wall-clock\s+:\s+([\d\-:]+)", output)
+     if wall_clock is None:
+         raise RuntimeError(f"Failed to get wall-clock time for job {job_id}")
+     wall_clock = wall_clock.group(1)
+
+     if "-" in wall_clock:
+         days, wall_clock = wall_clock.split("-")
+         days = int(days)
+     else:
+         days = 0
+     hours, minutes, seconds = map(int, wall_clock.split(":"))
+     total_hours = days * 24 + hours + minutes / 60 + seconds / 3600
+
+     return {"status": status, "total_hours": total_hours}
+
+
+ def summarize_log(
+     log_file: str,
+     target_columns: list[str],
+     burn_in: float = 0.1,
+     hdi_prob: float = 0.95,
+     hidden_nodes: list[int] | None = None,
+     n_weights_samples: int = 100,
+     n_features: dict[str, int] | None = None,
+     job_id: str | None = None,
+ ) -> tuple[dict[str, Any], dict[str, list[Weights]] | None]:
+     df = pl.read_csv(log_file, separator="\t", comment_prefix="#")
+     df = df.filter(pl.col("Sample") > burn_in * len(df))
+     targets_df = df.select(target_columns)
+     summary: dict[str, Any] = {"n_samples": len(df)}
+     for column in targets_df.columns:
+         summary[f"{column}_median"] = targets_df[column].median()
+         summary[f"{column}_ess"] = az.ess(  # pyright: ignore[reportUnknownMemberType]
+             np.array(targets_df[column])
+         )
+         lower, upper = az.hdi(  # pyright: ignore[reportUnknownMemberType]
+             np.array(targets_df[column]), hdi_prob=hdi_prob
+         )
+         summary[f"{column}_lower"] = lower
+         summary[f"{column}_upper"] = upper
+     if job_id is not None:
+         summary.update(get_job_metadata(job_id))
+     if hidden_nodes is not None:
+         if n_features is None:
+             raise ValueError("`n_features` must be provided to summarize log weights.")
+         weights: dict[str, list[Weights]] = {}
+         for target, n in n_features.items():
+             nodes = [n, *hidden_nodes, 1]
+             layer_weights = [
+                 np.array(
+                     df.tail(n_weights_samples).select(
+                         c for c in df.columns if c.startswith(f"{target}W.{i}")
+                     )
+                 ).reshape(-1, n_inputs + 1, n_outputs)
+                 for i, (n_inputs, n_outputs) in enumerate(zip(nodes[:-1], nodes[1:]))
+             ]
+             weights[target] = [
+                 list(sample_weights) for sample_weights in zip(*layer_weights)
+             ]
+         return summary, weights
+     return summary, None
+
+
+ def summarize_logs(
+     logs_dir: Path,
+     target_columns: list[str],
+     burn_in: float = 0.1,
+     hdi_prob: float = 0.95,
+     hidden_nodes: list[int] | None = None,
+     n_weights_samples: int = 100,
+     n_features: dict[str, int] | None = None,
+     job_ids: dict[str, str] | None = None,
+ ) -> tuple[pl.DataFrame, dict[str, list[list[Weights]]] | None]:
+     def _get_log_summary(
+         log_file: str,
+     ) -> tuple[dict[str, Any], dict[str, list[Weights]] | None]:
+         log_id = Path(log_file).stem
+         summary, weights = summarize_log(
+             log_file=log_file,
+             target_columns=target_columns,
+             burn_in=burn_in,
+             hdi_prob=hdi_prob,
+             hidden_nodes=hidden_nodes,
+             n_weights_samples=n_weights_samples,
+             n_features=n_features,
+             job_id=job_ids[log_id] if job_ids is not None else None,
+         )
+         return {"id": log_id, **summary}, weights
+
+     os.environ["POLARS_MAX_THREADS"] = "1"
+     summaries = Parallel(n_jobs=-1)(
+         delayed(_get_log_summary)(log_file)
+         for log_file in tqdm(glob(str(logs_dir / "*.log")))
+     )
+     data, weights = zip(*summaries)
+     if any(w is not None for w in weights):
+         assert n_features is not None
+         return pl.DataFrame(data), {t: [w[t] for w in weights] for t in n_features}
+     return pl.DataFrame(data), None
+
+
+ def set_plt_rcparams():
+     plt.rcParams["pdf.fonttype"] = 42
+     plt.rcParams["xtick.labelsize"] = 14
+     plt.rcParams["ytick.labelsize"] = 14
+     plt.rcParams["font.size"] = 14
+     plt.rcParams["figure.constrained_layout.use"] = True
+     plt.rcParams["lines.linewidth"] = 3
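
A usage sketch for the two main helpers above, assuming a SLURM cluster (`run_sbatch` shells out to `sbatch`, and `get_job_metadata` to a site-specific `myjobs` command). The paths, target columns, and layer sizes are illustrative:

    from pathlib import Path

    from bella_companion.utils import run_sbatch, summarize_logs

    # Submit a wrapped shell command; returns the SLURM job id, or None if the
    # log directory already exists and overwrite is False.
    job_id = run_sbatch("beast analysis.xml", log_dir=Path("logs/example"))

    # Summarize all *.log files in a directory into one polars DataFrame; the
    # optional weights are only extracted when hidden_nodes is given.
    summary, weights = summarize_logs(
        logs_dir=Path("logs/example"),
        target_columns=["birthRateSPi0", "deathRateSPi0"],
        hidden_nodes=[16, 8],  # None for models without MLP weight columns
        n_features={"birthRate": 2, "deathRate": 2},
    )
    summary.write_csv("example_summary.csv")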
bella_companion-0.0.0.dist-info/METADATA
@@ -0,0 +1,13 @@
+ Metadata-Version: 2.1
+ Name: bella-companion
+ Version: 0.0.0
+ Summary:
+ Author: gabriele-marino
+ Author-email: gabmarino.8601@gmail.com
+ Requires-Python: >=3.10,<4.0
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Requires-Dist: dotenv (>=0.9.9,<0.10.0)
+ Requires-Dist: phylogenie (>=2.1.21,<3.0.0)
bella_companion-0.0.0.dist-info/RECORD
@@ -0,0 +1,34 @@
+ bella_companion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ bella_companion/cli.py,sha256=IUODGLiDcxrF40ZjL-SeQtEQhoPgB989KJiXXU0-Pik,576
+ bella_companion/fbd_empirical/data/body_mass.csv,sha256=-UkKNtm9m3g4PjY3BcfdP6z5nL_I6p9cq6cgZ-bWKI8,30360
+ bella_companion/fbd_empirical/data/change_times.csv,sha256=zmc9_z91-XMwKyIoP9v9dVlLcf4MeIHkQiHLjoMriOo,120
+ bella_companion/fbd_empirical/data/sampling_change_times.csv,sha256=Gwi9RcMFy89RyvfxKVZ_MoKVRHOZLuwB_3LEaq8asMQ,32
+ bella_companion/fbd_empirical/data/trees.nwk,sha256=zhvLvPLZelhMThVmvOENkmi3p2aPAARb8KMdHTm6mss,4645318
+ bella_companion/fbd_empirical/figure.py,sha256=4paOXCB1EcxuHzLPxDSleQU2AQ_ndTedtzS1ugiKICs,1018
+ bella_companion/fbd_empirical/notbooks.ipynb,sha256=O45kmz0lZENRDFbKXEWPsIKATfF5GVeS5tCYmrGLnqk,83326
+ bella_companion/fbd_empirical/params.json,sha256=hU23LniClZL_GSBAxIEJUJgMa93AM8zdtFOq6mt3vkI,311
+ bella_companion/fbd_empirical/run_beast.py,sha256=2sV2UmxOfWmbueiU6D0p3lueMYiZyIkSKYoblTMrYuA,1935
+ bella_companion/fbd_empirical/summarize_logs.py,sha256=O6rhE606Wa98a8b1KKlLPjUOro1pfyqVTLdQksQMG0g,1439
+ bella_companion/simulations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ bella_companion/simulations/features.py,sha256=DZOBpJGlQ0UinqUZYbEtoemZ2eQGVLV_i-DfpW31qJI,104
+ bella_companion/simulations/figures/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ bella_companion/simulations/figures/epi_explainations.py,sha256=RL9fyjl0a_zPhrGdUXqbMMu6471su8B-O6LyuFlHknw,2816
+ bella_companion/simulations/figures/epi_predictions.py,sha256=4yXwOBKxUv4kgZdI9zAMEhZ0QCNKZdkAafRQ1RTeaWg,1835
+ bella_companion/simulations/figures/fbd_explainations.py,sha256=9Uj7yttpn_TH5HqycW8R-Nlky9A9aFXDXRpXQuT1L4s,3037
+ bella_companion/simulations/figures/fbd_predictions.py,sha256=jdXYCLledZEWoPCIuTLhHEPMdeG6YXvf5xZnEOslv-U,2119
+ bella_companion/simulations/figures/scenarios.py,sha256=vyybn3Qhfq96N8tvW0wSzpFoHHP8EIc8dkOz63o_Atw,2492
+ bella_companion/simulations/figures/utils.py,sha256=sY8wFBg02fv5ugpJ80EqQishD_HEdLwhqsw2LfM7wEo,8539
+ bella_companion/simulations/generate_data.py,sha256=H8OV4ZlTGZB-jXaROTPmOsK3UxRiU-GrX40l-shliw8,728
+ bella_companion/simulations/run_beast.py,sha256=NBGfb5ZvtrLX5sA6Ku4SNHqmPGoEXFj5DmV54ZR4zVs,3411
+ bella_companion/simulations/scenarios/__init__.py,sha256=3Kl1lKcFpfb3vLX64DmSW4XCF5kXU1ZoHtstFH-ZIzU,876
+ bella_companion/simulations/scenarios/common.py,sha256=_ddaSuTvEVdttGkXB4HPc2B7IB1F_GBOCW3cVOPZ-ZM,807
+ bella_companion/simulations/scenarios/epi_multitype.py,sha256=GWGIiqvYwX_FrT_3RXkZKYGDht9nZ7ceHRBKUvXDPnA,2432
+ bella_companion/simulations/scenarios/epi_skyline.py,sha256=JqnOVATECxBUqEbkR5lBlMI2O8k4hO6ipR8k9cHUsm0,2365
+ bella_companion/simulations/scenarios/fbd_2traits.py,sha256=sCtdWyV6GQQOIhnL9Dd8NIbAR-StTwUTD9-b_BalmFQ,3552
+ bella_companion/simulations/scenarios/fbd_no_traits.py,sha256=R6CH0fVeQg-Iesl39pq2uY8ICVEO4VZbvUVUCGwauJU,2520
+ bella_companion/simulations/scenarios/scenario.py,sha256=_FRWAyOFbw94lAzd3zCD-1ek4TrssoiXfXRQPShLiIA,620
+ bella_companion/simulations/summarize_logs.py,sha256=TXaO9cjzl5O1u0fPZpRl-9txzoN-p-fkhoAHoRXTfm8,1433
+ bella_companion/utils.py,sha256=26cF3oVBbsahYPO9rcK69l43ybg5AjS12IyfucgyVIM,5666
+ bella_companion-0.0.0.dist-info/METADATA,sha256=j55dzUiDk-NtHXDt3bAQ3MYH3fkMDKNmwZ4OD71TAm4,446
+ bella_companion-0.0.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+ bella_companion-0.0.0.dist-info/RECORD,,
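
Each RECORD row follows the wheel convention `path,sha256=<digest>,<size in bytes>`, where the digest is the urlsafe-base64 SHA-256 of the file with trailing `=` padding stripped (the RECORD file itself is listed with empty fields). A quick sketch for recomputing an entry's digest:

    import base64
    import hashlib

    def record_digest(path: str) -> str:
        # Reproduces the RECORD hash format: urlsafe base64, padding removed.
        with open(path, "rb") as f:
            raw = hashlib.sha256(f.read()).digest()
        return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii")

    # e.g. record_digest("bella_companion/utils.py") should print
    # 26cF3oVBbsahYPO9rcK69l43ybg5AjS12IyfucgyVIM for the wheel's copy.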
bella_companion-0.0.0.dist-info/WHEEL
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: poetry-core 1.9.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any