bella-companion 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bella_companion/cli.py +16 -6
- bella_companion/simulations/__init__.py +3 -1
- bella_companion/simulations/figures/epi_explainations.py +10 -2
- bella_companion/simulations/run_beast.py +20 -34
- bella_companion/simulations/summarize_logs.py +13 -21
- bella_companion/utils/__init__.py +4 -0
- bella_companion/utils/beast.py +69 -0
- bella_companion/utils/slurm.py +58 -0
- {bella_companion-0.0.1.dist-info → bella_companion-0.0.2.dist-info}/METADATA +4 -2
- {bella_companion-0.0.1.dist-info → bella_companion-0.0.2.dist-info}/RECORD +12 -10
- bella_companion/utils.py +0 -164
- {bella_companion-0.0.1.dist-info → bella_companion-0.0.2.dist-info}/WHEEL +0 -0
- {bella_companion-0.0.1.dist-info → bella_companion-0.0.2.dist-info}/entry_points.txt +0 -0
bella_companion/cli.py
CHANGED

@@ -1,11 +1,14 @@
 import argparse
+import os
+from pathlib import Path
 
 from dotenv import load_dotenv
-
+
+from bella_companion.simulations import generate_data, run_beast, summarize_logs
 
 
 def main():
-    load_dotenv()
+    load_dotenv(Path(os.getcwd()) / ".env")
 
     parser = argparse.ArgumentParser(
         prog="bella",
@@ -14,10 +17,17 @@ def main():
 
     subparsers = parser.add_subparsers(dest="command", required=True)
 
-    …
-        "generate-simulations-data", help="Generate …
-    )
-    …
+    subparsers.add_parser(
+        "generate-simulations-data", help="Generate simulated data."
+    ).set_defaults(func=generate_data)
+
+    subparsers.add_parser(
+        "run-beast-simulations", help="Run BEAST2 on simulated data."
+    ).set_defaults(func=run_beast)
+
+    subparsers.add_parser(
+        "summarize-simulation-logs", help="Summarize simulation logs."
+    ).set_defaults(func=summarize_logs)
 
     args = parser.parse_args()
     args.func()
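Note on the new wiring: each subcommand registers its handler via set_defaults(func=...) and main() dispatches through args.func(). A minimal, self-contained sketch of the same pattern (demo names are illustrative, not from the package):

import argparse

def greet():
    print("hello")

parser = argparse.ArgumentParser(prog="demo")
subparsers = parser.add_subparsers(dest="command", required=True)
# add_parser() returns the new sub-parser, so set_defaults() can be chained.
subparsers.add_parser("greet", help="Print a greeting.").set_defaults(func=greet)

args = parser.parse_args(["greet"])
args.func()  # dispatches to greet(), mirroring args.func() in cli.py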
bella_companion/simulations/__init__.py
CHANGED

@@ -1,3 +1,5 @@
 from bella_companion.simulations.generate_data import generate_data
+from bella_companion.simulations.run_beast import run_beast
+from bella_companion.simulations.summarize_logs import summarize_logs
 
-__all__ = ["generate_data"]
+__all__ = ["generate_data", "run_beast", "summarize_logs"]
bella_companion/simulations/figures/epi_explainations.py
CHANGED

@@ -5,9 +5,8 @@ import joblib
 import matplotlib.pyplot as plt
 import numpy as np
 import polars as pl
-from lumiere.backend import sigmoid
-
 import src.config as cfg
+from lumiere.backend import sigmoid
 from src.simulations.figures.utils import (
     plot_partial_dependencies,
     plot_shap_features_importance,
@@ -21,6 +20,15 @@ from src.simulations.scenarios.epi_multitype import (
 from src.utils import set_plt_rcparams
 
 
+def set_plt_rcparams():
+    plt.rcParams["pdf.fonttype"] = 42
+    plt.rcParams["xtick.labelsize"] = 14
+    plt.rcParams["ytick.labelsize"] = 14
+    plt.rcParams["font.size"] = 14
+    plt.rcParams["figure.constrained_layout.use"] = True
+    plt.rcParams["lines.linewidth"] = 3
+
+
 def _plot_predictions(log_summary: pl.DataFrame, output_dir: str):
     sort_idx = np.argsort(MIGRATION_PREDICTOR.flatten())
 
bella_companion/simulations/run_beast.py
CHANGED

@@ -9,20 +9,23 @@ from phylogenie import Tree, load_newick
 from phylogenie.utils import get_node_depths
 from tqdm import tqdm
 
-import config as cfg
 from bella_companion.simulations.scenarios import SCENARIOS, ScenarioType
-from bella_companion.utils import …
+from bella_companion.utils import submit_job
 
 
-def …
+def run_beast():
     rng = default_rng(42)
+    base_data_dir = Path(os.environ["BELLA_SIMULATIONS_DATA_DIR"])
+    base_output_dir = Path(os.environ["BELLA_BEAST_OUTPUT_DIR"])
+
     job_ids = {}
     for scenario_name, scenario in SCENARIOS.items():
         job_ids[scenario_name] = defaultdict(dict)
-        data_dir = …
-        inference_configs_dir = (
+        data_dir = base_data_dir / scenario_name
+        inference_configs_dir = Path(os.environ["BELLA_BEAST_CONFIGS_DIR"]) / (
             scenario_name.split("_")[0] if "_" in scenario_name else scenario_name
         )
+        log_dir = Path(os.environ["BELLA_SBATCH_LOG_DIR"]) / scenario_name
         for tree_file in tqdm(
             glob(str(data_dir / "*.nwk")),
             desc=f"Submitting BEAST2 jobs for {scenario_name}",
@@ -31,11 +34,13 @@ def main():
             for model in ["Nonparametric", "GLM"] + [
                 f"MLP-{hidden_nodes}" for hidden_nodes in ["3_2", "16_8", "32_16"]
             ]:
-                …
-                os.makedirs(…
+                output_dir = base_output_dir / scenario_name / model
+                os.makedirs(output_dir, exist_ok=True)
+
                 beast_args = [
                     f"-D treeFile={tree_file},treeID={tree_id}",
-                    f"-prefix {…
+                    f"-prefix {output_dir}{os.sep}",
+                    f'-D randomPredictor="{" ".join(map(str, scenario.get_random_predictor(rng)))}"',
                 ]
                 beast_args.extend(
                     [
@@ -43,9 +48,6 @@ def main():
                         for key, value in scenario.beast_args.items()
                     ]
                 )
-                beast_args.append(
-                    f'-D randomPredictor="{" ".join(map(str, scenario.get_random_predictor(rng)))}"'
-                )
                 if scenario.type == ScenarioType.EPI:
                     tree = load_newick(tree_file)
                     assert isinstance(tree, Tree)
@@ -53,40 +55,24 @@
                         f"-D lastSampleTime={max(get_node_depths(tree).values())}"
                     )
 
+                base_command = [os.environ["BELLA_RUN_BEAST_CMD"], *beast_args]
                 if model in ["Nonparametric", "GLM"]:
                     command = " ".join(
-                        [
-                            cfg.RUN_BEAST,
-                            *beast_args,
-                            str(
-                                cfg.BEAST_CONFIGS_DIR
-                                / inference_configs_dir
-                                / f"{model}.xml"
-                            ),
-                        ]
+                        [*base_command, str(inference_configs_dir / f"{model}.xml")]
                     )
                 else:
                     nodes = model.split("-")[1].split("_")
                     command = " ".join(
                         [
-                            …
-                            *beast_args,
+                            *base_command,
                             f'-D nodes="{" ".join(map(str, nodes))}"',
-                            str(
-                                cfg.BEAST_CONFIGS_DIR
-                                / inference_configs_dir
-                                / "MLP.xml"
-                            ),
+                            str(inference_configs_dir / "MLP.xml"),
                         ]
                     )
 
-                job_ids[scenario_name][model][tree_id] = …
-                    command, …
+                job_ids[scenario_name][model][tree_id] = submit_job(
+                    command, log_dir / model / tree_id
                 )
 
-    with open(…
+    with open(base_output_dir / "simulations_job_ids.json", "w") as f:
         json.dump(job_ids, f)
-
-
-if __name__ == "__main__":
-    main()
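run_beast() now reads every machine-specific path from environment variables instead of a config module. A sketch of the five variables it consumes; the values below are placeholders, and in practice they would live in the .env file that cli.py loads via load_dotenv():

import os

# Placeholder paths for illustration only.
os.environ["BELLA_SIMULATIONS_DATA_DIR"] = "/scratch/bella/simulations"  # input *.nwk trees
os.environ["BELLA_BEAST_OUTPUT_DIR"] = "/scratch/bella/beast-output"     # BEAST2 logs and the job-IDs JSON
os.environ["BELLA_BEAST_CONFIGS_DIR"] = "/scratch/bella/beast-configs"   # Nonparametric/GLM/MLP XML configs
os.environ["BELLA_SBATCH_LOG_DIR"] = "/scratch/bella/sbatch-logs"        # per-job output.out / error.err
os.environ["BELLA_RUN_BEAST_CMD"] = "run-beast"                          # command that launches BEAST2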
bella_companion/simulations/summarize_logs.py
CHANGED

@@ -1,39 +1,31 @@
 import json
 import os
+from pathlib import Path
 
 import joblib
 
-from …
-from …
-from …
+from bella_companion.simulations.scenarios import SCENARIOS
+from bella_companion.utils import summarize_logs as _summarize_logs
+from bella_companion.utils import summarize_weights
 
 
-def …
-    …
+def summarize_logs():
+    output_dir = Path(os.environ["BELLA_BEAST_OUTPUT_DIR"])
+    with open(output_dir / "simulations_job_ids.json", "r") as f:
         job_ids: dict[str, dict[str, dict[str, str]]] = json.load(f)
 
     for scenario_name, scenario in SCENARIOS.items():
-        summaries_dir = BEAST_LOGS_SUMMARIES_DIR / scenario_name
+        summaries_dir = Path(os.environ["BEAST_LOGS_SUMMARIES_DIR"]) / scenario_name
         os.makedirs(summaries_dir, exist_ok=True)
         for model in job_ids[scenario_name]:
-            …
-                list(map(int, model.split("-")[1].split("_")))
-                if model.startswith("MLP")
-                else None
-            )
-            logs_dir = BEAST_OUTPUTS_DIR / scenario_name / model
+            logs_dir = output_dir / scenario_name / model
             print(f"Summarizing {scenario_name} - {model}")
-            …
+            summary = _summarize_logs(
                 logs_dir,
                 target_columns=[c for t in scenario.targets.values() for c in t],
-                hidden_nodes=hidden_nodes,
-                n_features={t: len(fs) for t, fs in scenario.features.items()},
                 job_ids=job_ids[scenario_name][model],
             )
-
-            if …
+            summary.write_csv(summaries_dir / f"{model}.csv")
+            if model.startswith("MLP"):
+                weights = summarize_weights(logs_dir)
                 joblib.dump(weights, summaries_dir / f"{model}.weights.pkl")
-
-
-if __name__ == "__main__":
-    main()
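After the refactor, the per-model flow is: summarize the target columns of every log, write one CSV per model, and, for MLP models, dump posterior weight samples. A condensed sketch of that sequence against a single hypothetical log directory (paths and the target column name are invented for illustration):

from pathlib import Path

import joblib

from bella_companion.utils import summarize_logs, summarize_weights

logs_dir = Path("/scratch/bella/beast-output/epi_skyline/MLP-3_2")  # hypothetical
summary = summarize_logs(logs_dir, target_columns=["birthRate.1"])  # invented column name
summary.write_csv("MLP-3_2.csv")

weights = summarize_weights(logs_dir)  # one list of weight samples per log file
joblib.dump(weights, "MLP-3_2.weights.pkl")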
bella_companion/utils/beast.py
ADDED

@@ -0,0 +1,69 @@
+import os
+from functools import partial
+from glob import glob
+from pathlib import Path
+from typing import Any
+
+import arviz as az
+import numpy as np
+import polars as pl
+from joblib import Parallel, delayed
+from lumiere import read_log_file, read_weights
+from lumiere.backend.typing import Weights
+from tqdm import tqdm
+
+from bella_companion.utils.slurm import get_job_metadata
+
+
+def summarize_log(
+    log_file: str,
+    target_columns: list[str],
+    burn_in: int | float = 0.1,
+    hdi_prob: float = 0.95,
+    job_id: str | None = None,
+) -> dict[str, Any]:
+    log = read_log_file(log_file, burn_in=burn_in)
+    log = log.select(target_columns)
+    summary: dict[str, Any] = {"id": Path(log_file).stem, "n_samples": len(log)}
+    for column in log.columns:
+        summary[f"{column}_median"] = log[column].median()
+        summary[f"{column}_ess"] = az.ess(np.array(log[column]))  # pyright: ignore
+        lower, upper = az.hdi(np.array(log[column]), hdi_prob)  # pyright: ignore
+        summary[f"{column}_lower"] = lower
+        summary[f"{column}_upper"] = upper
+    if job_id is not None:
+        summary.update(get_job_metadata(job_id))
+    return summary
+
+
+def summarize_logs(
+    logs_dir: Path,
+    target_columns: list[str],
+    burn_in: float = 0.1,
+    hdi_prob: float = 0.95,
+    job_ids: dict[str, str] | None = None,
+) -> pl.DataFrame:
+    os.environ["POLARS_MAX_THREADS"] = "1"
+    summaries = Parallel(n_jobs=-1)(
+        delayed(
+            partial(
+                summarize_log,
+                target_columns=target_columns,
+                burn_in=burn_in,
+                hdi_prob=hdi_prob,
+                job_id=None if job_ids is None else job_ids[Path(log_file).stem],
+            )
+        )(log_file)
+        for log_file in tqdm(glob(str(logs_dir / "*.log")))
+    )
+    return pl.DataFrame(summaries)
+
+
+def summarize_weights(
+    logs_dir: Path, n_samples: int = 100, burn_in: float = 0.1
+) -> list[dict[str, list[Weights]]]:
+    os.environ["POLARS_MAX_THREADS"] = "1"
+    return Parallel(n_jobs=-1)(
+        delayed(partial(read_weights, burn_in=burn_in, n_samples=n_samples))(log_file)
+        for log_file in tqdm(glob(str(logs_dir / "*.log")))
+    )
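For context, the per-column statistics summarize_log() collects are standard MCMC diagnostics computed with arviz. A toy illustration on a synthetic chain (not package code):

import arviz as az
import numpy as np

chain = np.random.default_rng(0).normal(loc=2.0, scale=0.5, size=1000)

median = np.median(chain)                    # point estimate, as in summarize_log()
ess = az.ess(chain)                          # effective sample size of the chain
lower, upper = az.hdi(chain, hdi_prob=0.95)  # bounds of the 95% highest-density interval
print(f"median={median:.2f} ess={ess:.0f} hdi=[{lower:.2f}, {upper:.2f}]")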
bella_companion/utils/slurm.py
ADDED

@@ -0,0 +1,58 @@
+import re
+import subprocess
+from pathlib import Path
+
+
+def submit_job(
+    command: str, log_dir: Path, time: str = "240:00:00", mem_per_cpu: str = "2000"
+) -> str | None:
+    if log_dir.exists():
+        print(f"Log directory {log_dir} already exists. Skipping.")
+        return
+    cmd = " ".join(
+        [
+            "sbatch",
+            f"-J {log_dir}",
+            f"-o {log_dir / 'output.out'}",
+            f"-e {log_dir / 'error.err'}",
+            f"--time {time}",
+            f"--mem-per-cpu={mem_per_cpu}",
+            f"--wrap='{command}'",
+        ]
+    )
+    output = subprocess.run(cmd, shell=True, capture_output=True, text=True)
+    job_id = re.search(r"Submitted batch job (\d+)", output.stdout)
+    if job_id is None:
+        raise RuntimeError(
+            f"Failed to submit job.\n"
+            f"Command: {cmd}\n"
+            f"Output: {output.stdout}\n"
+            f"Error: {output.stderr}"
+        )
+    return job_id.group(1)
+
+
+def get_job_metadata(job_id: str):
+    output = subprocess.run(
+        f"myjobs -j {job_id}", shell=True, capture_output=True, text=True
+    ).stdout
+
+    status = re.search(r"Status\s+:\s+(\w+)", output)
+    if status is None:
+        raise ValueError(f"Failed to get job status for job {job_id}")
+    status = status.group(1)
+
+    wall_clock = re.search(r"Wall-clock\s+:\s+([\d\-:]+)", output)
+    if wall_clock is None:
+        raise ValueError(f"Failed to get wall-clock time for job {job_id}")
+    wall_clock = wall_clock.group(1)
+
+    if "-" in wall_clock:
+        days, wall_clock = wall_clock.split("-")
+        days = int(days)
+    else:
+        days = 0
+    hours, minutes, seconds = map(int, wall_clock.split(":"))
+    total_hours = days * 24 + hours + minutes / 60 + seconds / 3600
+
+    return {"status": status, "total_hours": total_hours}
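get_job_metadata() scrapes a cluster-specific myjobs report with two regexes and converts the wall-clock field ([days-]HH:MM:SS) into hours. A fabricated report illustrating the parse (the real format depends on the cluster):

import re

report = """Status       : COMPLETED
Wall-clock   : 1-02:30:00"""  # fabricated myjobs output: 1 day, 2 h 30 min

status = re.search(r"Status\s+:\s+(\w+)", report).group(1)
wall_clock = re.search(r"Wall-clock\s+:\s+([\d\-:]+)", report).group(1)

days, hms = wall_clock.split("-") if "-" in wall_clock else ("0", wall_clock)
hours, minutes, seconds = map(int, hms.split(":"))
total_hours = int(days) * 24 + hours + minutes / 60 + seconds / 3600
print(status, total_hours)  # COMPLETED 26.5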
{bella_companion-0.0.1.dist-info → bella_companion-0.0.2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bella-companion
-Version: 0.0.1
+Version: 0.0.2
 Summary:
 Author: gabriele-marino
 Author-email: gabmarino.8601@gmail.com
@@ -9,5 +9,7 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Requires-Dist: arviz (>=0.22.0,<0.23.0)
+Requires-Dist: bella-lumiere (>=0.0.10,<0.0.11)
 Requires-Dist: dotenv (>=0.9.9,<0.10.0)
-Requires-Dist: phylogenie (>=2.1.…
+Requires-Dist: phylogenie (>=2.1.27,<3.0.0)
{bella_companion-0.0.1.dist-info → bella_companion-0.0.2.dist-info}/RECORD
CHANGED

@@ -1,5 +1,5 @@
 bella_companion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-bella_companion/cli.py,sha256=…
+bella_companion/cli.py,sha256=0sPnzGyUGo2OBZ0rj17ZGzMdwNH0o-BXKsYtCJjzGvQ,968
 bella_companion/fbd_empirical/data/body_mass.csv,sha256=-UkKNtm9m3g4PjY3BcfdP6z5nL_I6p9cq6cgZ-bWKI8,30360
 bella_companion/fbd_empirical/data/change_times.csv,sha256=zmc9_z91-XMwKyIoP9v9dVlLcf4MeIHkQiHLjoMriOo,120
 bella_companion/fbd_empirical/data/sampling_change_times.csv,sha256=Gwi9RcMFy89RyvfxKVZ_MoKVRHOZLuwB_3LEaq8asMQ,32
@@ -9,17 +9,17 @@ bella_companion/fbd_empirical/notbooks.ipynb,sha256=O45kmz0lZENRDFbKXEWPsIKATfF5
 bella_companion/fbd_empirical/params.json,sha256=hU23LniClZL_GSBAxIEJUJgMa93AM8zdtFOq6mt3vkI,311
 bella_companion/fbd_empirical/run_beast.py,sha256=2sV2UmxOfWmbueiU6D0p3lueMYiZyIkSKYoblTMrYuA,1935
 bella_companion/fbd_empirical/summarize_logs.py,sha256=O6rhE606Wa98a8b1KKlLPjUOro1pfyqVTLdQksQMG0g,1439
-bella_companion/simulations/__init__.py,sha256=…
+bella_companion/simulations/__init__.py,sha256=i6Fe7l5sUJY9hPxdg6L_FVhwbSPhNxQNMb-m33JlfxI,258
 bella_companion/simulations/features.py,sha256=DZOBpJGlQ0UinqUZYbEtoemZ2eQGVLV_i-DfpW31qJI,104
 bella_companion/simulations/figures/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-bella_companion/simulations/figures/epi_explainations.py,sha256=…
+bella_companion/simulations/figures/epi_explainations.py,sha256=omiJgyIY-I6zcJAcyOF7GJ2pba6pMZySLkWy7OFrjFY,3093
 bella_companion/simulations/figures/epi_predictions.py,sha256=4yXwOBKxUv4kgZdI9zAMEhZ0QCNKZdkAafRQ1RTeaWg,1835
 bella_companion/simulations/figures/fbd_explainations.py,sha256=9Uj7yttpn_TH5HqycW8R-Nlky9A9aFXDXRpXQuT1L4s,3037
 bella_companion/simulations/figures/fbd_predictions.py,sha256=jdXYCLledZEWoPCIuTLhHEPMdeG6YXvf5xZnEOslv-U,2119
 bella_companion/simulations/figures/scenarios.py,sha256=vyybn3Qhfq96N8tvW0wSzpFoHHP8EIc8dkOz63o_Atw,2492
 bella_companion/simulations/figures/utils.py,sha256=sY8wFBg02fv5ugpJ80EqQishD_HEdLwhqsw2LfM7wEo,8539
 bella_companion/simulations/generate_data.py,sha256=H8OV4ZlTGZB-jXaROTPmOsK3UxRiU-GrX40l-shliw8,728
-bella_companion/simulations/run_beast.py,sha256=…
+bella_companion/simulations/run_beast.py,sha256=xOuwE0w4IbOqqCSym6kHsAEhfGT2mWdA-jmUZuviMbc,3121
 bella_companion/simulations/scenarios/__init__.py,sha256=3Kl1lKcFpfb3vLX64DmSW4XCF5kXU1ZoHtstFH-ZIzU,876
 bella_companion/simulations/scenarios/common.py,sha256=_ddaSuTvEVdttGkXB4HPc2B7IB1F_GBOCW3cVOPZ-ZM,807
 bella_companion/simulations/scenarios/epi_multitype.py,sha256=GWGIiqvYwX_FrT_3RXkZKYGDht9nZ7ceHRBKUvXDPnA,2432
@@ -27,9 +27,11 @@ bella_companion/simulations/scenarios/epi_skyline.py,sha256=JqnOVATECxBUqEbkR5lB
 bella_companion/simulations/scenarios/fbd_2traits.py,sha256=sCtdWyV6GQQOIhnL9Dd8NIbAR-StTwUTD9-b_BalmFQ,3552
 bella_companion/simulations/scenarios/fbd_no_traits.py,sha256=R6CH0fVeQg-Iesl39pq2uY8ICVEO4VZbvUVUCGwauJU,2520
 bella_companion/simulations/scenarios/scenario.py,sha256=_FRWAyOFbw94lAzd3zCD-1ek4TrssoiXfXRQPShLiIA,620
-bella_companion/simulations/summarize_logs.py,sha256=…
-bella_companion/utils.py,sha256=…
-bella_companion…
-bella_companion…
-bella_companion-0.0.…
-bella_companion-0.0.…
+bella_companion/simulations/summarize_logs.py,sha256=N4W41IbTeJDbJyYZ5HCGyMPz6hKTkWdnbMfowqlD3J0,1264
+bella_companion/utils/__init__.py,sha256=_5tLPH_3GHtimNcH0Yd9Z6yIM3WkWkNApNGLzFnF6nY,222
+bella_companion/utils/beast.py,sha256=RG-iSEFuL92K6yxUV2nxdmcVqfrEiPhaYTmReW4ZoWk,2189
+bella_companion/utils/slurm.py,sha256=v5DaG7YHVyK8KRFptgGDC6I8jxEhyJuMVK9N08pZSAI,1812
+bella_companion-0.0.2.dist-info/METADATA,sha256=3jBu7TyB8P3S1YO9CAnKHKdpD4IcFE4loKhz52xmZeQ,534
+bella_companion-0.0.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+bella_companion-0.0.2.dist-info/entry_points.txt,sha256=rSeKoAhmjnQqAYFcXBv0gAM2ViJfJe0D8_dD-fWrXeg,50
+bella_companion-0.0.2.dist-info/RECORD,,
bella_companion/utils.py
DELETED

@@ -1,164 +0,0 @@
-import os
-import re
-import subprocess
-from glob import glob
-from pathlib import Path
-from typing import Any
-
-import arviz as az
-import matplotlib.pyplot as plt
-import numpy as np
-import polars as pl
-from joblib import Parallel, delayed
-from lumiere.backend.typings import Weights
-from tqdm import tqdm
-
-
-def run_sbatch(
-    command: str,
-    log_dir: Path,
-    time: str = "240:00:00",
-    mem_per_cpu: str = "2000",
-    overwrite: bool = False,
-) -> str | None:
-    if not overwrite and log_dir.exists():
-        print(f"Log directory {log_dir} already exists. Skipping.")
-        return
-    cmd = " ".join(
-        [
-            "sbatch",
-            f"-J {log_dir}",
-            f"-o {log_dir / 'output.out'}",
-            f"-e {log_dir / 'error.err'}",
-            f"--time {time}",
-            f"--mem-per-cpu={mem_per_cpu}",
-            f"--wrap='{command}'",
-        ]
-    )
-    output = subprocess.run(cmd, shell=True, capture_output=True, text=True)
-    job_id = re.search(r"Submitted batch job (\d+)", output.stdout)
-    if job_id is None:
-        raise RuntimeError(
-            f"Failed to submit job.\nCommand: {cmd}\nOutput: {output.stdout}\nError: {output.stderr}"
-        )
-    return job_id.group(1)
-
-
-def get_job_metadata(job_id: str):
-    output = subprocess.run(
-        f"myjobs -j {job_id}", shell=True, capture_output=True, text=True
-    ).stdout
-
-    status = re.search(r"Status\s+:\s+(\w+)", output)
-    if status is None:
-        raise RuntimeError(f"Failed to get job status for job {job_id}")
-    status = status.group(1)
-
-    wall_clock = re.search(r"Wall-clock\s+:\s+([\d\-:]+)", output)
-    if wall_clock is None:
-        raise RuntimeError(f"Failed to get wall-clock time for job {job_id}")
-    wall_clock = wall_clock.group(1)
-
-    if "-" in wall_clock:
-        days, wall_clock = wall_clock.split("-")
-        days = int(days)
-    else:
-        days = 0
-    hours, minutes, seconds = map(int, wall_clock.split(":"))
-    total_hours = days * 24 + hours + minutes / 60 + seconds / 3600
-
-    return {"status": status, "total_hours": total_hours}
-
-
-def summarize_log(
-    log_file: str,
-    target_columns: list[str],
-    burn_in: float = 0.1,
-    hdi_prob: float = 0.95,
-    hidden_nodes: list[int] | None = None,
-    n_weights_samples: int = 100,
-    n_features: dict[str, int] | None = None,
-    job_id: str | None = None,
-) -> tuple[dict[str, Any], dict[str, list[Weights]] | None]:
-    df = pl.read_csv(log_file, separator="\t", comment_prefix="#")
-    df = df.filter(pl.col("Sample") > burn_in * len(df))
-    targets_df = df.select(target_columns)
-    summary: dict[str, Any] = {"n_samples": len(df)}
-    for column in targets_df.columns:
-        summary[f"{column}_median"] = targets_df[column].median()
-        summary[f"{column}_ess"] = az.ess(  # pyright: ignore[reportUnknownMemberType]
-            np.array(targets_df[column])
-        )
-        lower, upper = az.hdi(  # pyright: ignore[reportUnknownMemberType]
-            np.array(targets_df[column]), hdi_prob=hdi_prob
-        )
-        summary[f"{column}_lower"] = lower
-        summary[f"{column}_upper"] = upper
-    if job_id is not None:
-        summary.update(get_job_metadata(job_id))
-    if hidden_nodes is not None:
-        if n_features is None:
-            raise ValueError("`n_features` must be provided to summarize log weights.")
-        weights: dict[str, list[Weights]] = {}
-        for target, n in n_features.items():
-            nodes = [n, *hidden_nodes, 1]
-            layer_weights = [
-                np.array(
-                    df.tail(n_weights_samples).select(
-                        c for c in df.columns if c.startswith(f"{target}W.{i}")
-                    )
-                ).reshape(-1, n_inputs + 1, n_outputs)
-                for i, (n_inputs, n_outputs) in enumerate(zip(nodes[:-1], nodes[1:]))
-            ]
-            weights[target] = [
-                list(sample_weights) for sample_weights in zip(*layer_weights)
-            ]
-        return summary, weights
-    return summary, None
-
-
-def summarize_logs(
-    logs_dir: Path,
-    target_columns: list[str],
-    burn_in: float = 0.1,
-    hdi_prob: float = 0.95,
-    hidden_nodes: list[int] | None = None,
-    n_weights_samples: int = 100,
-    n_features: dict[str, int] | None = None,
-    job_ids: dict[str, str] | None = None,
-) -> tuple[pl.DataFrame, dict[str, list[list[Weights]]] | None]:
-    def _get_log_summary(
-        log_file: str,
-    ) -> tuple[dict[str, Any], dict[str, list[Weights]] | None]:
-        log_id = Path(log_file).stem
-        summary, weights = summarize_log(
-            log_file=log_file,
-            target_columns=target_columns,
-            burn_in=burn_in,
-            hdi_prob=hdi_prob,
-            hidden_nodes=hidden_nodes,
-            n_weights_samples=n_weights_samples,
-            n_features=n_features,
-            job_id=job_ids[log_id] if job_ids is not None else None,
-        )
-        return {"id": log_id, **summary}, weights
-
-    os.environ["POLARS_MAX_THREADS"] = "1"
-    summaries = Parallel(n_jobs=-1)(
-        delayed(_get_log_summary)(log_file)
-        for log_file in tqdm(glob(str(logs_dir / "*.log")))
-    )
-    data, weights = zip(*summaries)
-    if any(w is not None for w in weights):
-        assert n_features is not None
-        return pl.DataFrame(data), {t: [w[t] for w in weights] for t in n_features}
-    return pl.DataFrame(data), None
-
-
-def set_plt_rcparams():
-    plt.rcParams["pdf.fonttype"] = 42
-    plt.rcParams["xtick.labelsize"] = 14
-    plt.rcParams["ytick.labelsize"] = 14
-    plt.rcParams["font.size"] = 14
-    plt.rcParams["figure.constrained_layout.use"] = True
-    plt.rcParams["lines.linewidth"] = 3
{bella_companion-0.0.1.dist-info → bella_companion-0.0.2.dist-info}/WHEEL
File without changes

{bella_companion-0.0.1.dist-info → bella_companion-0.0.2.dist-info}/entry_points.txt
File without changes