bella-companion 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of bella-companion might be problematic. Click here for more details.
- bella_companion/cli.py +6 -0
- bella_companion/fbd_empirical/__init__.py +2 -1
- bella_companion/fbd_empirical/data/change_times.csv +0 -10
- bella_companion/fbd_empirical/data/traits.csv +122 -0
- bella_companion/fbd_empirical/data/trees.nwk +100 -100
- bella_companion/fbd_empirical/results.py +88 -0
- bella_companion/fbd_empirical/run_beast.py +37 -12
- bella_companion/fbd_empirical/summarize_logs.py +2 -5
- bella_companion/simulations/figures/utils.py +2 -2
- {bella_companion-0.0.9.dist-info → bella_companion-0.0.11.dist-info}/METADATA +2 -2
- {bella_companion-0.0.9.dist-info → bella_companion-0.0.11.dist-info}/RECORD +13 -14
- bella_companion/fbd_empirical/data/body_mass.csv +0 -1378
- bella_companion/fbd_empirical/data/sampling_change_times.csv +0 -6
- bella_companion/fbd_empirical/notbooks.ipynb +0 -170
- {bella_companion-0.0.9.dist-info → bella_companion-0.0.11.dist-info}/WHEEL +0 -0
- {bella_companion-0.0.9.dist-info → bella_companion-0.0.11.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import matplotlib.pyplot as plt
|
|
5
|
+
import numpy as np
|
|
6
|
+
import polars as pl
|
|
7
|
+
from numpy.typing import NDArray
|
|
8
|
+
from phylogenie import get_node_depths, load_newick
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _plot_predictions(output_dir: Path):
    """Plot the MLP rate estimates through time, one SVG figure per rate.

    Reads ``fbd-empirical/MLP.csv`` from the directory named by the
    ``BELLA_LOG_SUMMARIES_DIR`` environment variable, plus the packaged
    ``change_times.csv`` and ``trees.nwk`` files. For each of the birth,
    death and diversification (birth minus death) rates it draws, for the
    four states 0-3, every row of the summary as a faint step line, the
    2.5-97.5 percentile band across rows, and the median across rows.

    Args:
        output_dir: Directory receiving ``{rate}-predictions.svg``.

    Raises:
        KeyError: If ``BELLA_LOG_SUMMARIES_DIR`` is not set.
    """
    summaries_root = Path(os.environ["BELLA_LOG_SUMMARIES_DIR"])
    summary = pl.read_csv(summaries_root / "fbd-empirical" / "MLP.csv")

    package_data = Path(__file__).parent / "data"
    change_times_frame = pl.read_csv(
        package_data / "change_times.csv", has_header=False
    )
    change_times = change_times_frame.to_series().to_list()

    # Largest node depth found in any of the packaged trees; it closes the
    # last time bin.
    deepest_per_tree = [
        max(get_node_depths(tree).values())
        for tree in load_newick(package_data / "trees.nwk")
    ]
    max_time = max(deepest_per_tree)

    # Bin edges, listed from max_time down to 0.0.
    time_bins = [0.0, *change_times, max_time]
    time_bins.reverse()
    n_bins = len(change_times) + 1
    n_states = 4

    shades = np.linspace(0.4, 0.9, n_states)
    colors: dict[str, NDArray[np.floating]] = {
        "birth": plt.cm.Blues(shades),  # pyright: ignore
        "death": plt.cm.Oranges(shades),  # pyright: ignore
        "diversification": plt.cm.Greens(shades),  # pyright: ignore
    }
    y_labels = {
        "birth": r"$\lambda$",
        "death": r"$\mu$",
        "diversification": r"$d$",
    }

    def _padded(values):
        # There is one more bin edge than there are bins, so the first value
        # is repeated to give the step plots one y per edge.
        return [values[0], *values]

    for rate, palette in colors.items():
        for state in range(n_states):
            if rate == "diversification":
                columns = [
                    pl.col(f"birthRateSPi{i}_{state}_median")
                    - pl.col(f"deathRateSPi{i}_{state}_median")
                    for i in range(n_bins)
                ]
            else:
                columns = [
                    pl.col(f"{rate}RateSPi{i}_{state}_median")
                    for i in range(n_bins)
                ]
            estimates = summary.select(columns).to_numpy()

            median = np.median(estimates, axis=0)
            lower = np.percentile(estimates, 2.5, axis=0)
            upper = np.percentile(estimates, 97.5, axis=0)

            color = palette[state]

            plt.fill_between(  # pyright: ignore
                time_bins,
                _padded(lower),
                _padded(upper),
                step="pre",
                alpha=0.25,
                color=color,
            )

            # One faint line per row of the summary table.
            for estimate in estimates:
                plt.step(  # pyright: ignore
                    time_bins, _padded(estimate), color=color, alpha=0.15
                )

            plt.step(  # pyright: ignore
                time_bins, _padded(median), color=color, label=state
            )

        plt.gca().invert_xaxis()
        plt.legend(title="Body mass")  # pyright: ignore
        plt.xlabel("Time (mya)")  # pyright: ignore
        plt.ylabel(y_labels[rate])  # pyright: ignore
        plt.savefig(output_dir / f"{rate}-predictions.svg")  # pyright: ignore
        plt.close()
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def plot_fbd_empirical_results():
    """Create the ``fbd-empirical`` figures directory and fill it.

    The directory is ``fbd-empirical`` under the path named by the
    ``BELLA_FIGURES_DIR`` environment variable; it is created if missing and
    then handed to ``_plot_predictions``.

    Raises:
        KeyError: If ``BELLA_FIGURES_DIR`` is not set.
    """
    figures_root = Path(os.environ["BELLA_FIGURES_DIR"])
    output_dir = figures_root / "fbd-empirical"
    output_dir.mkdir(parents=True, exist_ok=True)
    _plot_predictions(output_dir)
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import os
|
|
3
|
+
from itertools import combinations
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
|
|
5
6
|
import numpy as np
|
|
@@ -10,25 +11,49 @@ from tqdm import tqdm
|
|
|
10
11
|
|
|
11
12
|
from bella_companion.utils import submit_job
|
|
12
13
|
|
|
13
|
-
|
|
14
|
+
|
|
15
|
+
def _get_migration_rates_init(types: list[str], init_rate: float = 1) -> str:
|
|
16
|
+
mus: list[float] = []
|
|
17
|
+
for t1, t2 in combinations(types, 2):
|
|
18
|
+
traits1 = np.array(list(map(int, t1.split("_"))))
|
|
19
|
+
traits2 = np.array(list(map(int, t2.split("_"))))
|
|
20
|
+
mus.append(init_rate if np.sum(np.abs(traits1 - traits2)) == 1 else 0)
|
|
21
|
+
return " ".join(map(str, mus))
|
|
14
22
|
|
|
15
23
|
|
|
16
24
|
def run_beast():
|
|
25
|
+
from dotenv import load_dotenv
|
|
26
|
+
|
|
27
|
+
load_dotenv()
|
|
17
28
|
base_output_dir = Path(os.environ["BELLA_BEAST_OUTPUT_DIR"])
|
|
18
29
|
output_dir = base_output_dir / "fbd-empirical"
|
|
19
30
|
os.makedirs(output_dir, exist_ok=True)
|
|
20
31
|
|
|
21
|
-
data_dir =
|
|
32
|
+
data_dir = Path(__file__).parent / "data"
|
|
22
33
|
tree_file = data_dir / "trees.nwk"
|
|
23
34
|
change_times_file = data_dir / "change_times.csv"
|
|
35
|
+
traits_file = data_dir / "traits.csv"
|
|
24
36
|
|
|
25
37
|
trees = load_newick(str(tree_file))
|
|
26
38
|
assert isinstance(trees, list)
|
|
39
|
+
|
|
40
|
+
traits = pl.read_csv(traits_file, separator="\t", null_values=["NA"])
|
|
41
|
+
|
|
27
42
|
change_times = (
|
|
28
43
|
pl.read_csv(change_times_file, has_header=False).to_series().to_numpy()
|
|
29
44
|
)
|
|
30
|
-
|
|
31
|
-
|
|
45
|
+
|
|
46
|
+
types: list[str] = sorted(traits["type"].unique())
|
|
47
|
+
types.remove("?")
|
|
48
|
+
N = len(types)
|
|
49
|
+
|
|
50
|
+
time_predictor = " ".join(list(map(str, np.repeat([0, *change_times], N))))
|
|
51
|
+
log10BM_predictor = " ".join(
|
|
52
|
+
[t.split("_")[0] for t in types] * (len(change_times) + 1)
|
|
53
|
+
)
|
|
54
|
+
midlat_predictor = " ".join(
|
|
55
|
+
[t.split("_")[1] for t in types] * (len(change_times) + 1)
|
|
56
|
+
)
|
|
32
57
|
|
|
33
58
|
job_ids = {}
|
|
34
59
|
for i, tree in enumerate(tqdm(trees)):
|
|
@@ -36,24 +61,24 @@ def run_beast():
|
|
|
36
61
|
command = " ".join(
|
|
37
62
|
[
|
|
38
63
|
os.environ["BELLA_RUN_BEAST_CMD"],
|
|
39
|
-
f
|
|
40
|
-
f'-D startTypePriorProbs="
|
|
64
|
+
f'-D types="{",".join(types)}"',
|
|
65
|
+
f'-D startTypePriorProbs="{" ".join([str(1/N)] * N)}"',
|
|
41
66
|
f"-D birthRateUpper=5",
|
|
42
67
|
f"-D deathRateUpper=5",
|
|
43
68
|
f"-D samplingRateUpper=5",
|
|
44
|
-
f'-D samplingRateInit="
|
|
69
|
+
f'-D samplingRateInit="{" ".join(["2.5"] * N)}"',
|
|
45
70
|
f"-D migrationRateUpper=5",
|
|
46
|
-
f'-D migrationRateInit="
|
|
71
|
+
f'-D migrationRateInit="{_get_migration_rates_init(types, 2.5)}"',
|
|
47
72
|
f'-D nodes="16 8"',
|
|
48
73
|
f'-D layersRange="0,1,2"',
|
|
49
|
-
f"-D
|
|
74
|
+
f"-D tree_file={tree_file}",
|
|
50
75
|
f"-D treeIndex={i}",
|
|
51
76
|
f"-D changeTimesFile={change_times_file}",
|
|
52
|
-
f"-D
|
|
53
|
-
f"-D typeTraitFile={data_dir / 'body_mass.csv'}",
|
|
77
|
+
f"-D traitsFile={traits_file}",
|
|
54
78
|
f"-D processLength={process_length}",
|
|
55
79
|
f'-D timePredictor="{time_predictor}"',
|
|
56
|
-
f'-D
|
|
80
|
+
f'-D log10BM_predictor="{log10BM_predictor}"',
|
|
81
|
+
f'-D midlat_predictor="{midlat_predictor}"',
|
|
57
82
|
f"-prefix {output_dir}{os.sep}",
|
|
58
83
|
str(Path(os.environ["BELLA_BEAST_CONFIGS_DIR"]) / "fbd-empirical.xml"),
|
|
59
84
|
]
|
|
@@ -6,13 +6,10 @@ import polars as pl
|
|
|
6
6
|
|
|
7
7
|
from bella_companion.utils import read_weights_dir, summarize_logs_dir
|
|
8
8
|
|
|
9
|
-
THIS_DIR = Path(__file__).parent
|
|
10
|
-
|
|
11
9
|
|
|
12
10
|
def summarize_logs():
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
)
|
|
11
|
+
data_dir = Path(__file__).parent / "data"
|
|
12
|
+
change_times = pl.read_csv(data_dir / "change_times.csv", has_header=False)
|
|
16
13
|
n_time_bins = len(change_times) + 1
|
|
17
14
|
|
|
18
15
|
logs_dir = Path(os.environ["BELLA_BEAST_OUTPUT_DIR"]) / "fbd-empirical"
|
|
@@ -39,7 +39,7 @@ def _count_time_bins(true_values: dict[str, list[float]]) -> int:
|
|
|
39
39
|
def plot_maes_per_time_bin(
|
|
40
40
|
logs_summaries: dict[str, pl.DataFrame],
|
|
41
41
|
true_values: dict[str, list[float]],
|
|
42
|
-
output_filepath: Path,
|
|
42
|
+
output_filepath: str | Path,
|
|
43
43
|
reverse_xticks: bool = False,
|
|
44
44
|
):
|
|
45
45
|
def _mae(target: str, i: int) -> pl.Expr:
|
|
@@ -73,7 +73,7 @@ def plot_maes_per_time_bin(
|
|
|
73
73
|
def plot_coverage_per_time_bin(
|
|
74
74
|
logs_summaries: dict[str, pl.DataFrame],
|
|
75
75
|
true_values: dict[str, list[float]],
|
|
76
|
-
output_filepath: Path,
|
|
76
|
+
output_filepath: str | Path,
|
|
77
77
|
reverse_xticks: bool = False,
|
|
78
78
|
):
|
|
79
79
|
def _coverage(model: str, target: str, i: int) -> float:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: bella-companion
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.11
|
|
4
4
|
Summary:
|
|
5
5
|
Author: gabriele-marino
|
|
6
6
|
Author-email: gabmarino.8601@gmail.com
|
|
@@ -12,5 +12,5 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
12
12
|
Requires-Dist: arviz (>=0.22.0,<0.23.0)
|
|
13
13
|
Requires-Dist: bella-lumiere (>=0.0.13,<0.0.14)
|
|
14
14
|
Requires-Dist: dotenv (>=0.9.9,<0.10.0)
|
|
15
|
-
Requires-Dist: phylogenie (>=2.1.
|
|
15
|
+
Requires-Dist: phylogenie (>=2.1.30,<3.0.0)
|
|
16
16
|
Requires-Dist: seaborn (>=0.13.2,<0.14.0)
|
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
bella_companion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
bella_companion/cli.py,sha256=
|
|
3
|
-
bella_companion/fbd_empirical/__init__.py,sha256=
|
|
4
|
-
bella_companion/fbd_empirical/data/
|
|
5
|
-
bella_companion/fbd_empirical/data/
|
|
6
|
-
bella_companion/fbd_empirical/data/
|
|
7
|
-
bella_companion/fbd_empirical/
|
|
8
|
-
bella_companion/fbd_empirical/
|
|
9
|
-
bella_companion/fbd_empirical/
|
|
10
|
-
bella_companion/fbd_empirical/summarize_logs.py,sha256=TBLdW5LWS5ensEQFFQ8hrzJFJ3t0l-Mzu_SNa-Q8D_w,972
|
|
2
|
+
bella_companion/cli.py,sha256=D-kQuj0I-g5Vm5-mXdOZoio68mhOBpJ3jVG8B2Kwvn4,2190
|
|
3
|
+
bella_companion/fbd_empirical/__init__.py,sha256=FgzjaEpwZImvBsJs17ghF0ltZvhFmgmkTGGF3HgdsGo,284
|
|
4
|
+
bella_companion/fbd_empirical/data/change_times.csv,sha256=tedobHbaY7-6M3V6F6HSqGAU9dpeeajR5mu1XGJfp6w,68
|
|
5
|
+
bella_companion/fbd_empirical/data/traits.csv,sha256=cxNKYmgtBCKHsm9qnOpS29Rkx9cQKe0oxpgaXSIukJg,5422
|
|
6
|
+
bella_companion/fbd_empirical/data/trees.nwk,sha256=JAsdfGhTIzGmJz6g6-0s5SVfqNuxawOMc9gbt0srue4,539385
|
|
7
|
+
bella_companion/fbd_empirical/results.py,sha256=0FENY5EpgNQ6rwPsEiOT2ZEO5oBbIAK6TzPvk11SDnk,3116
|
|
8
|
+
bella_companion/fbd_empirical/run_beast.py,sha256=Vvh0xognC92tAI7GhUyzU901N1x6t8UjUDccvB2JcVA,3309
|
|
9
|
+
bella_companion/fbd_empirical/summarize_logs.py,sha256=OoT43DMjim9o4qsjYCgGKnBKp3dHXzVP6sVtzqR8lMQ,955
|
|
11
10
|
bella_companion/simulations/__init__.py,sha256=ShYRdp1iSjnS_SzcsH-8jbqXz6P1nyRQZqAtPZJCMVE,454
|
|
12
11
|
bella_companion/simulations/features.py,sha256=DZOBpJGlQ0UinqUZYbEtoemZ2eQGVLV_i-DfpW31qJI,104
|
|
13
12
|
bella_companion/simulations/figures/__init__.py,sha256=aBYbJntH4egFmkSSWiVMYDEApXPYxJD7eA3TCPNNegM,658
|
|
@@ -19,7 +18,7 @@ bella_companion/simulations/figures/explain/shap.py,sha256=qRfOMNwkU-hsBy5MHMTfV
|
|
|
19
18
|
bella_companion/simulations/figures/fbd_2traits_results.py,sha256=JiXrbYkH1HwdJQhTHXj6KhMEXYgQmQ6LmDboAIO1CPA,2728
|
|
20
19
|
bella_companion/simulations/figures/fbd_no_traits_results.py,sha256=fLsgpV3IbLLtxQEFNOL9K4jEEJrG324ziUM0rxIv7_k,1962
|
|
21
20
|
bella_companion/simulations/figures/scenarios.py,sha256=gbMz1TUxxT2RSIq2kQlFioNdgSHk-gQY2OQuf6-7Fww,2817
|
|
22
|
-
bella_companion/simulations/figures/utils.py,sha256=
|
|
21
|
+
bella_companion/simulations/figures/utils.py,sha256=0M5OrxaEuqcj9rR2uAc_O7utQvhEceZGH0sKrGRWaWs,3129
|
|
23
22
|
bella_companion/simulations/generate_data.py,sha256=ZFQnusmGWDjRixKOid0l49Aj4qfrI5zxyf1n21wf0p0,768
|
|
24
23
|
bella_companion/simulations/generate_figures.py,sha256=layMgoj3Bfl78Ceb1oE7YirAQ8zhjDyD9IrxDRXf6go,657
|
|
25
24
|
bella_companion/simulations/metrics.py,sha256=TwLgK4Eui4DlG10V9m6zEbmuGfUIli2dE0Ph-gX_6bU,1925
|
|
@@ -36,7 +35,7 @@ bella_companion/utils/__init__.py,sha256=UtMwPK9dWf9NAl0ic8CSsgdW7aSm-5J49OqgvD7
|
|
|
36
35
|
bella_companion/utils/beast.py,sha256=TBa0cLklX1_tXqoQE4LRYvds7mLg_9fS2-6U6OHENHo,2184
|
|
37
36
|
bella_companion/utils/explain.py,sha256=uP7HPyn2YiykAI69BQV3RooDpC6qKoCLXfp3Uibp4zk,1475
|
|
38
37
|
bella_companion/utils/slurm.py,sha256=v5DaG7YHVyK8KRFptgGDC6I8jxEhyJuMVK9N08pZSAI,1812
|
|
39
|
-
bella_companion-0.0.
|
|
40
|
-
bella_companion-0.0.
|
|
41
|
-
bella_companion-0.0.
|
|
42
|
-
bella_companion-0.0.
|
|
38
|
+
bella_companion-0.0.11.dist-info/METADATA,sha256=NQPoh1JxQYhkfvNRr_CrdJx1_t46j5o8P6lHlnEy83k,577
|
|
39
|
+
bella_companion-0.0.11.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
40
|
+
bella_companion-0.0.11.dist-info/entry_points.txt,sha256=rSeKoAhmjnQqAYFcXBv0gAM2ViJfJe0D8_dD-fWrXeg,50
|
|
41
|
+
bella_companion-0.0.11.dist-info/RECORD,,
|