guts-base 0.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of guts-base might be problematic. Click here for more details.
- guts_base/__init__.py +14 -0
- guts_base/data/__init__.py +34 -0
- guts_base/data/expydb.py +247 -0
- guts_base/data/generator.py +96 -0
- guts_base/data/openguts.py +294 -0
- guts_base/data/preprocessing.py +55 -0
- guts_base/data/survival.py +137 -0
- guts_base/data/time_of_death.py +571 -0
- guts_base/data/utils.py +8 -0
- guts_base/mod.py +251 -0
- guts_base/plot.py +162 -0
- guts_base/prob.py +412 -0
- guts_base/sim/__init__.py +14 -0
- guts_base/sim/base.py +464 -0
- guts_base/sim/ecx.py +357 -0
- guts_base/sim/mempy.py +252 -0
- guts_base/sim/report.py +72 -0
- guts_base/sim.py +0 -0
- guts_base-0.8.2.dist-info/METADATA +836 -0
- guts_base-0.8.2.dist-info/RECORD +24 -0
- guts_base-0.8.2.dist-info/WHEEL +5 -0
- guts_base-0.8.2.dist-info/entry_points.txt +3 -0
- guts_base-0.8.2.dist-info/licenses/LICENSE +674 -0
- guts_base-0.8.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
from expyDB.intervention_model import (
|
|
6
|
+
Experiment, Treatment, Timeseries,
|
|
7
|
+
PandasConverter,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
def read_timeseries_sheet(path, sheet, sep=None):
    """Load one worksheet of an Excel workbook as a two-level time series table.

    The first column of the sheet is used as the row index. Every column
    label is split with ``str.split(sep)`` and the resulting pieces become
    the levels of a column ``MultiIndex`` named ``treatment_id`` and
    ``timeseries_id``.

    Parameters
    ----------
    path
        Location of the Excel workbook, passed straight to ``pd.read_excel``.
    sheet
        Name (or position) of the worksheet to read.
    sep : str, optional
        Separator used to split the column labels. ``None`` follows
        ``str.split`` semantics and splits on runs of whitespace.

    Returns
    -------
    pandas.DataFrame
        The sheet content with the split labels as column index.
    """
    frame = pd.read_excel(path, sheet_name=sheet, index_col=0)  # type: ignore

    # one tuple per column; each label is expected to split into exactly two
    # parts, matching the two level names assigned below
    column_keys = [tuple(label.split(sep)) for label in frame.columns]

    frame.columns = pd.MultiIndex.from_tuples(
        column_keys,
        names=["treatment_id", "timeseries_id"],
    )
    return frame
|
|
17
|
+
|
|
18
|
+
def ringtest(path, new_path):
    """Convert the ring test workbook at ``path`` into a template workbook.

    The sheets ``Exposure`` and ``Survival`` are read with
    ``read_timeseries_sheet`` (column labels split on a single space), their
    index is named ``time``, and both tables are attached to a
    ``PandasConverter`` built around an empty ``Experiment``. A fixed set of
    metadata values is registered through ``map_to_meta`` before the
    converter is written to ``new_path``.

    Parameters
    ----------
    path
        Source Excel workbook containing the two sheets.
    new_path
        Destination handed to ``PandasConverter.to_excel``.

    Returns
    -------
    The unchanged ``new_path``.
    """
    data = {}
    for key, sheet_name in (("exposure", "Exposure"), ("survival", "Survival")):
        table = read_timeseries_sheet(path, sheet=sheet_name, sep=" ")
        table.index.name = "time"
        data[key] = table

    # TODO: possibly using a normal index would also be acceptable
    template = PandasConverter(Experiment())

    # Constant metadata for the ring test. Each entry is a triple of
    # (None, (section, field), callable returning the value).
    # NOTE(review): presumably the first element is a source key and None
    # marks newly created keys -- confirm against expyDB's map_to_meta.
    meta_map = [
        (None, ("experiment", "name"), lambda x: "Ring test"),
        (None, ("experiment", "interventions"), lambda x: ["exposure"]),
        (None, ("experiment", "observations"), lambda x: ["survival"]),
        (None, ("experiment", "public"), lambda x: True),
        (None, ("treatment", "medium"), lambda x: "water"),
        (None, ("observation", "unit"), lambda x: "-"),
        (None, ("observation", "time_unit"), lambda x: "day"),
        (None, ("intervention", "unit"), lambda x: "-"),
        (None, ("intervention", "time_unit"), lambda x: "day"),
    ]

    template.map_to_meta(map=meta_map)
    template.data = data
    template.to_excel(new_path)

    return new_path
|
|
55
|
+
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
import xarray as xr
|
|
3
|
+
from scipy.stats import binom
|
|
4
|
+
from matplotlib import pyplot as plt
|
|
5
|
+
from pymob.utils.testing import assert_no_nans_in_dataset
|
|
6
|
+
|
|
7
|
+
def prepare_survival_data_for_conditional_binomial(observations: xr.Dataset) -> xr.Dataset:
    """Add the coordinate ``survivors_before_t`` needed by a conditional binomial model.

    The survival observations are shifted by one time step so that, for each
    time point, the number of survivors at the beginning of the interval is
    available for modelling conditional survival of repeated observations.

    Missing values, which may occur in the observations but must not occur
    in the parameters of the distribution, are filled forward in time.
    Remaining NaNs (which can only occur at the initial times) are replaced
    with the nominal number of organisms ``observations.subject_count``.

    Parameters
    ----------
    observations : xr.Dataset
        Dataset providing ``survival`` and ``subject_count``.
        NOTE(review): the column stacking below assumes ``survival`` is laid
        out as (id, time) -- confirm against the callers.

    Returns
    -------
    xr.Dataset
        ``observations`` with the additional integer coordinate
        ``survivors_before_t`` on the dimensions ``("id", "time")``.

    Raises
    ------
    AssertionError
        If the filled survival at the first time point differs from the
        subject count for ids that have a subject count.
    """
    subject_count = observations.subject_count

    # carry the last observation forward until the next one; leading NaNs
    # become the subject count (no lethality observed before the first
    # observation)
    nsurv = observations["survival"].ffill(dim="time").fillna(subject_count)

    # wherever a subject count is available, the first (filled) survival
    # value has to be identical to it
    has_count = ~subject_count.isnull()
    np.testing.assert_array_equal(
        nsurv.isel(time=0, id=has_count),
        subject_count.sel(id=has_count),
    )

    assert_no_nans_in_dataset(nsurv.to_dataset())

    # survivors before t: the number of living organisms at the end of the
    # interval t-1. The first column repeats the initial count, the
    # remaining columns are the counts of all time points but the last.
    initial_count = nsurv.isel(time=0).values
    n_times = len(nsurv.time)
    earlier_counts = nsurv.isel(time=list(range(n_times - 1))).values
    survivors_before_t = np.column_stack([initial_count, earlier_counts]).astype(int)

    return observations.assign_coords(
        {"survivors_before_t": (("id", "time"), survivors_before_t)}
    )
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def survivors_at_start_of_interval(survival: xr.DataArray, ):
    """Return the number of survivors at the beginning of every time interval.

    The result holds, for each time point t, the number of living organisms
    at the end of the interval t-1; the first column repeats the value of
    the first time point. It is used for calculating conditional survival.

    Parameters
    ----------
    survival : xr.DataArray
        Survival counts with a ``time`` dimension.
        NOTE(review): the stacking assumes a second, leading dimension
        (e.g. id) so that the values are laid out as (id, time) -- confirm.

    Returns
    -------
    numpy.ndarray
        Integer array with one column per time point.
    """
    initial_count = survival.isel(time=0).values
    n_times = len(survival.time)

    # all time points except the last one, i.e. the counts "before t"
    earlier_counts = survival.isel(time=list(range(n_times - 1))).values

    stacked = np.column_stack((initial_count, earlier_counts))
    return stacked.astype(int)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def generate_survival_repeated_observations(
    S,
    N=10,
    time=None,
    reps=1,
    incidence=True,
    seed=1,
    ax=None,
    tol=None
):
    """Generate observations from a survival function S, with N individuals.

    For this the conditional survival probability is used: for each time
    interval the probability of dying in that interval, conditional on
    having lived until the beginning of that interval.

        S_cond[i] = (S[i-1] - S[i]) / S[i-1]   where i are the intervals in T

        L[i] = Binom(p=S_cond[i], N=N_alive[i-1])

    L[i] is the death incidence in the interval i, i.e. the number of
    individuals deceased in that interval. For the first interval S[i-1] is
    taken as 1. The number of binomial trials also changes over time:

        N_alive[i] = N - sum(L[:i])

    so the number of alive individuals is reduced by the cumulative number
    of deceased individuals. If the survival function has already reached
    zero at the start of an interval, nobody is left to die and the
    conditional probability is set to zero instead of dividing by zero.

    As a side effect the scaled survival curve ``S * N`` and the simulated
    numbers of survivors are drawn on ``ax``.

    Parameters
    ----------
    S : ArrayLike
        Values of the survival function; must be monotonically decreasing.
        Lists and tuples are accepted and converted to an array.
    N : int
        The number of individuals in one experiment that is repeatedly
        observed.
    time : ArrayLike, optional
        Time points belonging to ``S``. Defaults to ``0 .. len(S) - 1``.
    reps : int
        The number of repeats of the same experiment.
    incidence : bool
        If True, returns the number of deaths in each interval. If False,
        returns the number of cumulative deaths until the interval
        (including the interval).
    seed : int
        Seed passed to ``numpy.random.default_rng``.
    ax : matplotlib axes, optional
        Axes to draw on. If None, a new figure is created with
        ``plt.subplots``.
    tol : float, optional
        If given, ``S`` is clipped to the range ``[tol, 1 - tol]`` before
        sampling.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(reps, len(time))`` with the death incidences or,
        for ``incidence=False``, the cumulative deaths.
    """
    rng = np.random.default_rng(seed)

    # Work on an array so that list/tuple input is indexed and scaled like an
    # ndarray (``S * N`` must scale the curve, not repeat a list).
    S = np.asarray(S)

    if time is None:
        time = np.arange(len(S))

    T = len(time)

    if tol is not None:
        S = np.clip(S, tol, 1 - tol)

    L = np.zeros(shape=(reps, T))
    for i in range(T):
        # survival at the beginning of the interval
        S_0 = 1.0 if i == 0 else S[i - 1]

        # probability to die within the interval conditional on having
        # survived until its beginning; with S_0 == 0 nobody is alive any
        # more, so avoid the 0 / 0 division and use a valid probability
        if S_0 > 0:
            p_death = (S_0 - S[i]) / S_0
        else:
            p_death = 0.0

        # individuals still alive in each repeat at the start of the interval
        n_alive = N - L.sum(axis=1).astype(int)

        L[:, i] = binom(p=p_death, n=n_alive).rvs(random_state=rng)

    # plot the expected number of survivors and the simulated observations
    if ax is None:
        fig, ax = plt.subplots(1, 1)
    ax.plot(time, S * N, color="black")
    ax.plot(time, N - L.cumsum(axis=1).T,
            marker="o", color="tab:red", ls="", alpha=.75)
    ax.set_xlabel("Time [h]")
    ax.set_ylabel("Survival")
    ax.set_ylim(N - N * 1.02, N * 1.02)

    if incidence:
        return L
    return L.cumsum(axis=1)
|