guts-base 0.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of guts-base might be problematic. Click here for more details.

@@ -0,0 +1,55 @@
1
+ import os
2
+
3
+ import pandas as pd
4
+
5
+ from expyDB.intervention_model import (
6
+ Experiment, Treatment, Timeseries,
7
+ PandasConverter,
8
+ )
9
+
10
def read_timeseries_sheet(path, sheet, sep=None):
    """Load one sheet of an Excel workbook as a two-level timeseries table.

    The first column of the sheet is used as the index. Every column header
    is split on ``sep`` (``None`` splits on runs of whitespace, as
    ``str.split`` does) and the resulting parts become the levels
    ``treatment_id`` and ``timeseries_id`` of a column ``MultiIndex``.

    Parameters
    ----------
    path :
        Location of the workbook, passed straight to ``pandas.read_excel``.
    sheet :
        Name of the sheet to read.
    sep : str, optional
        Separator between the treatment and timeseries part of a header.

    Returns
    -------
    pandas.DataFrame
        The sheet contents with the column ``MultiIndex`` described above.
    """
    frame = pd.read_excel(path, sheet_name=sheet, index_col=0)  # type: ignore

    # one tuple per column header, e.g. "A 1" -> ("A", "1") for sep=" "
    header_parts = [tuple(label.split(sep)) for label in frame.columns]

    frame.columns = pd.MultiIndex.from_tuples(
        header_parts, names=["treatment_id", "timeseries_id"]
    )
    return frame
17
+
18
def ringtest(path, new_path):
    """Rewrite a ring-test workbook into the ``PandasConverter`` template.

    Reads the sheets ``"Exposure"`` and ``"Survival"`` from ``path`` (column
    headers split on a single space), names their index ``"time"``, fills a
    fresh ``PandasConverter(Experiment())`` with fixed metadata and the two
    tables, and writes the result to ``new_path``.

    Parameters
    ----------
    path :
        Source workbook containing the ``Exposure`` and ``Survival`` sheets.
    new_path :
        Destination handed to ``PandasConverter.to_excel``.

    Returns
    -------
    The ``new_path`` argument, unchanged.
    """
    # read both sheets the same way; keys are the names used in the template
    tables = {}
    for key, sheet_name in (("exposure", "Exposure"), ("survival", "Survival")):
        table = read_timeseries_sheet(path, sheet=sheet_name, sep=" ")
        table.index.name = "time"
        tables[key] = table

    # TODO: possibly using a normal index would also be acceptable
    converter = PandasConverter(Experiment())

    # NOTE(review): entries look like (source key, (section, field), transform);
    # a source key of None presumably marks a newly created field -- confirm
    # against PandasConverter.map_to_meta.
    meta_map = [
        (None, ("experiment", "name"), lambda x: "Ring test"),
        (None, ("experiment", "interventions"), lambda x: ["exposure"]),
        (None, ("experiment", "observations"), lambda x: ["survival"]),
        (None, ("experiment", "public"), lambda x: True),
        (None, ("treatment", "medium"), lambda x: "water"),
        (None, ("observation", "unit"), lambda x: "-"),
        (None, ("observation", "time_unit"), lambda x: "day"),
        (None, ("intervention", "unit"), lambda x: "-"),
        (None, ("intervention", "time_unit"), lambda x: "day"),
    ]

    converter.map_to_meta(map=meta_map)
    converter.data = tables
    converter.to_excel(new_path)

    return new_path
55
+
@@ -0,0 +1,137 @@
1
+ import numpy as np
2
+ import xarray as xr
3
+ from scipy.stats import binom
4
+ from matplotlib import pyplot as plt
5
+ from pymob.utils.testing import assert_no_nans_in_dataset
6
+
7
def prepare_survival_data_for_conditional_binomial(observations: xr.Dataset) -> xr.Dataset:
    """Add a ``survivors_before_t`` coordinate for a conditional binomial model.

    The coordinate has the same size as ``survival`` but is shifted by one
    time step, so that entry ``t`` holds the number of survivors at the
    beginning of interval ``t``. This is what conditional survival of
    repeated observations is computed against.

    NaN values, which may occur in the observations but must not occur in
    the parameters of the distribution, are filled forward in time with the
    last observation. NaNs that remain afterwards can only occur at the
    initial times and are replaced with the nominal number of organisms
    (``observations.subject_count``).

    Parameters
    ----------
    observations : xr.Dataset
        Dataset with a ``survival`` variable and a ``subject_count`` entry.
        NOTE(review): ``survival`` is assumed to be laid out as
        ``(id, time)`` -- confirm against callers.

    Returns
    -------
    xr.Dataset
        ``observations`` with the integer coordinate ``survivors_before_t``
        on dimensions ``("id", "time")``.

    Raises
    ------
    AssertionError
        If a filled-in initial value disagrees with an available subject
        count; the NaN check of ``assert_no_nans_in_dataset`` runs as well.
    """
    subject_count = observations.subject_count
    count_known = ~subject_count.isnull()

    # forward fill between observations, then replace leading NaNs with the
    # subject count (no lethality observed before the first observation)
    filled = observations["survival"].ffill(dim="time").fillna(subject_count)

    # wherever a subject count is available, the first time point must match it
    np.testing.assert_array_equal(
        filled.isel(time=0, id=count_known),
        subject_count.sel(id=count_known),
    )

    assert_no_nans_in_dataset(filled.to_dataset())

    # number of living organisms at the end of interval t-1
    shifted = survivors_at_start_of_interval(filled)

    return observations.assign_coords(
        {"survivors_before_t": (("id", "time"), shifted)}
    )
44
+
45
+
46
+
47
def survivors_at_start_of_interval(survival: xr.DataArray):
    """Return survivor counts shifted by one step along ``time``.

    Column 0 of the result repeats the first time point; column ``t`` for
    ``t >= 1`` holds the value observed at ``t - 1``. This gives the number
    of living organisms at the end of interval ``t - 1``, which is used for
    calculating conditional survival.

    NOTE(review): the column stacking assumes ``time`` is the last of two
    dimensions (e.g. ``(id, time)``) -- confirm against callers.

    Returns
    -------
    numpy.ndarray
        Integer array built from the values of ``survival``.
    """
    n_times = len(survival.time)

    first_step = survival.isel(time=0).values
    all_but_last = survival.isel(time=list(range(n_times - 1))).values

    return np.column_stack([first_step, all_but_last]).astype(int)
55
+
56
+
57
def generate_survival_repeated_observations(
    S,
    N=10,
    time=None,
    reps=1,
    incidence=True,
    seed=1,
    ax=None,
    tol=None
):
    """Generate observations from a survival function S, with N individuals.

    For each time interval the conditional probability of dying is used,
    i.e. the probability of dying in that interval given survival until
    the beginning of that interval:

        p_death[i] = (S[i-1] - S[i]) / S[i-1]    (with S[-1] taken as 1)

        L[i] ~ Binom(p=p_death[i], n=N_alive[i-1])

    L[i] is the death incidence in interval i, the number of individuals
    that died in that interval. The number of trials shrinks over time by
    the cumulative number of deceased individuals:

        N_alive[i] = N - sum(L[:i+1])

    The survival curve ``S * N`` and the simulated survivor counts are
    always drawn on ``ax``; a new figure is created when ``ax`` is None.

    Parameters
    ----------
    S : ArrayLike
        Values of the survival function; must be monotonically decreasing.
        Lists and tuples are accepted and converted to a float array.
    N : int
        The number of individuals in one experiment that is repeatedly
        observed.
    time : ArrayLike, optional
        Time points belonging to ``S``. Defaults to ``0 .. len(S) - 1``.
    reps : int
        The number of repeats of the same experiment.
    incidence : bool
        If True, returns the number of deaths in each interval. If False,
        returns the cumulative number of deaths up to and including each
        interval.
    seed : int
        Seed for ``numpy.random.default_rng``.
    ax : matplotlib axes, optional
        Axes to draw on.
    tol : float, optional
        If given, ``S`` is clipped to ``[tol, 1 - tol]`` before sampling.
        NOTE(review): presumably this keeps the conditional probability
        away from a division by zero -- confirm the intended use.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(reps, len(time))`` with death counts.
    """
    rng = np.random.default_rng(seed)

    # Coerce up front: with a plain list, ``S * N`` in the plotting code
    # would repeat the list N times instead of scaling the survival curve.
    S = np.asarray(S, dtype=float)

    if time is None:
        time = np.arange(len(S))

    T = len(time)

    if tol is not None:
        S = np.clip(S, tol, 1 - tol)

    L = np.zeros(shape=(reps, T))
    for i in range(T):
        # survival at the start of the interval; everyone is alive before
        # the first time point
        S_0 = 1 if i == 0 else S[i - 1]

        # individuals still alive in each repeat at the start of interval i
        n_alive = N - L.sum(axis=1).astype(int)

        # binomial response of the conditional probability to die within
        # the interval, given survival until its beginning
        L[:, i] = binom(p=(S_0 - S[i]) / S_0, n=n_alive).rvs(random_state=rng)

    # observations
    if ax is None:
        _, ax = plt.subplots(1, 1)
    ax.plot(time, S * N, color="black")
    ax.plot(time, N - L.cumsum(axis=1).T,
            marker="o", color="tab:red", ls="", alpha=.75)
    ax.set_xlabel("Time [h]")
    ax.set_ylabel("Survival")
    ax.set_ylim(N - N * 1.02, N * 1.02)

    if incidence:
        return L
    return L.cumsum(axis=1)