PyPI - guts-base - Versions diffs - 0.8.5__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

guts-base 0.8.5py3-none-any.whl → 1.0.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of guts-base might be problematic. Click here for more details.

Files changed (21) hide show

guts_base/__init__.py +2 -1
guts_base/data/__init__.py +1 -1
guts_base/data/generator.py +6 -5
guts_base/data/survival.py +6 -0
guts_base/mod.py +27 -80
guts_base/prob.py +23 -275
guts_base/sim/__init__.py +10 -1
guts_base/sim/base.py +350 -78
guts_base/sim/constructors.py +31 -0
guts_base/sim/ecx.py +221 -63
guts_base/sim/mempy.py +85 -70
guts_base/sim/report.py +9 -9
guts_base/sim/utils.py +10 -0
{guts_base-0.8.5.dist-info → guts_base-1.0.0.dist-info}/METADATA +3 -4
guts_base-1.0.0.dist-info/RECORD +25 -0
guts_base/sim.py +0 -0
guts_base-0.8.5.dist-info/RECORD +0 -24
{guts_base-0.8.5.dist-info → guts_base-1.0.0.dist-info}/WHEEL +0 -0
{guts_base-0.8.5.dist-info → guts_base-1.0.0.dist-info}/entry_points.txt +0 -0
{guts_base-0.8.5.dist-info → guts_base-1.0.0.dist-info}/licenses/LICENSE +0 -0
{guts_base-0.8.5.dist-info → guts_base-1.0.0.dist-info}/top_level.txt +0 -0

guts_base/sim/base.py CHANGED Viewed

@@ -1,15 +1,18 @@
 import os
 import glob
+from functools import partial
+from copy import deepcopy
+import importlib
 import warnings
 import numpy as np
 import xarray as xr
 from diffrax import Dopri5
-from typing import Literal, Optional, List, Dict
+from typing import Literal, Optional, List, Dict, Mapping, Sequence, Tuple
 import tempfile
 import pandas as pd
 from pymob import SimulationBase
-from pymob.sim.config import DataVariable, Param, string_to_list
+from pymob.sim.config import DataVariable, Param, string_to_list, NumericArray
 from pymob.solvers import JaxSolver
 from pymob.solvers.base import rect_interpolation
@@ -17,14 +20,15 @@ from expyDB.intervention_model import (
     Treatment, Timeseries, select, from_expydb
 )
+from guts_base.sim.utils import GutsBaseError
 from guts_base import mod
 from guts_base.data import (
     to_dataset, reduce_multiindex_to_flat_index, create_artificial_data,
-    create_database_and_import_data_main, design_exposure_scenario
+    create_database_and_import_data_main, design_exposure_scenario, ExposureDataDict
 )
 from guts_base.sim.report import GutsReport
 class GutsBase(SimulationBase):
     """
     Initializes GUTS models from a variety of data sources
@@ -33,48 +37,77 @@ class GutsBase(SimulationBase):
     1. check if necessary entries are made in the configuration, otherwise add defaults
     2. read data or take from input
     3. process data (add dimensions, or add indices)
+    4. Prepare model input
     """
     solver = JaxSolver
     Report = GutsReport
-    unit_time: Literal["day", "hour", "minute", "second"] = "day"
-    results_interpolation: Optional[List[float|int]] = [np.nan, np.nan, 100]
-    ecx_mode: Literal["mean", "draws"] = "mean"
+    results_interpolation: Tuple[float,float,int] = (np.nan, np.nan, 100)
+    _skip_data_processing: bool = False
+    def initialize(self, input: Optional[Dict] = None):
+        """Initialization goes through a couple of steps:
+        1. Configuration: This makes case-study specific changes to the configuration
+            file or sets state variables that are relevant for the simulation
+            (see configure_case_study).
+            TODO: Ideally everything that is configurable ends up in the config so it
+            can be serialized
+        2. Import data: This step consists of submethods that can be adapted or
+            overwritten in subclass methods.
+            - .read_data
+            - .save_observations (currently commented out, see the FIXME below)
+            - .process_data (skipped if self._skip_data_processing is true)
+            process_data itself utilizes the submethods _create_indices and
+            _indices_to_dimensions which are empty methods by default, but can be used
+            in subclasses if needed
+        3. Initialize the simulation input (parameters, y0, x_in), see
+            prepare_simulation_input.
+        Splitting the simulation init method into these three steps allows subclasses
+        to modify the initialization with a higher granularity.
+
+        NOTE(review): `input` is not referenced anywhere in this method body.
+        """
-    def initialize(self, input: Dict = None):
+        # 1. Configuration
+        self.configure_case_study()
-        if hasattr(self.config.simulation, "unit_time"):
-            self.unit_time = self.config.simulation.unit_time  # type: ignore
+        # 2. Import data
+        self.observations = self.read_data()
+        # FIXME: Saving observations here is not intuitive. If I export a simulation,
+        # I want to use the last used state, not some obscure intermediate state
+        # self.save_observations(filename="observations.nc", directory=self.output_path, force=True)
+        if not self._skip_data_processing:
+            self.process_data()
-        if hasattr(self.config.simulation, "results_interpolation"):
-            self.results_interpolation = string_to_list(self.config.simulation.results_interpolation)
-            self.results_interpolation[0] = float(self.results_interpolation[0])
-            self.results_interpolation[1] = float(self.results_interpolation[1])
-            self.results_interpolation[2] = int(self.results_interpolation[2])
+        # 3. prepare y0 and x_in
+        self.prepare_simulation_input()
-        if "observations" in input:
-            self.observations = input["observations"]
-        else:
-            self.observations = self.read_data()
-            self.process_data()
+    def configure_case_study(self):
+        """Modify configuration file or set state variables
+
+        Sets, in this order:
+        - self.model and self.solver_post_processing from the configured model
+          class, if self._model_class is not None
+        - self.ecx_mode (always "mean")
+        - self.unit_time ("day", unless config.simulation.unit_time exists)
+        - self._skip_data_processing (only if config.simulation.skip_data_processing
+          exists)
+        - self.results_interpolation as a (float, float, int) tuple (only if
+          config.simulation.results_interpolation exists)
+
+        TODO: This should only modify the configuration file, so that changes
+        are transparent.
+        """
+        # take the right-hand side and the solver post-processing from the model class
+        if self._model_class is not None:
+            self.model = self._model_class._rhs_jax
+            self.solver_post_processing = self._model_class._solver_post_processing
-        # define tolerance based on the sovler tolerance
-        self.observations = self.observations.assign_coords(eps=self.config.jaxsolver.atol * 10)
+        self.ecx_mode: Literal["mean", "draws"] = "mean"
-        self._reindex_time_dim()
+        # default time unit, overridden by the configuration if present
+        self.unit_time: Literal["day", "hour", "minute", "second"] = "day"
+        if hasattr(self.config.simulation, "unit_time"):
+            self.unit_time = self.config.simulation.unit_time  # type: ignore
-        if "survival" in self.observations:
-            if "subject_count" not in self.observations.coords:
-                self.observations = self.observations.assign_coords(
-                    subject_count=("id", self.observations["survival"].isel(time=0).values, )
-                )
-            self.observations = self._data.prepare_survival_data_for_conditional_binomial(
-                observations=self.observations
-            )
+        # NOTE(review): bool() of any non-empty string is True, also for "False".
+        # Confirm that the config delivers a real boolean here.
+        if hasattr(self.config.simulation, "skip_data_processing"):
+            self._skip_data_processing = bool(self.config.simulation.skip_data_processing)  # type: ignore
-        if "exposure" in self.observations:
-            self.config.data_structure.exposure.observed=False
+        # the config entry is parsed with string_to_list and cast to (start, stop, num)
+        if hasattr(self.config.simulation, "results_interpolation"):
+            results_interpolation_string = string_to_list(self.config.simulation.results_interpolation)
+            self.results_interpolation = (
+                float(results_interpolation_string[0]),
+                float(results_interpolation_string[1]),
+                int(results_interpolation_string[2])
+            )
-        # prepare y0 and x_in
+    def prepare_simulation_input(self):
         x_in = self.parse_input(input="x_in", reference_data=self.observations, drop_dims=[])
         y0 = self.parse_input(input="y0", reference_data=self.observations, drop_dims=["time"])
@@ -187,6 +220,23 @@ class GutsBase(SimulationBase):
         self._create_indices()
         self._indices_to_dimensions()
+        # define tolerance based on the solver tolerance
+        self.observations = self.observations.assign_coords(eps=self.config.jaxsolver.atol * 10)
+        self._reindex_time_dim()
+        if "survival" in self.observations:
+            if "subject_count" not in self.observations.coords:
+                self.observations = self.observations.assign_coords(
+                    subject_count=("id", self.observations["survival"].isel(time=0).values, )
+                )
+            self.observations = self._data.prepare_survival_data_for_conditional_binomial(
+                observations=self.observations
+            )
+        if "exposure" in self.observations:
+            self.config.data_structure.exposure.observed=False
     def _create_indices(self):
         """Use if indices should be added to sim.indices and sim.observations"""
         pass
@@ -223,30 +273,38 @@ class GutsBase(SimulationBase):
             "is calculated without a dense time resolution, the estimates can be biased!"
         ))
     def recompute_posterior(self):
         """This function interpolates the posterior with a given resolution
         posterior_predictions calculate proper survival predictions for the
         posterior.
+        It also makes sure that the new interpolation does not include fewer values
+        than the original dataset
         """
-        if np.isnan(self.results_interpolation[0]):
-            self.results_interpolation[0] = float(self.observations["time"].min())
-        if np.isnan(self.results_interpolation[1]):
-            self.results_interpolation[1] = float(self.observations["time"].max())
+        ri = self.results_interpolation
         # generate high resolution posterior predictions
         if self.results_interpolation is not None:
             time_interpolate = np.linspace(
-                start=self.results_interpolation[0],
-                stop=self.results_interpolation[1],
+                start=float(self.observations["time"].min()) if np.isnan(ri[0]) else ri[0],
+                stop=float(self.observations["time"].max()) if np.isnan(ri[0]) else ri[1],
                 num=self.results_interpolation[2]
             )
-            self.observations = self.observations.reindex(
-                time=time_interpolate
+            # combine original coordinates and interpolation. This
+            # a) helps error checking during posterior predictions.
+            # b) makes sure that the original time vector is retained, which may be
+            #    relevant for the simulation success (e.g. IT model)
+            obs = self.observations.reindex(
+                time=np.unique(np.concatenate(
+                    [time_interpolate, self.observations["time"]]
+                )),
             )
+            obs["survivors_before_t"] = obs.survivors_before_t.ffill(dim="time").astype(int)
+            obs["survivors_at_start"] = obs.survivors_at_start.ffill(dim="time").astype(int)
+            self.observations = obs
         self.dispatch_constructor()
         _ = self._prob.posterior_predictions(self, self.inferer.idata) # type: ignore
@@ -254,13 +312,13 @@ class GutsBase(SimulationBase):
         self.logger.info("Recomputed posterior and storing in `numpyro_posterior_interp.nc`")
-    def prior_predictive_checks(self):
-        super().prior_predictive_checks()
+    def prior_predictive_checks(self, **plot_kwargs):
+        super().prior_predictive_checks(**plot_kwargs)
         self._plot.plot_prior_predictions(self, data_vars=["survival"])
-    def posterior_predictive_checks(self):
-        super().posterior_predictive_checks()
+    def posterior_predictive_checks(self, **plot_kwargs):
+        super().posterior_predictive_checks(**plot_kwargs)
         self.recompute_posterior()
         # TODO: Include posterior_predictive group once the survival predictions are correctly working
@@ -270,37 +328,93 @@ class GutsBase(SimulationBase):
     def plot(self, results):
         self._plot.plot_survival(self, results)
-    def copy(self):
-        with warnings.catch_warnings(action="ignore"):
-            sim_copy = type(self)(self.config)
-            sim_copy.observations = self.observations
-            sim_copy.model_parameters = self.model_parameters
-            if self.inferer is not None:
-                sim_copy.inferer = type(self.inferer)(self)
-                sim_copy.inferer.idata = self.inferer.idata
-            sim_copy.model = self.model
-            sim_copy.solver_post_processing = self.solver_post_processing
-            sim_copy.load_modules()
-        return sim_copy
-    @property
     def predefined_scenarios(self):
-        # this produces a exposure x_in dataset with only the dimensions ID and TIME
-        oral_acute_1d = design_exposure_scenario(
-            exposures={
-                "oral":dict(start=0, end=1.0, concentration=1.0),
-            },
-            t_max=10.01,
-            dt=1/24,
-            exposure_dimension="exposure_path"
+        """
+        TODO: Fix timescale to observations
+        TODO: Incorporate extra exposure patterns (constant, pulse_1day, pulse_2day)
+        """
+        # get the maximum possible time to provide exposure scenarios that are definitely
+        # long enough
+        time_max = max(
+            self.observations[self.config.simulation.x_dimension].max(),
+            *self.Report.ecx_estimates_times
         )
-        return dict(
-            oral_acute_1d=oral_acute_1d
+        # this produces a exposure x_in dataset with only the dimensions ID and TIME
+        standard_dimensions = (
+            self.config.simulation.batch_dimension,
+            self.config.simulation.x_dimension,
         )
+        # get dimensions different from standard dimensions
+        exposure_dimension = [
+            d for d in self.observations.exposure.dims if d not in
+            standard_dimensions
+        ]
+        # raise an error if the number of extra dimensions is larger than 1
+        if len(exposure_dimension) > 1:
+            raise ValueError(
+                f"{type(self).__name__} can currently handle one additional dimension for "+
+                f"the exposure beside {standard_dimensions}. You provided an exposure "+
+                f"array with the dimensions: {self.observations.exposure.dims}"
+            )
+        else:
+            exposure_dimension = exposure_dimension[0]
+        # iterate over the coordinates of the exposure dimensions to
+        exposure_coordinates = self.observations.exposure[exposure_dimension].values
+        scenarios = {}
+        for coord in exposure_coordinates:
+            concentrations = np.where(coord == exposure_coordinates, 1.0, 0.0)
+            exposure_dict = {
+                coord: ExposureDataDict(start=0, end=1, concentration=conc)
+                for coord, conc in zip(exposure_coordinates, concentrations)
+            }
+            scenario = design_exposure_scenario(
+                exposures=exposure_dict,
+                t_max=time_max,
+                dt=1/24,
+                exposure_dimension=exposure_dimension
+            )
+            scenarios.update({
+                f"1day_exposure_{coord}": scenario
+            })
+        return scenarios
+    @staticmethod
+    def _exposure_data_to_xarray(exposure_data: Dict[str, pd.DataFrame], dim: str):
+        """
+        TODO: Currently no rect interpolation
+        """
+        arrays = {}
+        for key, df in exposure_data.items():
+            # this override is necessary to make all dimensions work out
+            df.index.name = "time"
+            arrays.update({
+                key: df.to_xarray().to_dataarray(dim="id", name=key)
+            })
+        exposure_array = xr.Dataset(arrays).to_array(dim=dim, name="exposure")
+        exposure_array = exposure_array.transpose("id", "time", ...)
+        return xr.Dataset({"exposure": exposure_array})
+    @staticmethod
+    def _survival_data_to_xarray(survival_data: pd.DataFrame):
+        # TODO: survival name is currently not kept because the raw data is not transferred from the survival
+        survival_data.index.name = "time"
+        survival_array = survival_data.to_xarray().to_dataarray(dim="id", name="survival")
+        survival_array = survival_array.transpose("id", "time", ...)
+        arrays = {"survival": survival_array}
+        return xr.Dataset(arrays)
     def expand_batch_like_coordinate_to_new_dimension(self, coordinate, variables):
         """This method will take an existing coordinate of a dataset that has the same
         coordinate has the batch dimension. It will then re-express the coordinate as a
@@ -367,6 +481,164 @@ class GutsBase(SimulationBase):
     def initialize_from_script(self):
         pass
+    @property
+    def _model_class(self):
+        if hasattr(self.config.simulation, "model_class"):
+            module, attr = self.config.simulation.model_class.rsplit(".", 1)
+            _module = importlib.import_module(module)
+            return getattr(_module, attr)
+        else:
+            return None
+    ### API methods ###
+    def point_estimate(
+        self,
+        estimate: Literal["mean", "map"] = "map",
+        to: Literal["xarray", "dict"] = "xarray"
+    ):
+        """Returns a point estimate of the posterior. If you want more control over the posterior
+        use the attribute: sim.inferer.idata.posterior and summarize it or select from it
+        using the arviz (https://python.arviz.org/en/stable/index.html) and the
+        xarray (https://docs.xarray.dev/en/stable/index.html) packages
+        Parameters
+        ----------
+        estimate : Literal["map", "mean"]
+            Point estimate to return.
+            - map: Maximum a Posteriori. The sample that has the highest posterior probability.
+              This sample considers the correlation structure of the posterior
+            - mean: The average of all marginal parameter distributions.
+        to : Literal["xarray", "dict"]
+            Specifies the representation to transform the summarized data to. dict can
+            be used to insert parameters in the .evaluate() method. While xarray is the
+            standard view. Defaults to xarray
+        Example
+        -------
+        >>> sim.best_estimate(to='dict')
+        """
+        if estimate == "mean":
+            best_estimate = self.inferer.idata.posterior.mean(("chain", "draw"))
+        elif estimate == "map":
+            loglik = self.inferer.idata.log_likelihood\
+                .sum(["id", "time"])\
+                .to_array().sum("variable")
+            sample_max_loglik = loglik.argmax(dim=("chain", "draw"))
+            best_estimate = self.inferer.idata.posterior.sel(sample_max_loglik)
+        else:
+            raise GutsBaseError(
+                f"Estimate '{estimate}' not implemented. Choose one of ['mean', 'map']"
+            )
+        if to == "xarray":
+            return best_estimate
+        elif to == "dict":
+            return {k: v.values for k, v in best_estimate.items()}
+        else:
+            raise GutsBaseError(
+                "PymobConverter.best_esimtate() supports only return types to=['xarray', 'dict']" +
+                f"You used {to=}"
+            )
+    def evaluate(
+        self,
+        parameters: Mapping[str, float|NumericArray|Sequence[float]] = {},
+        y0: Mapping[str, float|NumericArray|Sequence[float]] = {},
+        x_in: Mapping[str, float|NumericArray|Sequence[float]] = {},
+    ):
+        """Evaluates the model along the coordinates of the observations with given
+        parameters, x_in, and y0. The dictionaries passed to the function arguments
+        only overwrite the existing default parameters; which makes the usage very simple.
+        Note that the first run of .evaluate() after calling the .dispatch_constructor()
+        takes a little longer, because the model and solver are jit-compiled to JAX for
+        highly efficient computations.
+        Parameters
+        ----------
+        theta : Dict[float|Sequence[float]]
+            Dictionary of model parameters that should be changed for dispatch.
+            Unspecified model parameters will assume the default values,
+            specified under config.model_parameters.NAME.value
+        y0 : Dict[float|Sequence[float]]
+            Dictionary of initial values that should be changed for dispatch.
+        x_in : Dict[float|Sequence[float]]
+            Dictionary of model input values that should be changed for dispatch.
+        Example
+        -------
+        >>> sim.dispatch_constructor()  # necessary if the sim object has been modified
+        >>> # evaluate setting the background mortaltiy to zero
+        >>> sim.evaluate(parameters={'hb': 0.0})
+        """
+        evaluator = self.dispatch(theta=parameters, x_in=x_in, y0=y0)
+        evaluator()
+        return evaluator.results
+    def load_exposure_scenario(
+        self,
+        data: str|Dict[str,pd.DataFrame],
+        sheet_name_prefix: str = "",
+        rect_interpolate=False
+    ):
+        """Replace the exposure of the simulation with a new exposure scenario.
+
+        The previous observations are kept as a deep copy in self._obs_backup.
+        Afterwards self.observations, config.simulation.x_in and the "x_in" and
+        "y0" entries of self.model_parameters are overwritten and
+        dispatch_constructor() is called.
+
+        Parameters
+        ----------
+        data : str | Dict[str, pd.DataFrame]
+            Either a path, which is read with read_excel_file, or the exposure
+            tables themselves, which are passed on to _exposure_data_to_xarray.
+        sheet_name_prefix : str
+            Passed on to read_excel_file. Only used if `data` is a path.
+        rect_interpolate : bool
+            NOTE(review): not referenced in this method body.
+        """
+        # NOTE(review): read_excel_file is not among the imports visible in this
+        # diff; confirm that it is in scope. The returned time_unit is not used.
+        if isinstance(data, str):
+            _data, time_unit = read_excel_file(
+                path=data,
+                sheet_name_prefix=sheet_name_prefix,
+                convert_time_to=self.unit_time
+            )
+        else:
+            _data = data
+        self._obs_backup = self.observations.copy(deep=True)
+        # the exposure dimension is the first configured exposure dimension that is
+        # neither the x-dimension nor the batch dimension
+        exposure_dim = [
+            d for d in self.config.data_structure.exposure.dimensions
+            if d not in (self.config.simulation.x_dimension, self.config.simulation.batch_dimension)
+        ]
+        exposure = self._exposure_data_to_xarray(
+            exposure_data=_data,
+            dim=exposure_dim[0]
+        )
+        # combine with the observed survival and keep only the ids of the exposure
+        new_obs = xr.combine_by_coords([
+            exposure,
+            self.observations.survival
+        ]).sel(id=exposure.id)
+        # drop all time points after the last time point of the exposure
+        self.observations = new_obs.sel(time=[t for t in new_obs.time if t <= exposure.time.max()])
+        self.config.simulation.x_in = ["exposure=exposure"]
+        # the parsed x_in is forward filled along time
+        self.model_parameters["x_in"] = self.parse_input("x_in", exposure).ffill("time")  # type: ignore
+        self.model_parameters["y0"] = self.parse_input("y0", drop_dims=["time"])
+        self.dispatch_constructor()
+    def export(self, directory: Optional[str] = None):
+        """Set config.simulation.skip_data_processing to False and export the
+        simulation with the export method of the parent class.
+
+        NOTE(review): confirm that False is the intended value for a simulation
+        that is re-imported from the export.
+        """
+        self.config.simulation.skip_data_processing = False
+        super().export(directory=directory)
 class GutsSimulationConstantExposure(GutsBase):
     t_max = 10
     def initialize_from_script(self):
@@ -387,7 +659,7 @@ class GutsSimulationConstantExposure(GutsBase):
         self.config.model_parameters.z = Param(value=0.2, free=True)
         self.model_parameters["parameters"] = self.config.model_parameters.value_dict
-        self.config.simulation.model = "guts_jax"
+        self.config.simulation.model = "guts_constant_exposure"
         self.coordinates["time"] = np.linspace(0,self.t_max)
@@ -397,7 +669,7 @@ class GutsSimulationConstantExposure(GutsBase):
         # =======================
         self.coordinates["time"] = np.array([0,self.t_max])
-        self.config.simulation.model = "guts_jax"
+        self.config.simulation.model = "guts_constant_exposure"
         self.solver = JaxSolver
@@ -438,7 +710,7 @@ class GutsSimulationVariableExposure(GutsSimulationConstantExposure):
         self.config.model_parameters.remove("C_0")
         self.model_parameters["parameters"] = self.config.model_parameters.value_dict
-        self.config.simulation.solver_post_processing = "post_exposure"
+        self.config.simulation.solver_post_processing = "red_sd_post_processing"
         self.config.simulation.model = "guts_variable_exposure"

guts_base/sim/constructors.py ADDED Viewed

@@ -0,0 +1,31 @@
+import os
+import arviz as az
+from guts_base.sim import GutsBase
+def construct_sim_from_config(
+    scenario: str,
+    simulation_class: type,
+    output_path=None
+) -> GutsBase:
+    """Helper function to construct simulations for debugging"""
+    sim = simulation_class(f"scenarios/{scenario}/settings.cfg")
+    # this sets a different output directory
+    if output_path is not None:
+        p = output_path / sim.config.case_study.name / "results" / sim.config.case_study.scenario
+        sim.config.case_study.output_path = str(p)
+    else:
+        sim.config.case_study.scenario = "debug_test"
+    sim.setup()
+    return sim
+def load_idata(sim: GutsBase, idata_file: str) -> GutsBase:
+    sim.set_inferer("numpyro")
+    if os.path.exists(idata_file):
+        sim.inferer.idata = az.from_netcdf(idata_file)
+    else:
+        sim.inferer.idata = None
+    return sim

guts-base 0.8.5__py3-none-any.whl → 1.0.0__py3-none-any.whl

Potentially problematic release.

guts-base 0.8.5py3-none-any.whl → 1.0.0py3-none-any.whl