PyPI - nhp-model - Versions diffs - 5.0.0__py3-none-any.whl - Mend

nhp-model 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

nhp/docker/__init__.py +1 -0
nhp/docker/__main__.py +100 -0
nhp/docker/config.py +77 -0
nhp/docker/run.py +350 -0
nhp/model/__init__.py +11 -0
nhp/model/__main__.py +97 -0
nhp/model/_version.py +24 -0
nhp/model/aae.py +207 -0
nhp/model/activity_resampling.py +282 -0
nhp/model/data/__init__.py +8 -0
nhp/model/data/data.py +93 -0
nhp/model/data/local.py +126 -0
nhp/model/data/reference/__init__.py +44 -0
nhp/model/data/reference/hsa_split_normal_params.csv +145 -0
nhp/model/data/reference/life_expectancy.csv +277 -0
nhp/model/data/reference/variant_lookup.json +19 -0
nhp/model/health_status_adjustment.py +227 -0
nhp/model/helpers.py +37 -0
nhp/model/inpatients.py +514 -0
nhp/model/model.py +520 -0
nhp/model/model_iteration.py +276 -0
nhp/model/outpatients.py +263 -0
nhp/model/params/__init__.py +71 -0
nhp/model/params/__main__.py +55 -0
nhp/model/params/params-sample.json +1549 -0
nhp/model/params/params-schema.json +1417 -0
nhp/model/results.py +337 -0
nhp/model/run.py +193 -0
nhp_model-5.0.0.dist-info/METADATA +19 -0
nhp_model-5.0.0.dist-info/RECORD +35 -0
nhp_model-5.0.0.dist-info/WHEEL +5 -0
nhp_model-5.0.0.dist-info/licenses/LICENSE +21 -0
nhp_model-5.0.0.dist-info/scm_file_list.json +96 -0
nhp_model-5.0.0.dist-info/scm_version.json +8 -0
nhp_model-5.0.0.dist-info/top_level.txt +1 -0

nhp/docker/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """NHP Demand Model - Docker runtime."""

nhp/docker/__main__.py ADDED Viewed

@@ -0,0 +1,100 @@
+"""Methods for running the NHP model in a Docker container."""
+import argparse
+import logging
+import uuid
+from datetime import datetime
+from nhp.docker.config import Config
+from nhp.docker.run import RunWithAzureStorage, RunWithLocalStorage
+from nhp.model.data import Local
+from nhp.model.run import run_all
+def parse_args():
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "params_file",
+        help="Name of the parameters file stored in Azure",
+    )
+    parser.add_argument(
+        "model_run_id",
+        help="Unique identifier for this model run.",
+        default=uuid.uuid4,
+        type=uuid.UUID,
+        nargs="?",
+    )
+    parser.add_argument(
+        "--local-storage",
+        "-l",
+        action="store_true",
+        help="Use local storage (instead of Azure)",
+    )
+    parser.add_argument("--save-full-model-results", action="store_true")
+    return parser.parse_args()
+def main(config: Config):
+    """The main method."""
+    # run the model in a try catch block - ensures any exceptions that occur in the
+    # multiprocessing pool are handled and logged correctly.
+    # this prevents the docker container from hanging indefinitely.
+    try:
+        args = parse_args()
+        logging.basicConfig(
+            format="%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s",
+            level=logging.INFO,
+            datefmt="%Y-%m-%d %H:%M:%S",
+        )
+        if args.local_storage:
+            runner = RunWithLocalStorage(args.params_file)
+        else:
+            runner = RunWithAzureStorage(args.model_run_id, args.params_file, config)
+        logging.info("running model for: %s", args.params_file)
+        logging.info("submitted by: %s", runner.params.get("user"))
+        logging.info("model_runs:   %s", runner.params["model_runs"])
+        logging.info("start_year:   %s", runner.params["start_year"])
+        logging.info("end_year:     %s", runner.params["end_year"])
+        logging.info("app_version:  %s", runner.params["app_version"])
+        start_time = datetime.now()
+        results, variants = run_all(
+            runner.params,
+            Local.create("data"),
+            runner.progress_callback(),
+            args.save_full_model_results,
+        )
+        end_time = datetime.now()
+        elapsed_time = end_time - start_time
+        additional_metadata = {
+            "model_run_start_time": start_time.isoformat(),
+            "model_run_end_time": end_time.isoformat(),
+            "model_run_elapsed_time_seconds": elapsed_time.total_seconds(),
+        }
+        runner.finish(results, variants, args.save_full_model_results, additional_metadata)
+        logging.info("complete")
+    except Exception as e:
+        logging.error("An error occurred: %s", str(e))
+        runner.error(str(e))
+def init():
+    """Method for calling main."""
+    if __name__ == "__main__":
+        config = Config()
+        main(config)
+init()

nhp/docker/config.py ADDED Viewed

@@ -0,0 +1,77 @@
+"""config values for docker container."""
+import os
+import dotenv
+class Config:
+    """Configuration class for Docker container."""
+    def __init__(self):
+        """Configuration settings for the Docker container."""
+        dotenv.load_dotenv()
+        self._app_version = os.environ.get("APP_VERSION", "dev")
+        self._data_version = os.environ.get("DATA_VERSION", "dev")
+        default_storage_account = os.environ.get("STORAGE_ACCOUNT")
+        self._queue_storage_account = os.environ.get(
+            "QUEUE_STORAGE_ACCOUNT", default_storage_account
+        )
+        self._data_storage_account = os.environ.get("DATA_STORAGE_ACCOUNT", default_storage_account)
+        self._results_storage_account = os.environ.get(
+            "RESULTS_STORAGE_ACCOUNT", default_storage_account
+        )
+        self._full_model_results_storage_account = os.environ.get(
+            "FULL_MODEL_RESULTS_STORAGE_ACCOUNT", default_storage_account
+        )
+        self._model_runs_table_storage_account = os.environ.get(
+            "MODEL_RUNS_TABLE_STORAGE_ACCOUNT", default_storage_account
+        )
+    @property
+    def APP_VERSION(self) -> str:
+        """What is the version of the app?"""
+        return self._app_version
+    @property
+    def DATA_VERSION(self) -> str:
+        """What version of the data are we using?"""
+        return self._data_version
+    @property
+    def QUEUE_STORAGE_ACCOUNT(self) -> str:
+        """What is the name of the storage account for the queue container?"""
+        if self._queue_storage_account is None:
+            raise ValueError("QUEUE_STORAGE_ACCOUNT environment variable must be set")
+        return self._queue_storage_account
+    @property
+    def DATA_STORAGE_ACCOUNT(self) -> str:
+        """What is the name of the storage account for the data container?"""
+        if self._data_storage_account is None:
+            raise ValueError("DATA_STORAGE_ACCOUNT environment variable must be set")
+        return self._data_storage_account
+    @property
+    def RESULTS_STORAGE_ACCOUNT(self) -> str:
+        """What is the name of the storage account for the results container?"""
+        if self._results_storage_account is None:
+            raise ValueError("RESULTS_STORAGE_ACCOUNT environment variable must be set")
+        return self._results_storage_account
+    @property
+    def FULL_MODEL_RESULTS_STORAGE_ACCOUNT(self) -> str:
+        """What is the name of the storage account for the full model results container?"""
+        if self._full_model_results_storage_account is None:
+            raise ValueError("FULL_MODEL_RESULTS_STORAGE_ACCOUNT environment variable must be set")
+        return self._full_model_results_storage_account
+    @property
+    def MODEL_RUNS_TABLE_STORAGE_ACCOUNT(self) -> str:
+        """What is the name of the storage account for the model runs table?"""
+        if self._model_runs_table_storage_account is None:
+            raise ValueError("MODEL_RUNS_TABLE_STORAGE_ACCOUNT environment variable must be set")
+        return self._model_runs_table_storage_account

nhp/docker/run.py ADDED Viewed

@@ -0,0 +1,350 @@
+"""Run the model inside of the docker container."""
+import gzip
+import json
+import logging
+import os
+import re
+from pathlib import Path
+from typing import Any, Callable
+from uuid import UUID
+import pandas as pd
+from azure.data.tables import TableServiceClient, UpdateMode
+from azure.identity import DefaultAzureCredential
+from azure.storage.blob import ContainerClient
+from azure.storage.filedatalake import DataLakeServiceClient
+from nhp.docker.config import Config
+from nhp.model.params import load_params
+from nhp.model.results import generate_results_json, save_results_files
+from nhp.model.run import noop_progress_callback
+class RunWithLocalStorage:
+    """Methods for running with local storage."""
+    def __init__(self, filename: str):
+        """Initialize the RunWithLocalStorage instance.
+        Args:
+            filename: Name of the parameter file to load.
+        """
+        self.params = load_params(f"queue/{filename}")
+    def finish(
+        self,
+        results: dict[str, pd.DataFrame],
+        variants: list[str],
+        _save_full_model_results: bool,
+        _additional_metadata: dict[str, Any],
+    ) -> None:
+        """Post model run steps.
+        Args:
+            results: A dictionary containing the results dataframes.
+            variants: A list of the variants that were run.
+            save_full_model_results: Whether to save the full model results or not.
+            additional_metadata: Additional metadata to log.
+        """
+        save_results_files(results, self.params, variants)
+    def error(self, error_message: str) -> None:
+        """Error handling.
+        If there is an error during the model run, log the error message.
+        Args:
+            error_message: The error message to log.
+        """
+        pass
+    def progress_callback(self) -> Callable[[Any], Callable[[Any], None]]:
+        """Progress callback method.
+        For local storage do nothing.
+        Returns:
+            A no-op progress callback function.
+        """
+        return noop_progress_callback
+class RunWithAzureStorage:
+    """Methods for running with azure storage."""
+    def __init__(self, model_run_id: UUID, filename: str, config: Config | None = None):
+        """Initialise RunWithAzureStorage.
+        Args:
+            model_run_id: Unique identifier for this model run.
+            filename: Name of the parameter file to load.
+            config: The configuration for the run. Defaults to Config().
+        """
+        logging.getLogger("azure.storage.common.storageclient").setLevel(logging.WARNING)
+        logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
+            logging.WARNING
+        )
+        self._model_run_id = model_run_id
+        self._config = config or Config()
+        self._app_version = re.sub("(\\d+\\.\\d+)\\..*", "\\1", self._config.APP_VERSION)
+        self.params = self._get_params(filename)
+        self._get_data(self.params["start_year"], self.params["dataset"])
+        self._table_client = TableServiceClient(
+            endpoint=f"https://{self._config.MODEL_RUNS_TABLE_STORAGE_ACCOUNT}.table.core.windows.net",
+            credential=DefaultAzureCredential(),
+        ).get_table_client("modelruns")
+        self._update_table_storage(status="running")
+    def _get_container(self, account_name: str, container_name: str):
+        return ContainerClient(
+            account_url=f"https://{account_name}.blob.core.windows.net",
+            container_name=container_name,
+            credential=DefaultAzureCredential(),
+        )
+    def _get_params(self, filename: str) -> dict:
+        """Get the parameters for the model.
+        Args:
+            filename: The name of the params file.
+        Returns:
+            The parameters for the model.
+        """
+        logging.info("downloading params: %s", filename)
+        self._queue_blob = self._get_container(
+            self._config.QUEUE_STORAGE_ACCOUNT, "queue"
+        ).get_blob_client(filename)
+        params_content = self._queue_blob.download_blob().readall()
+        return json.loads(params_content)
+    def _get_data(self, year: str, dataset: str) -> None:
+        """Get data to run the model.
+        Downloads data from Azure storage for the specified year and dataset.
+        Args:
+            year: The year of data to load.
+            dataset: The dataset to load.
+        """
+        logging.info("downloading data (%s / %s)", year, dataset)
+        fs_client = DataLakeServiceClient(
+            account_url=f"https://{self._config.DATA_STORAGE_ACCOUNT}.dfs.core.windows.net",
+            credential=DefaultAzureCredential(),
+        ).get_file_system_client("data")
+        version = self._config.DATA_VERSION
+        paths = [p.name for p in fs_client.get_paths(version, recursive=False)]
+        for p in paths:
+            subpath = f"{p}/fyear={year}/dataset={dataset}"
+            os.makedirs(f"data{subpath.removeprefix(version)}", exist_ok=True)
+            for i in fs_client.get_paths(subpath):
+                filename = i.name
+                if not filename.endswith("parquet"):
+                    continue
+                logging.info(" * %s", filename)
+                local_name = "data" + filename.removeprefix(version)
+                with open(local_name, "wb") as local_file:
+                    file_client = fs_client.get_file_client(filename)
+                    local_file.write(file_client.download_file().readall())
+    def _upload_results_json(
+        self, results: dict[str, pd.DataFrame], metadata: dict[str, Any], variants: list[str]
+    ) -> None:
+        """Upload the results.
+        Once the model has run, upload the results to blob storage.
+        Args:
+            results: Dictionary containing the results dataframes.
+            metadata: The metadata to attach to the blob.
+            variants: A list of the variants that were run.
+        """
+        container = self._get_container(self._config.RESULTS_STORAGE_ACCOUNT, "results")
+        results_file = generate_results_json(results, self.params, variants)
+        results_json_gz_path = f"prod/{self._app_version}/{results_file}.json.gz"
+        with open(f"results/{results_file}.json", "rb") as file:
+            container.upload_blob(
+                results_json_gz_path,
+                gzip.compress(file.read()),
+                metadata={k: str(v) for k, v in metadata.items()},
+                overwrite=True,
+            )
+        self._update_table_storage(
+            results_json_gz_path=results_json_gz_path,
+        )
+    def _upload_results_files(
+        self,
+        file_path: str,
+        results: dict[str, pd.DataFrame],
+        metadata: dict[str, str],
+        variants: list[str],
+    ) -> None:
+        """Upload the results.
+        Once the model has run, upload the files (parquet for model results and json for
+        model params) to blob storage.
+        Args:
+            file_path: The path to save the results to.
+            results: A dictionary containing the results dataframes.
+            metadata: The metadata to attach to the blob.
+            variants: A list of the variants that were run.
+        """
+        params = self.params
+        container = self._get_container(self._config.RESULTS_STORAGE_ACCOUNT, "results")
+        for k, v in results.items():
+            container.upload_blob(
+                file_path + f"/{k}.parquet",
+                v.to_parquet(index=False),
+                overwrite=True,
+                metadata=metadata,
+            )
+        container.upload_blob(
+            f"{file_path}/params.json",
+            json.dumps(params).encode("utf-8"),
+            overwrite=True,
+            metadata=metadata,
+        )
+        container.upload_blob(
+            f"{file_path}/variants.json",
+            json.dumps(variants).encode("utf-8"),
+            overwrite=True,
+            metadata=metadata,
+        )
+    def _upload_full_model_results(self) -> None:
+        container = self._get_container(self._config.FULL_MODEL_RESULTS_STORAGE_ACCOUNT, "results")
+        dataset = self.params["dataset"]
+        scenario = self.params["scenario"]
+        create_datetime = self.params["create_datetime"]
+        path = Path(f"results/{dataset}/{scenario}/{create_datetime}")
+        for file in path.glob("**/*.parquet"):
+            filename = file.as_posix()[8:]
+            with open(file, "rb") as f:
+                container.upload_blob(
+                    f"full-model-results/{self._app_version}/{filename}",
+                    f.read(),
+                    overwrite=True,
+                )
+    def _update_table_storage(self, **kwargs) -> None:
+        """Update the table storage with the given data."""
+        entity = {
+            "PartitionKey": self.params["dataset"],
+            "RowKey": self._model_run_id,
+            **kwargs,
+        }
+        self._table_client.update_entity(entity, mode=UpdateMode.MERGE)
+    def _cleanup(self) -> None:
+        """Cleanup.
+        Once the model has run, remove the file from the queue.
+        """
+        logging.info("cleaning up queue")
+        self._queue_blob.delete_blob()
+    def finish(
+        self,
+        results: dict[str, pd.DataFrame],
+        variants: list[str],
+        save_full_model_results: bool,
+        additional_metadata: dict[str, Any],
+    ) -> None:
+        """Post model run steps.
+        Args:
+            results: A dictionary containing the results dataframes.
+            variants: A list of the variants that were run.
+            save_full_model_results: Whether to save the full model results or not.
+            additional_metadata: Additional metadata to log.
+        """
+        metadata = {
+            k: v
+            for k, v in self.params.items()
+            if not isinstance(v, dict) and not isinstance(v, list)
+        }
+        metadata.update(additional_metadata)
+        file_path = "/".join(
+            [
+                "aggregated-model-results",
+                self._app_version,
+                self.params["dataset"],
+                self.params["scenario"],
+                self.params["create_datetime"],
+            ]
+        )
+        self._update_table_storage(
+            status="complete",
+            aggregated_results_path=file_path,
+            outputs_app_uri=f"{self.params['dataset']}/{self._model_run_id}",
+        )
+        self._upload_results_files(
+            file_path, results, {"model_run_id": str(self._model_run_id)}, variants
+        )
+        # see issue #286, this should be removed once we no longer need the results json file
+        self._upload_results_json(results, metadata, variants)
+        if save_full_model_results:
+            self._upload_full_model_results()
+        self._cleanup()
+    def error(self, error_message: str) -> None:
+        """Error handling.
+        If there is an error during the model run, update the table storage with the error
+        message and clean up the queue.
+        Args:
+            error_message: The error message to log.
+        """
+        self._update_table_storage(status="error", error_message=error_message)
+    def progress_callback(self) -> Callable[[Any], Callable[[Any], None]]:
+        """Progress callback method.
+        Updates the metadata for the blob in the queue to give progress.
+        Returns:
+            A callback function that updates progress for each model type.
+        """
+        current_progress = {
+            "Inpatients": 0,
+            "Outpatients": 0,
+            "AaE": 0,
+        }
+        self._update_table_storage(progress=json.dumps(current_progress))
+        def callback(model_type: Any) -> Callable[[Any], None]:
+            def update(n_completed: Any) -> None:
+                current_progress[model_type] = n_completed
+                self._update_table_storage(progress=json.dumps(current_progress))
+            return update
+        return callback

nhp/model/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+"""New Hospitals Programme Model."""
+# re-export anything useful
+from nhp.model.aae import AaEModel
+from nhp.model.activity_resampling import ActivityResampling
+from nhp.model.health_status_adjustment import HealthStatusAdjustmentInterpolated
+from nhp.model.inpatients import InpatientEfficiencies, InpatientsModel
+from nhp.model.model import Model
+from nhp.model.model_iteration import ModelIteration
+from nhp.model.outpatients import OutpatientsModel
+from nhp.model.params import load_params, load_sample_params

nhp/model/__main__.py ADDED Viewed

@@ -0,0 +1,97 @@
+"""Functions to run the model.
+This module allows you to run the various models. It allows you to run a single model run of one of
+the different types of models for debugging purposes, or it allows you to run all of the models in
+parallel saving the results to disk.
+There are existing launch profiles for vscode that use this file, or you can use it directly in the
+console, e.g.
+    python -m nhp.model -d data --model-run 1 -t ip
+will run a single run of the inpatients model, returning the results to display.
+"""
+import argparse
+import logging
+from nhp.model.aae import AaEModel
+from nhp.model.data import Local
+from nhp.model.inpatients import InpatientsModel
+from nhp.model.outpatients import OutpatientsModel
+from nhp.model.params import load_params, load_sample_params
+from nhp.model.results import save_results_files
+from nhp.model.run import run_all, run_single_model_run
+def _parse_args() -> argparse.Namespace:  # pragma: no cover
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "params_file",
+        nargs="?",
+        default="",
+        help="Path to the params.json file (leave empty to use sample parameters).",
+    )
+    parser.add_argument("-d", "--data-path", help="Path to the data", default="data")
+    parser.add_argument(
+        "-r", "--model-run", help="Which model iteration to run", default=1, type=int
+    )
+    parser.add_argument(
+        "-t",
+        "--type",
+        default="all",
+        choices=["all", "aae", "ip", "op"],
+        help="Model type, either: all, ip, op, aae",
+        type=str,
+    )
+    parser.add_argument("--save-full-model-results", action="store_true")
+    return parser.parse_args()
+def main() -> None:
+    """Main method.
+    Runs when __name__ == "__main__"
+    """
+    # Grab the Arguments
+    args = _parse_args()
+    if args.params_file == "":
+        params = load_sample_params()
+    else:
+        params = load_params(args.params_file)
+    # define the model to run
+    match args.type:
+        case "all":
+            logging.basicConfig(
+                format="%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s",
+                level=logging.INFO,
+                datefmt="%Y-%m-%d %H:%M:%S",
+            )
+            results, variants = run_all(
+                params,
+                Local.create(args.data_path),
+                lambda _: lambda _: None,
+                args.save_full_model_results,
+            )
+            save_results_files(results, params, variants)
+            return
+        case "aae":
+            model_type = AaEModel
+        case "ip":
+            model_type = InpatientsModel
+        case "op":
+            model_type = OutpatientsModel
+        case _:
+            raise ValueError(f"Unknown model type: {args.type}")
+    run_single_model_run(params, args.data_path, model_type, args.model_run)
+def init():
+    """Method for calling main."""
+    if __name__ == "__main__":
+        main()
+init()

nhp/model/_version.py ADDED Viewed

@@ -0,0 +1,24 @@
+# file generated by vcs-versioning
+# don't change, don't track in version control
+from __future__ import annotations
+# the names exported by this module
+__all__ = [
+    "__version__",
+    "__version_tuple__",
+    "version",
+    "version_tuple",
+    "__commit_id__",
+    "commit_id",
+]
+# type declarations for the values assigned below
+version: str
+__version__: str
+__version_tuple__: tuple[int | str, ...]
+version_tuple: tuple[int | str, ...]
+commit_id: str | None
+__commit_id__: str | None
+# the package version, as a string and as a tuple of its parts
+__version__ = version = '5.0.0'
+__version_tuple__ = version_tuple = (5, 0, 0)
+# no commit id is recorded in this file
+__commit_id__ = commit_id = None