nhp-model 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
nhp/docker/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """NHP Demand Model - Docker runtime."""
nhp/docker/__main__.py ADDED
@@ -0,0 +1,100 @@
1
+ """Methods for running the NHP model in a Docker container."""
2
+
3
+ import argparse
4
+ import logging
5
+ import uuid
6
+ from datetime import datetime
7
+
8
+ from nhp.docker.config import Config
9
+ from nhp.docker.run import RunWithAzureStorage, RunWithLocalStorage
10
+ from nhp.model.data import Local
11
+ from nhp.model.run import run_all
12
+
13
+
14
def parse_args():
    """Parse command line arguments.

    Returns:
        The parsed arguments, with the attributes `params_file`, `model_run_id`,
        `local_storage` and `save_full_model_results`. If no model run id is given on
        the command line a new random UUID is generated.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "params_file",
        help="Name of the parameters file stored in Azure",
    )
    parser.add_argument(
        "model_run_id",
        help="Unique identifier for this model run.",
        # argparse only applies `type` to string defaults, any other default is stored as
        # is. Using the `uuid.uuid4` function as the default would leave the function
        # itself (not an id) in the namespace, so the id is generated after parsing.
        default=None,
        type=uuid.UUID,
        nargs="?",
    )

    parser.add_argument(
        "--local-storage",
        "-l",
        action="store_true",
        help="Use local storage (instead of Azure)",
    )

    parser.add_argument("--save-full-model-results", action="store_true")

    args = parser.parse_args()
    if args.model_run_id is None:
        args.model_run_id = uuid.uuid4()
    return args
39
+
40
+
41
def main(config: Config):
    """The main method.

    Parses the command line arguments, creates the runner (local or Azure storage), runs
    the model and then passes the results, along with the timings of the run, to the
    runner.

    If an exception is raised once the runner has been created, it is logged and reported
    via `runner.error` instead of being propagated. If the exception is raised before the
    runner exists there is nothing to report to, so it is logged and re-raised.

    Args:
        config: The configuration to use when running with Azure storage.
    """
    # the runner is only available once it has been successfully created, keep track of
    # that so the error handler never refers to an unbound name
    runner = None
    # run the model in a try catch block - ensures any exceptions that occur in the
    # multiprocessing pool are handled and logged correctly.
    # this prevents the docker container from hanging indefinitely.
    try:
        args = parse_args()

        logging.basicConfig(
            format="%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s",
            level=logging.INFO,
            datefmt="%Y-%m-%d %H:%M:%S",
        )

        if args.local_storage:
            runner = RunWithLocalStorage(args.params_file)
        else:
            runner = RunWithAzureStorage(args.model_run_id, args.params_file, config)

        logging.info("running model for: %s", args.params_file)
        logging.info("submitted by: %s", runner.params.get("user"))
        logging.info("model_runs: %s", runner.params["model_runs"])
        logging.info("start_year: %s", runner.params["start_year"])
        logging.info("end_year: %s", runner.params["end_year"])
        logging.info("app_version: %s", runner.params["app_version"])

        start_time = datetime.now()

        results, variants = run_all(
            runner.params,
            Local.create("data"),
            runner.progress_callback(),
            args.save_full_model_results,
        )

        end_time = datetime.now()
        elapsed_time = end_time - start_time

        additional_metadata = {
            "model_run_start_time": start_time.isoformat(),
            "model_run_end_time": end_time.isoformat(),
            "model_run_elapsed_time_seconds": elapsed_time.total_seconds(),
        }

        runner.finish(results, variants, args.save_full_model_results, additional_metadata)

        logging.info("complete")
    except Exception as e:
        # logging.exception keeps the traceback, which is otherwise lost
        logging.exception("An error occurred: %s", str(e))
        if runner is None:
            # failed before a runner could be created (e.g. the params could not be
            # loaded): surface the original exception
            raise
        runner.error(str(e))
91
+
92
+
93
def init():
    """Call main with a fresh Config, but only when this module is run as a script."""
    if __name__ != "__main__":
        return
    main(Config())


init()
nhp/docker/config.py ADDED
@@ -0,0 +1,77 @@
1
+ """config values for docker container."""
2
+
3
+ import os
4
+
5
+ import dotenv
6
+
7
+
8
class Config:
    """Settings for the Docker container, read from environment variables."""

    def __init__(self):
        """Load any .env file, then read the settings from the environment."""
        dotenv.load_dotenv()

        env = os.environ

        self._app_version = env.get("APP_VERSION", "dev")
        self._data_version = env.get("DATA_VERSION", "dev")

        # STORAGE_ACCOUNT is used for any storage account that is not set explicitly
        fallback_account = env.get("STORAGE_ACCOUNT")

        self._queue_storage_account = env.get("QUEUE_STORAGE_ACCOUNT", fallback_account)
        self._data_storage_account = env.get("DATA_STORAGE_ACCOUNT", fallback_account)
        self._results_storage_account = env.get("RESULTS_STORAGE_ACCOUNT", fallback_account)
        self._full_model_results_storage_account = env.get(
            "FULL_MODEL_RESULTS_STORAGE_ACCOUNT", fallback_account
        )
        self._model_runs_table_storage_account = env.get(
            "MODEL_RUNS_TABLE_STORAGE_ACCOUNT", fallback_account
        )

    @staticmethod
    def _require(value, variable_name):
        """Return value, raising a ValueError naming the variable if it is None."""
        if value is not None:
            return value
        raise ValueError(f"{variable_name} environment variable must be set")

    @property
    def APP_VERSION(self) -> str:
        """The version of the app ("dev" if not set)."""
        return self._app_version

    @property
    def DATA_VERSION(self) -> str:
        """The version of the data to use ("dev" if not set)."""
        return self._data_version

    @property
    def QUEUE_STORAGE_ACCOUNT(self) -> str:
        """The storage account name for the queue container."""
        return self._require(self._queue_storage_account, "QUEUE_STORAGE_ACCOUNT")

    @property
    def DATA_STORAGE_ACCOUNT(self) -> str:
        """The storage account name for the data container."""
        return self._require(self._data_storage_account, "DATA_STORAGE_ACCOUNT")

    @property
    def RESULTS_STORAGE_ACCOUNT(self) -> str:
        """The storage account name for the results container."""
        return self._require(self._results_storage_account, "RESULTS_STORAGE_ACCOUNT")

    @property
    def FULL_MODEL_RESULTS_STORAGE_ACCOUNT(self) -> str:
        """The storage account name for the full model results container."""
        return self._require(
            self._full_model_results_storage_account, "FULL_MODEL_RESULTS_STORAGE_ACCOUNT"
        )

    @property
    def MODEL_RUNS_TABLE_STORAGE_ACCOUNT(self) -> str:
        """The storage account name for the model runs table."""
        return self._require(
            self._model_runs_table_storage_account, "MODEL_RUNS_TABLE_STORAGE_ACCOUNT"
        )
nhp/docker/run.py ADDED
@@ -0,0 +1,350 @@
1
+ """Run the model inside of the docker container."""
2
+
3
+ import gzip
4
+ import json
5
+ import logging
6
+ import os
7
+ import re
8
+ from pathlib import Path
9
+ from typing import Any, Callable
10
+ from uuid import UUID
11
+
12
+ import pandas as pd
13
+ from azure.data.tables import TableServiceClient, UpdateMode
14
+ from azure.identity import DefaultAzureCredential
15
+ from azure.storage.blob import ContainerClient
16
+ from azure.storage.filedatalake import DataLakeServiceClient
17
+
18
+ from nhp.docker.config import Config
19
+ from nhp.model.params import load_params
20
+ from nhp.model.results import generate_results_json, save_results_files
21
+ from nhp.model.run import noop_progress_callback
22
+
23
+
24
class RunWithLocalStorage:
    """Runner that reads the params from, and saves the results to, local storage."""

    def __init__(self, filename: str):
        """Load the params for the run.

        Args:
            filename: Name of the parameter file, it is loaded from the "queue" folder.
        """
        params_path = f"queue/{filename}"
        self.params = load_params(params_path)

    def finish(
        self,
        results: dict[str, pd.DataFrame],
        variants: list[str],
        _save_full_model_results: bool,
        _additional_metadata: dict[str, Any],
    ) -> None:
        """Steps to perform once the model has run: save the results files.

        Args:
            results: A dictionary containing the results dataframes.
            variants: A list of the variants that were run.
            _save_full_model_results: Not used when running with local storage.
            _additional_metadata: Not used when running with local storage.
        """
        save_results_files(results, self.params, variants)

    def error(self, error_message: str) -> None:
        """Handle an error raised during the model run.

        Nothing is done with the message when running with local storage.

        Args:
            error_message: The error message (ignored).
        """

    def progress_callback(self) -> Callable[[Any], Callable[[Any], None]]:
        """Get the progress callback.

        Progress is not tracked when running with local storage.

        Returns:
            The no-op progress callback function.
        """
        return noop_progress_callback
71
+
72
+
73
class RunWithAzureStorage:
    """Methods for running with azure storage.

    Downloads the params and the data from Azure storage, tracks the state of the model
    run in the "modelruns" table, and uploads the results once the model has run.
    """

    def __init__(self, model_run_id: UUID, filename: str, config: Config | None = None):
        """Initialise RunWithAzureStorage.

        Downloads the params file and the data required for the run, then sets the status
        of the model run in table storage to "running".

        Args:
            model_run_id: Unique identifier for this model run.
            filename: Name of the parameter file to load.
            config: The configuration for the run. Defaults to Config().
        """
        # reduce the verbosity of the azure sdk loggers
        logging.getLogger("azure.storage.common.storageclient").setLevel(logging.WARNING)
        logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
            logging.WARNING
        )
        self._model_run_id = model_run_id
        self._config = config or Config()

        # only keep the major.minor part of the version, e.g. "5.0.1" becomes "5.0".
        # versions that do not match the pattern (such as "dev") are left unchanged
        self._app_version = re.sub(r"(\d+\.\d+)\..*", r"\1", self._config.APP_VERSION)

        self.params = self._get_params(filename)
        self._get_data(self.params["start_year"], self.params["dataset"])

        self._table_client = TableServiceClient(
            endpoint=f"https://{self._config.MODEL_RUNS_TABLE_STORAGE_ACCOUNT}.table.core.windows.net",
            credential=DefaultAzureCredential(),
        ).get_table_client("modelruns")

        self._update_table_storage(status="running")

    def _get_container(self, account_name: str, container_name: str):
        """Get a client for a blob container.

        Args:
            account_name: The name of the storage account.
            container_name: The name of the container in the storage account.

        Returns:
            A ContainerClient for the container.
        """
        return ContainerClient(
            account_url=f"https://{account_name}.blob.core.windows.net",
            container_name=container_name,
            credential=DefaultAzureCredential(),
        )

    def _get_params(self, filename: str) -> dict:
        """Get the parameters for the model.

        The client for the params blob is kept so the blob can be deleted from the queue
        once the model run has finished.

        Args:
            filename: The name of the params file.

        Returns:
            The parameters for the model.
        """
        logging.info("downloading params: %s", filename)

        self._queue_blob = self._get_container(
            self._config.QUEUE_STORAGE_ACCOUNT, "queue"
        ).get_blob_client(filename)

        params_content = self._queue_blob.download_blob().readall()

        return json.loads(params_content)

    def _get_data(self, year: str, dataset: str) -> None:
        """Get data to run the model.

        Downloads data from Azure storage for the specified year and dataset. The parquet
        files are saved under the local "data" folder, using the path of the file in Azure
        storage with the data version prefix removed.

        Args:
            year: The year of data to load.
            dataset: The dataset to load.
        """
        logging.info("downloading data (%s / %s)", year, dataset)
        fs_client = DataLakeServiceClient(
            account_url=f"https://{self._config.DATA_STORAGE_ACCOUNT}.dfs.core.windows.net",
            credential=DefaultAzureCredential(),
        ).get_file_system_client("data")

        version = self._config.DATA_VERSION

        # the paths directly inside the folder for this version of the data
        paths = [p.name for p in fs_client.get_paths(version, recursive=False)]

        for p in paths:
            subpath = f"{p}/fyear={year}/dataset={dataset}"
            os.makedirs(f"data{subpath.removeprefix(version)}", exist_ok=True)

            for i in fs_client.get_paths(subpath):
                filename = i.name
                if not filename.endswith("parquet"):
                    continue

                logging.info(" * %s", filename)
                local_name = "data" + filename.removeprefix(version)
                with open(local_name, "wb") as local_file:
                    file_client = fs_client.get_file_client(filename)
                    local_file.write(file_client.download_file().readall())

    def _upload_results_json(
        self, results: dict[str, pd.DataFrame], metadata: dict[str, Any], variants: list[str]
    ) -> None:
        """Upload the results.

        Once the model has run, generate the results json file, upload it (gzipped) to
        blob storage and record the path of the blob in table storage.

        Args:
            results: Dictionary containing the results dataframes.
            metadata: The metadata to attach to the blob.
            variants: A list of the variants that were run.
        """
        container = self._get_container(self._config.RESULTS_STORAGE_ACCOUNT, "results")

        results_file = generate_results_json(results, self.params, variants)

        results_json_gz_path = f"prod/{self._app_version}/{results_file}.json.gz"
        with open(f"results/{results_file}.json", "rb") as file:
            container.upload_blob(
                results_json_gz_path,
                gzip.compress(file.read()),
                # the metadata values are converted to strings before being attached
                metadata={k: str(v) for k, v in metadata.items()},
                overwrite=True,
            )

        self._update_table_storage(
            results_json_gz_path=results_json_gz_path,
        )

    def _upload_results_files(
        self,
        file_path: str,
        results: dict[str, pd.DataFrame],
        metadata: dict[str, str],
        variants: list[str],
    ) -> None:
        """Upload the results.

        Once the model has run, upload the files (parquet for model results and json for
        model params and variants) to blob storage.

        Args:
            file_path: The path to save the results to.
            results: A dictionary containing the results dataframes.
            metadata: The metadata to attach to the blobs.
            variants: A list of the variants that were run.
        """
        params = self.params
        container = self._get_container(self._config.RESULTS_STORAGE_ACCOUNT, "results")
        for k, v in results.items():
            container.upload_blob(
                f"{file_path}/{k}.parquet",
                v.to_parquet(index=False),
                overwrite=True,
                metadata=metadata,
            )
        container.upload_blob(
            f"{file_path}/params.json",
            json.dumps(params).encode("utf-8"),
            overwrite=True,
            metadata=metadata,
        )
        container.upload_blob(
            f"{file_path}/variants.json",
            json.dumps(variants).encode("utf-8"),
            overwrite=True,
            metadata=metadata,
        )

    def _upload_full_model_results(self) -> None:
        """Upload the full model results.

        Uploads every parquet file found under the local results folder of this model run
        (results/{dataset}/{scenario}/{create_datetime}) to blob storage.
        """
        container = self._get_container(self._config.FULL_MODEL_RESULTS_STORAGE_ACCOUNT, "results")

        dataset = self.params["dataset"]
        scenario = self.params["scenario"]
        create_datetime = self.params["create_datetime"]

        path = Path(f"results/{dataset}/{scenario}/{create_datetime}")

        for file in path.glob("**/*.parquet"):
            # the blob name is the local path without the leading "results/" folder
            filename = file.as_posix().removeprefix("results/")
            with open(file, "rb") as f:
                container.upload_blob(
                    f"full-model-results/{self._app_version}/{filename}",
                    f.read(),
                    overwrite=True,
                )

    def _update_table_storage(self, **kwargs) -> None:
        """Update the table storage with the given data.

        The values are merged into the entity for this model run, which is identified by
        the dataset (PartitionKey) and the model run id (RowKey).

        Args:
            **kwargs: The properties to set on the entity.
        """
        entity = {
            "PartitionKey": self.params["dataset"],
            # table storage keys are strings, but the model run id is held as a UUID
            "RowKey": str(self._model_run_id),
            **kwargs,
        }

        self._table_client.update_entity(entity, mode=UpdateMode.MERGE)

    def _cleanup(self) -> None:
        """Cleanup.

        Once the model has run, remove the file from the queue.
        """
        logging.info("cleaning up queue")

        self._queue_blob.delete_blob()

    def finish(
        self,
        results: dict[str, pd.DataFrame],
        variants: list[str],
        save_full_model_results: bool,
        additional_metadata: dict[str, Any],
    ) -> None:
        """Post model run steps.

        Uploads the results, marks the model run as complete in table storage and removes
        the params file from the queue.

        Args:
            results: A dictionary containing the results dataframes.
            variants: A list of the variants that were run.
            save_full_model_results: Whether to save the full model results or not.
            additional_metadata: Additional metadata to log.
        """
        # only the scalar params are used as metadata
        metadata = {k: v for k, v in self.params.items() if not isinstance(v, (dict, list))}
        metadata.update(additional_metadata)

        file_path = "/".join(
            [
                "aggregated-model-results",
                self._app_version,
                self.params["dataset"],
                self.params["scenario"],
                self.params["create_datetime"],
            ]
        )

        self._upload_results_files(
            file_path, results, {"model_run_id": str(self._model_run_id)}, variants
        )
        # see issue #286, this should be removed once we no longer need the results json file
        self._upload_results_json(results, metadata, variants)
        if save_full_model_results:
            self._upload_full_model_results()

        # only mark the run as complete once all of the results have been uploaded, so
        # the table never reports a complete run whose results are missing
        self._update_table_storage(
            status="complete",
            aggregated_results_path=file_path,
            outputs_app_uri=f"{self.params['dataset']}/{self._model_run_id}",
        )

        self._cleanup()

    def error(self, error_message: str) -> None:
        """Error handling.

        If there is an error during the model run, update the table storage with the error
        message. The params file is left in the queue.

        Args:
            error_message: The error message to log.
        """
        self._update_table_storage(status="error", error_message=error_message)

    def progress_callback(self) -> Callable[[Any], Callable[[Any], None]]:
        """Progress callback method.

        Stores the progress of each model type, as a json string, in the "progress"
        property of the model run in table storage.

        Returns:
            A callback function that updates progress for each model type.
        """
        current_progress = {
            "Inpatients": 0,
            "Outpatients": 0,
            "AaE": 0,
        }

        self._update_table_storage(progress=json.dumps(current_progress))

        def callback(model_type: Any) -> Callable[[Any], None]:
            def update(n_completed: Any) -> None:
                current_progress[model_type] = n_completed
                self._update_table_storage(progress=json.dumps(current_progress))

            return update

        return callback
nhp/model/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ """New Hospitals Programme Model."""
2
+
3
+ # re-export anything useful
4
+ from nhp.model.aae import AaEModel
5
+ from nhp.model.activity_resampling import ActivityResampling
6
+ from nhp.model.health_status_adjustment import HealthStatusAdjustmentInterpolated
7
+ from nhp.model.inpatients import InpatientEfficiencies, InpatientsModel
8
+ from nhp.model.model import Model
9
+ from nhp.model.model_iteration import ModelIteration
10
+ from nhp.model.outpatients import OutpatientsModel
11
+ from nhp.model.params import load_params, load_sample_params
nhp/model/__main__.py ADDED
@@ -0,0 +1,97 @@
1
+ """Functions to run the model.
2
+
3
+ This module allows you to run the various models. It allows you to run a single model run of one of
4
+ the different types of models for debugging purposes, or it allows you to run all of the models in
5
+ parallel saving the results to disk.
6
+
7
+ There are existing launch profiles for vscode that use this file, or you can use it directly in the
8
+ console, e.g.
9
+
10
+ python -m nhp.model -d data --model-run 1 -t ip
11
+
12
+ will run a single run of the inpatients model, returning the results to display.
13
+ """
14
+
15
+ import argparse
16
+ import logging
17
+
18
+ from nhp.model.aae import AaEModel
19
+ from nhp.model.data import Local
20
+ from nhp.model.inpatients import InpatientsModel
21
+ from nhp.model.outpatients import OutpatientsModel
22
+ from nhp.model.params import load_params, load_sample_params
23
+ from nhp.model.results import save_results_files
24
+ from nhp.model.run import run_all, run_single_model_run
25
+
26
+
27
def _parse_args() -> argparse.Namespace:  # pragma: no cover
    """Define the command line interface and parse the arguments passed to the program."""
    cli = argparse.ArgumentParser()

    # optional positional argument: an empty value means "use the sample parameters"
    cli.add_argument(
        "params_file",
        help="Path to the params.json file (leave empty to use sample parameters).",
        default="",
        nargs="?",
    )
    cli.add_argument("-d", "--data-path", default="data", help="Path to the data")
    cli.add_argument(
        "-r", "--model-run", type=int, default=1, help="Which model iteration to run"
    )
    cli.add_argument(
        "-t",
        "--type",
        type=str,
        choices=["all", "aae", "ip", "op"],
        default="all",
        help="Model type, either: all, ip, op, aae",
    )
    cli.add_argument("--save-full-model-results", action="store_true")

    parsed = cli.parse_args()
    return parsed
49
+
50
+
51
def main() -> None:
    """Main method.

    Runs when __name__ == "__main__".

    Loads the params (the sample params if no params file is given), then either runs all
    of the models and saves the results files, or runs a single model run of the selected
    model type.
    """
    args = _parse_args()

    if args.params_file != "":
        params = load_params(args.params_file)
    else:
        params = load_sample_params()

    if args.type == "all":
        logging.basicConfig(
            format="%(asctime)s.%(msecs)03d %(levelname)-8s %(message)s",
            level=logging.INFO,
            datefmt="%Y-%m-%d %H:%M:%S",
        )

        data = Local.create(args.data_path)
        # progress is not tracked here, so use a callback that does nothing
        results, variants = run_all(
            params,
            data,
            lambda _: lambda _: None,
            args.save_full_model_results,
        )
        save_results_files(results, params, variants)
        return

    # the model class to use for each of the single model types
    single_model_types = {
        "aae": AaEModel,
        "ip": InpatientsModel,
        "op": OutpatientsModel,
    }
    if args.type not in single_model_types:
        raise ValueError(f"Unknown model type: {args.type}")
    model_type = single_model_types[args.type]

    run_single_model_run(params, args.data_path, model_type, args.model_run)
89
+
90
+
91
def init():
    """Call main, but only when this module is run as a script."""
    if __name__ != "__main__":
        return
    main()


init()
nhp/model/_version.py ADDED
@@ -0,0 +1,24 @@
1
+ # file generated by vcs-versioning
2
+ # don't change, don't track in version control
3
+ from __future__ import annotations
4
+
5
+ __all__ = [
6
+ "__version__",
7
+ "__version_tuple__",
8
+ "version",
9
+ "version_tuple",
10
+ "__commit_id__",
11
+ "commit_id",
12
+ ]
13
+
14
+ version: str
15
+ __version__: str
16
+ __version_tuple__: tuple[int | str, ...]
17
+ version_tuple: tuple[int | str, ...]
18
+ commit_id: str | None
19
+ __commit_id__: str | None
20
+
21
+ __version__ = version = '5.0.0'
22
+ __version_tuple__ = version_tuple = (5, 0, 0)
23
+
24
+ __commit_id__ = commit_id = None