chap-core 0.0.8__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. chap_core/__init__.py +8 -0
  2. chap_core/_legacy/__init__.py +0 -0
  3. chap_core/_legacy/file_io.py +50 -0
  4. chap_core/_legacy_dataset.py +101 -0
  5. chap_core/adaptors/__init__.py +0 -0
  6. chap_core/adaptors/gluonts.py +50 -0
  7. chap_core/alarms.py +18 -0
  8. chap_core/api.py +264 -0
  9. chap_core/api_types.py +55 -0
  10. chap_core/assessment/__init__.py +0 -0
  11. chap_core/assessment/dataset_splitting.py +141 -0
  12. chap_core/assessment/forecast.py +83 -0
  13. chap_core/assessment/multi_location_evaluator.py +124 -0
  14. chap_core/assessment/prediction_evaluator.py +353 -0
  15. chap_core/chap_cli.py +133 -0
  16. chap_core/cli.py +277 -0
  17. chap_core/climate_data/__init__.py +14 -0
  18. chap_core/climate_data/external.py +4 -0
  19. chap_core/climate_data/gee_legacy.py +97 -0
  20. chap_core/climate_data/gridded_data.py +45 -0
  21. chap_core/climate_data/meteostat_wrapper.py +242 -0
  22. chap_core/climate_data/seasonal_forecasts.py +42 -0
  23. chap_core/climate_health.py +1 -0
  24. chap_core/climate_predictor.py +84 -0
  25. chap_core/data/__init__.py +4 -0
  26. chap_core/data/adaptors.py +3 -0
  27. chap_core/data/datasets.py +7 -0
  28. chap_core/data/gluonts_adaptor/__init__.py +0 -0
  29. chap_core/data/gluonts_adaptor/dataset.py +147 -0
  30. chap_core/data/gluonts_adaptor/model.py +15 -0
  31. chap_core/data_wrangling/__init__.py +0 -0
  32. chap_core/data_wrangling/flows.py +132 -0
  33. chap_core/data_wrangling/tasks.py +35 -0
  34. chap_core/database/__init__.py +0 -0
  35. chap_core/database/database.py +42 -0
  36. chap_core/database/local_db_cache.py +49 -0
  37. chap_core/dataset_protocols.py +73 -0
  38. chap_core/datatypes.py +386 -0
  39. chap_core/dhis2_interface/ChapProgram.py +126 -0
  40. chap_core/dhis2_interface/__init__.py +0 -0
  41. chap_core/dhis2_interface/json_parsing.py +120 -0
  42. chap_core/dhis2_interface/periods.py +14 -0
  43. chap_core/dhis2_interface/pydantic_to_spatiotemporal.py +23 -0
  44. chap_core/dhis2_interface/src/Config.py +15 -0
  45. chap_core/dhis2_interface/src/HttpRequest.py +11 -0
  46. chap_core/dhis2_interface/src/PullAnalytics.py +36 -0
  47. chap_core/dhis2_interface/src/PullClimateData.py +2 -0
  48. chap_core/dhis2_interface/src/PushResult.py +49 -0
  49. chap_core/dhis2_interface/src/__init__.py +0 -0
  50. chap_core/dhis2_interface/src/create_data_element_if_not_exists.py +79 -0
  51. chap_core/dhis2_interface/src/dhis_json_parser.py +0 -0
  52. chap_core/docker_helper_functions.py +57 -0
  53. chap_core/external/__init__.py +0 -0
  54. chap_core/external/external_model.py +497 -0
  55. chap_core/external/mlflow.py +259 -0
  56. chap_core/external/python_model.py +40 -0
  57. chap_core/external/r_description.py +32 -0
  58. chap_core/external/r_model.py +60 -0
  59. chap_core/external/r_models.py +16 -0
  60. chap_core/external/spes.py +2 -0
  61. chap_core/fetch/__init__.py +4 -0
  62. chap_core/file_io/__init__.py +1 -0
  63. chap_core/file_io/cleaners.py +62 -0
  64. chap_core/file_io/example_data_set.py +59 -0
  65. chap_core/file_io/external_file.py +17 -0
  66. chap_core/file_io/file_paths.py +13 -0
  67. chap_core/file_io/load.py +6 -0
  68. chap_core/geo_coding/__init__.py +0 -0
  69. chap_core/geo_coding/location_lookup.py +111 -0
  70. chap_core/geojson.py +59 -0
  71. chap_core/geometry.py +148 -0
  72. chap_core/google_earth_engine/__init__.py +0 -0
  73. chap_core/google_earth_engine/_refactor_gee_era5.py +140 -0
  74. chap_core/google_earth_engine/gee_era5.py +246 -0
  75. chap_core/google_earth_engine/gee_raw.py +131 -0
  76. chap_core/internal_state.py +49 -0
  77. chap_core/main.py +85 -0
  78. chap_core/model_spec.py +97 -0
  79. chap_core/omnipy_lib.py +29 -0
  80. chap_core/pandas_adaptors.py +12 -0
  81. chap_core/plotting/__init__.py +1 -0
  82. chap_core/plotting/plotting.py +70 -0
  83. chap_core/plotting/prediction_plot.py +179 -0
  84. chap_core/predictor/__init__.py +29 -0
  85. chap_core/predictor/feature_spec.py +30 -0
  86. chap_core/predictor/naive_estimator.py +50 -0
  87. chap_core/predictor/naive_predictor.py +131 -0
  88. chap_core/predictor/poisson.py +24 -0
  89. chap_core/predictor/protocol.py +50 -0
  90. chap_core/reports/__init__.py +106 -0
  91. chap_core/rest_api.py +169 -0
  92. chap_core/rest_api_src/__init__.py +0 -0
  93. chap_core/rest_api_src/_legacy.py +57 -0
  94. chap_core/rest_api_src/data_models.py +27 -0
  95. chap_core/rest_api_src/generate_rest_api.py +57 -0
  96. chap_core/rest_api_src/worker_functions.py +139 -0
  97. chap_core/runners/__init__.py +0 -0
  98. chap_core/runners/command_line_runner.py +44 -0
  99. chap_core/runners/conda_runner.py +17 -0
  100. chap_core/runners/docker_runner.py +48 -0
  101. chap_core/runners/runner.py +20 -0
  102. chap_core/services/__init__.py +0 -0
  103. chap_core/services/cache_manager.py +18 -0
  104. chap_core/simulation/__init__.py +0 -0
  105. chap_core/simulation/random_noise_simulator.py +22 -0
  106. chap_core/simulation/seasonal_simulator.py +76 -0
  107. chap_core/simulation/simulator.py +23 -0
  108. chap_core/spatio_temporal_data/__init__.py +0 -0
  109. chap_core/spatio_temporal_data/multi_country_dataset.py +88 -0
  110. chap_core/spatio_temporal_data/omnipy_spatio_temporal_dataset.py +82 -0
  111. chap_core/spatio_temporal_data/temporal_dataclass.py +400 -0
  112. chap_core/testing/__init__.py +0 -0
  113. chap_core/testing/estimators.py +18 -0
  114. chap_core/testing/external_model.py +14 -0
  115. chap_core/time_period/__init__.py +15 -0
  116. chap_core/time_period/_legacy_implementation.py +90 -0
  117. chap_core/time_period/dataclasses.py +78 -0
  118. chap_core/time_period/date_util_wrapper.py +706 -0
  119. chap_core/time_period/delta.py +10 -0
  120. chap_core/time_period/multi_resolution.py +15 -0
  121. chap_core/time_period/pandas_wrappers.py +9 -0
  122. chap_core/time_period/period_range.py +65 -0
  123. chap_core/time_period/protocols.py +42 -0
  124. chap_core/time_period/relationships.py +12 -0
  125. chap_core/training_control.py +48 -0
  126. chap_core/transformations/__init__.py +0 -0
  127. chap_core/transformations/covid_mask.py +26 -0
  128. chap_core/util.py +39 -0
  129. chap_core/worker/__init__.py +0 -0
  130. chap_core/worker/background_tasks_worker.py +73 -0
  131. chap_core/worker/interface.py +25 -0
  132. chap_core/worker/rq_worker.py +76 -0
  133. chap_core-0.0.8.dist-info/LICENSE +22 -0
  134. chap_core-0.0.8.dist-info/METADATA +54 -0
  135. chap_core-0.0.8.dist-info/RECORD +138 -0
  136. chap_core-0.0.8.dist-info/WHEEL +6 -0
  137. chap_core-0.0.8.dist-info/entry_points.txt +3 -0
  138. chap_core-0.0.8.dist-info/top_level.txt +1 -0
chap_core/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ """Top-level package for chap-core."""
2
+
3
+ __author__ = """Sandvelab"""
4
+ __email__ = "knutdrand@gmail.com"
5
+ __version__ = "0.0.8"
6
+
7
+ from . import fetch
8
+ from . import data
File without changes
@@ -0,0 +1,50 @@
1
+ from typing import List
2
+
3
+ import chap_core.time_period.dataclasses as dc
4
+ from chap_core.time_period import TimePeriod, Month, Day, Year
5
+
6
+
7
def parse_period_string(time_string: str) -> TimePeriod:
    """Parse one period string by delegating to ``TimePeriod.parse``."""
    return TimePeriod.parse(time_string)
10
+
11
+
12
def write_time_series_data(data):
    # NOTE(review): this function only defines the helpers below. It never
    # calls them, never reads ``data`` and implicitly returns None. The helpers
    # take ``self`` and look like methods lifted from a class - confirm whether
    # this stub is still needed.
    def topandas(self):
        # Builds a DataFrame with one column per attribute read from ``self``.
        # NOTE(review): ``pd`` is not imported in the visible part of this module.
        data = pd.DataFrame(
            {
                "time_period": self.time_period.topandas(),
                "rainfall": self.rainfall,
                "mean_temperature": self.mean_temperature,
                "disease_cases": self.disease_cases,
            }
        )
        return data

    # Alias so both spellings refer to the same helper.
    to_pandas = topandas

    def to_csv(self, csv_file: str, **kwargs):
        """Write data to a csv file."""
        data = self.to_pandas()
        # Extra keyword arguments are forwarded to ``DataFrame.to_csv``.
        data.to_csv(csv_file, index=False, **kwargs)
30
+
31
+
32
def parse_periods_strings(time_strings: List[str]) -> dc.Period:
    """Parse period strings into one columnar period object.

    Every string is parsed with ``parse_period_string``. All parsed periods
    must share the same concrete type, which selects the container:
    ``dc.Year``, ``dc.Month`` or ``dc.Day``.

    Args:
        time_strings: Period strings to parse. May be empty.

    Returns:
        ``dc.Period.empty()`` for empty input, otherwise a ``dc.Year``,
        ``dc.Month`` or ``dc.Day`` built from the parsed components.

    Raises:
        AssertionError: If the parsed periods are not all of the same type.
        TypeError: If the common type is not ``Year``, ``Month`` or ``Day``
            (previously this case fell through and returned ``None``).
    """
    periods = [parse_period_string(time_string) for time_string in time_strings]
    if not periods:
        return dc.Period.empty()
    period_type = type(periods[0])
    assert all(type(period) is period_type for period in periods), periods

    years = [period.year for period in periods]
    if period_type is Year:
        return dc.Year(years)

    # Month components are only read for the types that are known to have them.
    if period_type is Month:
        return dc.Month(years, [period.month for period in periods])
    if period_type is Day:
        return dc.Day(
            years,
            [period.month for period in periods],
            [period.day for period in periods],
        )
    raise TypeError(f"Unsupported period type: {period_type.__name__}")
@@ -0,0 +1,101 @@
1
+ from typing import Protocol, TypeAlias, Union, Iterable, TypeVar, Tuple
2
+
3
+ import pandas as pd
4
+ from pydantic import BaseModel
5
+
6
+ from chap_core.datatypes import Location
7
+ from chap_core.time_period.dataclasses import Period
8
+
9
+ SpatialIndexType: TypeAlias = Union[str, Location]
10
+ TemporalIndexType: TypeAlias = Union[Period, Iterable[Period], slice]
11
+
12
+
13
+ FeaturesT = TypeVar("FeaturesT")
14
+
15
+
16
class ClimateData(BaseModel):
    """Pydantic model with three float-valued climate fields."""

    # Units are not stated anywhere in this module.
    temperature: float
    rainfall: float
    humidity: float
20
+
21
+
22
class DataType(BaseModel):
    """Pydantic model pairing a disease case count with a ``ClimateData`` record."""

    disease_cases: int
    climate_data: ClimateData
25
+
26
+
27
class IsTemporalDataSet(Protocol[FeaturesT]):
    """Structural interface for a data set indexed by time period."""

    def restrict_time_period(
        self, start_period: Period = None, end_period: Period = None
    ) -> "IsTemporalDataSet[FeaturesT]":
        """Return a data set restricted by the optional start and end periods."""
        ...

    def to_tidy_dataframe(self) -> pd.DataFrame:
        """Return the data as a pandas DataFrame."""
        ...

    @classmethod
    def from_tidy_dataframe(cls, df: pd.DataFrame) -> "IsTemporalDataSet[FeaturesT]":
        """Build a data set of this kind from a pandas DataFrame.

        The return annotation used to name ``IsSpatialDataSet``; the sibling
        protocols in this module all return their own type from this
        constructor, so it now does the same.
        """
        ...
36
+
37
+
38
class TemporalArray:
    """Holds a time index together with its data payload.

    ``__array_ufunc__`` is a placeholder: it returns the ufunc it was handed
    without applying it to anything.
    """

    def __init__(self, time_index, data):
        self._time_index, self._data = time_index, data

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        # ``method``, ``inputs`` and ``kwargs`` are accepted but ignored.
        return ufunc
45
+
46
+
47
class IsSpatialDataSet(Protocol[FeaturesT]):
    """Structural interface for a data set indexed by location.

    All methods are declarations only (``...`` bodies); implementations live
    in the classes that satisfy this protocol.
    """

    # Subset of the data set for several locations.
    def get_locations(
        self, location: Iterable[SpatialIndexType]
    ) -> "IsSpatialDataSet[FeaturesT]": ...

    # Features for a single location.
    def get_location(self, location: SpatialIndexType) -> FeaturesT: ...

    # The location keys of the data set.
    def locations(self) -> Iterable[SpatialIndexType]: ...

    # The feature records of the data set.
    def data(self) -> Iterable[FeaturesT]: ...

    # (location, features) pairs.
    def location_items(self) -> Iterable[Tuple[SpatialIndexType, FeaturesT]]: ...

    def to_tidy_dataframe(self) -> pd.DataFrame: ...

    @classmethod
    def from_tidy_dataframe(cls, df: pd.DataFrame) -> "IsSpatialDataSet[FeaturesT]": ...
64
+
65
+
66
class IsSpatioTemporalDataSet(Protocol[FeaturesT]):
    """Structural interface for a data set indexed by both location and time.

    All methods are declarations only (``...`` bodies).
    """

    # Placeholder class attribute; its concrete value is not defined here.
    dataclass = ...

    def get_data_for_locations(
        self, location: Iterable[SpatialIndexType]
    ) -> "IsSpatioTemporalDataSet[FeaturesT]": ...

    def get_data_for_location(self, location: SpatialIndexType) -> FeaturesT: ...

    def restrict_time_period(
        self, start_period: Period = None, end_period: Period = None
    ) -> "IsSpatioTemporalDataSet[FeaturesT]": ...

    def start_time(self) -> Period: ...

    def end_time(self) -> Period: ...

    def locations(self) -> Iterable[SpatialIndexType]: ...

    def data(self) -> Iterable[FeaturesT]: ...

    def location_items(
        self,
    ) -> Iterable[Tuple[SpatialIndexType, IsTemporalDataSet[FeaturesT]]]: ...

    def to_tidy_dataframe(self) -> pd.DataFrame: ...

    @classmethod
    def from_tidy_dataframe(
        cls, df: pd.DataFrame
    ) -> "IsSpatioTemporalDataSet[FeaturesT]": ...

    def to_csv(self, file_name: str): ...

    # The implicit first argument of a classmethod is the class, so it is
    # named ``cls`` (it was ``self``); callers are unaffected.
    @classmethod
    def from_csv(cls, file_name: str) -> "IsSpatioTemporalDataSet[FeaturesT]": ...
File without changes
@@ -0,0 +1,50 @@
1
+ from gluonts.model.estimator import Estimator
2
+ from gluonts.dataset.common import ListDataset
3
+ from gluonts.model.predictor import Predictor
4
+ from ..data import DataSet
5
+ from ..data.gluonts_adaptor.dataset import DataSetAdaptor
6
+ from ..datatypes import Samples
7
+ from ..time_period import PeriodRange
8
+ from pathlib import Path
9
+ from dataclasses import dataclass
10
+
11
+
12
@dataclass
class GluonTSPredictor:
    """Adapter that lets a GluonTS ``Predictor`` work with ``DataSet`` objects."""

    gluonts_predictor: Predictor

    def predict(
        self, history: DataSet, future_data: DataSet, num_samples=100
    ) -> DataSet:
        """Forecast for every location in ``history``.

        Args:
            history: Observed data per location.
            future_data: Future data per location, passed to the adaptor.
            num_samples: Number of samples requested from the predictor.

        Returns:
            A ``DataSet`` mapping each location to a ``Samples`` object.
        """
        gluonts_dataset = DataSetAdaptor.to_gluonts_testinstances(
            history, future_data, self.gluonts_predictor.prediction_length
        )
        forecasts = self.gluonts_predictor.predict(
            gluonts_dataset, num_samples=num_samples
        )
        # Forecasts are paired with locations purely by iteration order.
        data = {
            location: Samples(
                PeriodRange.from_pandas(forecast.index), forecast.samples.T
            )
            for location, forecast in zip(history.keys(), forecasts)
        }
        return DataSet(data)

    def save(self, filename: str):
        """Serialize the wrapped predictor into the directory ``filename``."""
        filepath = Path(filename)
        filepath.mkdir(exist_ok=True, parents=True)
        self.gluonts_predictor.serialize(filepath)

    @classmethod
    def load(cls, filename: str):
        """Deserialize a predictor from the directory ``filename``.

        Builds the result with ``cls`` so a subclass gets an instance of
        itself rather than of ``GluonTSPredictor``.
        """
        return cls(Predictor.deserialize(Path(filename)))
41
+
42
+
43
@dataclass
class GluonTSEstimator:
    """Adapter that trains a GluonTS ``Estimator`` on a ``DataSet``."""

    gluont_ts_estimator: Estimator

    def train(self, dataset: DataSet) -> GluonTSPredictor:
        """Train on ``dataset`` and wrap the resulting predictor."""
        entries = DataSetAdaptor.to_gluonts(dataset)
        # The frequency string is fixed to "m" here.
        training_data = ListDataset(entries, freq="m")
        trained = self.gluont_ts_estimator.train(training_data)
        return GluonTSPredictor(trained)
chap_core/alarms.py ADDED
@@ -0,0 +1,18 @@
1
+ from typing import Iterable
2
+
3
+ import numpy as np
4
+ from pydantic import BaseModel
5
+
6
+ # Epidemiological week: different from calendar week
7
+ # https://www.cdc.gov/flu/weekly/overview.htm
8
+
9
+
10
class OutbreakParameters(BaseModel):
    """Pydantic model holding the two float parameters for outbreak detection."""

    # NOTE(review): how these two values are meant to be combined is not shown
    # in this module - confirm with the author.
    endemic_factor: float
    probability_threshold: float
13
+
14
+
15
def outbreak_prediction(
    parameters: "OutbreakParameters", case_samples: Iterable[float]
) -> bool:
    """Decide whether ``case_samples`` indicate an outbreak (not implemented).

    The previous body was ``return np.mean()``, which always raised
    ``TypeError`` because ``np.mean`` requires an array argument, so this
    function has never returned a value. The intended decision rule cannot be
    derived from this module, so the failure is made explicit instead of
    guessing at one.

    Args:
        parameters: Endemic factor and probability threshold.
        case_samples: Sampled case counts.

    Raises:
        NotImplementedError: Always, until the decision rule is defined.
    """
    raise NotImplementedError(
        "outbreak_prediction has no defined decision rule yet"
    )
chap_core/api.py ADDED
@@ -0,0 +1,264 @@
1
+ import logging
2
+ import json
3
+
4
+ from .assessment.forecast import forecast as do_forecast
5
+ import zipfile
6
+ from pathlib import Path
7
+ from typing import Optional, List
8
+
9
+ import numpy as np
10
+
11
+ from .assessment.dataset_splitting import train_test_split_with_weather
12
+ from .datatypes import (
13
+ HealthData,
14
+ ClimateData,
15
+ HealthPopulationData,
16
+ SimpleClimateData,
17
+ FullData,
18
+ )
19
+ from .dhis2_interface.json_parsing import (
20
+ predictions_to_datavalue,
21
+ parse_disease_data,
22
+ json_to_pandas,
23
+ parse_population_data,
24
+ )
25
+ from .external.external_model import get_model_from_directory_or_github_url
26
+ from .file_io.example_data_set import DataSetType, datasets
27
+ from .geojson import geojson_to_graph, NeighbourGraph
28
+ from .plotting.prediction_plot import plot_forecast_from_summaries
29
+ from .predictor import get_model
30
+ from .spatio_temporal_data.temporal_dataclass import DataSet
31
+ import dataclasses
32
+
33
+ from .time_period.date_util_wrapper import delta_month, Month
34
+
35
+ from .transformations.covid_mask import mask_covid_data
36
+
37
+ logger = logging.getLogger(__name__)
38
+
39
+
40
class DummyControl:
    """No-op control object used when the caller does not supply one."""

    def set_status(self, status):
        """Accept a status and discard it."""
        return None

    @property
    def current_control(self):
        """There is never an active control; always ``None``."""
        return None
47
+
48
+
49
@dataclasses.dataclass
class AreaPolygons:
    """Dataclass with a single string field, ``shape_file``."""

    # presumably a path to a shape file - confirm against callers
    shape_file: str
52
+
53
+
54
@dataclasses.dataclass
class PredictionData:
    """Bundle of the inputs needed for a prediction run.

    Every field defaults to ``None``.
    """

    # NOTE(review): ``read_zip_folder`` in this module stores a
    # ``NeighbourGraph`` here, not an ``AreaPolygons`` - confirm the annotation.
    area_polygons: AreaPolygons = None
    health_data: DataSet[HealthData] = None
    climate_data: DataSet[ClimateData] = None
    population_data: DataSet[HealthPopulationData] = None
    disease_id: Optional[str] = None
    # ``read_zip_folder`` fills this with the "features" list of the geojson file.
    features: List[object] = None
62
+
63
+
64
def extract_disease_name(health_data: dict) -> str:
    """Return the first cell of the first entry in ``health_data["rows"]``."""
    first_row = health_data["rows"][0]
    return first_row[0]
66
+
67
+
68
def read_zip_folder(zip_file_path: str) -> PredictionData:
    """Read a zip archive and assemble a ``PredictionData``.

    The archive is expected to contain ``orgUnits.geojson``, ``disease.json``,
    ``population.json``, ``temperature.json`` and ``precipitation.json``.

    Changes from the previous implementation:
      * precipitation is read from ``precipitation.json``; it used to be read
        from ``temperature.json``, so rainfall was a copy of temperature and
        the consistency asserts below could never fail;
      * the archive and its members are closed once read;
      * the unreachable ``if False`` branch and its unused local are removed;
      * debug ``print`` calls go through the module logger.

    Args:
        zip_file_path: Path to the zip archive.

    Returns:
        A ``PredictionData`` with health, climate, population, graph,
        disease id and geojson features filled in.

    Raises:
        AssertionError: If precipitation and temperature rows do not share
            the same time periods and locations.
    """
    logger.debug("Reading prediction data from %s", zip_file_path)
    expected_files = {
        "area_polygons": "orgUnits.geojson",
        "disease": "disease.json",
        "population": "population.json",
        "temperature": "temperature.json",
        "precipitation": "precipitation.json",
    }

    with zipfile.ZipFile(zip_file_path) as ziparchive:

        def _load_json(key):
            # Parse one expected archive member as JSON, closing it afterwards.
            with ziparchive.open(expected_files[key]) as member:
                return json.load(member)

        disease_json = _load_json("disease")
        temperature_json = _load_json("temperature")
        precipitation_json = _load_json("precipitation")
        population_json = _load_json("population")
        features = _load_json("area_polygons")["features"]
        # NOTE(review): assumes from_geojson_file reads the file eagerly.
        with ziparchive.open(expected_files["area_polygons"]) as geojson_file:
            graph = NeighbourGraph.from_geojson_file(geojson_file)

    # The integer values are column positions within each JSON row.
    name_mapping = {"time_period": 2, "disease_cases": 3, "location": 1}
    disease = parse_disease_data(disease_json, name_mapping=name_mapping)
    disease_id = extract_disease_name(disease_json)

    name_mapping = {"time_period": 2, "mean_temperature": 3, "location": 1}
    temperature = json_to_pandas(temperature_json, name_mapping)

    name_mapping = {"time_period": 2, "precipitation": 3, "location": 1}
    precipitation = json_to_pandas(precipitation_json, name_mapping)

    # Both frames must be row-aligned before their columns are merged.
    assert np.all(precipitation.time_period == temperature.time_period)
    assert np.all(precipitation.location == temperature.location)

    temperature["rainfall"] = precipitation["precipitation"]
    temperature["rainfall"] = temperature["rainfall"].astype(float)
    temperature["mean_temperature"] = temperature["mean_temperature"].astype(float)

    climate = DataSet.from_pandas(temperature, dataclass=SimpleClimateData)
    population = parse_population_data(population_json)
    logger.debug("Neighbour graph: %s", graph)

    return PredictionData(
        health_data=disease,
        climate_data=climate,
        population_data=population,
        area_polygons=graph,
        disease_id=disease_id,
        features=features,
    )
124
+
125
+ out_data = {}
126
+
127
+
128
+ # ...
129
+
130
+
131
def dhis_zip_flow(
    zip_file_path: str,
    out_json: Optional[str] = None,
    model_name=None,
    n_months=4,
    docker_filename: Optional[str] = None,
) -> dict | None:
    """Read a zip archive, train and forecast, then return or write the result.

    The return annotation was ``List[dict] | None``, but
    ``train_on_prediction_data`` returns a dict with the keys ``"diseaseId"``
    and ``"dataValues"``, so the annotation now says ``dict``.

    Args:
        zip_file_path: Path to the input zip archive.
        out_json: If given, the result is written to this path as JSON.
        model_name: Forwarded to ``train_on_prediction_data``.
        n_months: Forwarded to ``train_on_prediction_data``.
        docker_filename: Forwarded to ``train_on_prediction_data``.

    Returns:
        The result dict when ``out_json`` is ``None``; otherwise ``None``
        after the file has been written.
    """
    data: PredictionData = read_zip_folder(zip_file_path)
    json_body = train_on_prediction_data(data, model_name, n_months, docker_filename)
    if out_json is None:
        return json_body
    with open(out_json, "w") as f:
        json.dump(json_body, f)
    return None
146
+
147
+
148
def train_on_prediction_data(
    data,
    model_name=None,
    n_months=4,
    docker_filename=None,
    model_path=None,
    control=None,
):
    """Train a model on ``data`` and return its forecast as a JSON-ready dict.

    Args:
        data: Object exposing ``health_data``, ``climate_data``,
            ``population_data``, ``area_polygons`` and ``disease_id``.
        model_name: ``"external"`` loads a model from ``model_path``; any
            other value is looked up with ``get_model``.
        n_months: Number of months to hold out and forecast.
        docker_filename: Not used in this function body.
        model_path: Location of the external model; only read when
            ``model_name == "external"``.
        control: Status reporter; a ``DummyControl`` is used when ``None``.

    Returns:
        ``{"diseaseId": ..., "dataValues": [...]}``.

    Raises:
        AssertionError: If a health-data location is missing from the
            population data.
    """
    if control is None:
        control = DummyControl()
    control.set_status("Preprocessing")
    if model_name == "external":
        model = get_model_from_directory_or_github_url(model_path)
    else:
        model = get_model(model_name)()
    # Common time span covering both the health and the climate data.
    start_timestamp = min(
        data.health_data.start_timestamp, data.climate_data.start_timestamp
    )
    end_timestamp = max(data.health_data.end_timestamp, data.climate_data.end_timestamp)
    new_dict = {}
    for location in data.health_data.locations():
        # Bring both series onto the common span before combining them.
        health = data.health_data.get_location(location).fill_to_range(
            start_timestamp, end_timestamp
        )
        climate = data.climate_data.get_location(location).fill_to_range(
            start_timestamp, end_timestamp
        )
        assert (
            location in data.population_data
        ), f"Location {location} not in population data: {data.population_data.keys()}"
        population = data.population_data[location]
        new_dict[location] = FullData.combine(health.data(), climate.data(), population)

    climate_health_data = DataSet(new_dict)
    # The last ``n_months`` months are held out as the prediction window.
    prediction_start = Month(climate_health_data.end_timestamp) - n_months * delta_month
    train_data, _, future_weather = train_test_split_with_weather(
        climate_health_data, prediction_start
    )
    logger.info(f"Training model {model_name} on {len(train_data.items())} locations")
    control.set_status("Training")
    # Optional hooks: only called when the model provides them.
    if hasattr(model, "set_training_control"):
        model.set_training_control(control.current_control)
    if hasattr(model, "set_graph"):
        model.set_graph(data.area_polygons)

    model.train(train_data)  # , extra_args=data.area_polygons)
    logger.info(
        f"Forecasting using {model_name} on {len(train_data.items())} locations"
    )
    control.set_status("Forecasting")
    predictions = model.forecast(future_weather, forecast_delta=n_months * delta_month)
    attrs = ["median", "quantile_high", "quantile_low"]
    logger.info("Converting predictions to json")
    control.set_status("Postprocessing")
    # Identity mapping: each attribute name maps to itself.
    data_values = predictions_to_datavalue(
        predictions, attribute_mapping=dict(zip(attrs, attrs))
    )
    json_body = [dataclasses.asdict(element) for element in data_values]
    diseaseId = data.disease_id
    return {"diseaseId": diseaseId, "dataValues": json_body}
208
+ # return json_body
209
+
210
+
211
def train_with_validation(model_name, dataset_name, n_months=12):
    """Train a named model with a held-out validation window and plot forecasts.

    The last ``n_months`` months of the named example dataset are split off,
    given to the model as validation data, and then forecast.

    Returns:
        One figure per forecast location.
    """
    dataset = mask_covid_data(datasets[dataset_name].load())
    model = get_model(model_name)(n_iter=32000)

    prediction_length = n_months * delta_month
    split_point = dataset.end_timestamp - prediction_length
    train_data, test_set, future_weather = train_test_split_with_weather(
        dataset, Month(split_point.year, split_point.month)
    )

    model.set_validation_data(test_set)
    model.train(train_data)
    predictions = model.forecast(
        future_weather, forecast_delta=n_months * delta_month, n_samples=100
    )

    # One plot per location: the forecast against that location's full series.
    return [
        plot_forecast_from_summaries(
            prediction.data(), dataset.get_location(location).data()
        )
        for location, prediction in predictions.items()
    ]
238
+
239
+
240
def forecast(
    model_name: str,
    dataset_name: DataSetType,
    n_months: int,
    model_path: Optional[str] = None,
):
    """Forecast ``n_months`` ahead on a named example dataset and plot the result.

    ``model_name == "external"`` loads the model from ``model_path``; any
    other name is resolved with ``get_model`` and instantiated without
    arguments.

    Returns:
        One figure per forecast location.
    """
    logging.basicConfig(level=logging.INFO)
    dataset = datasets[dataset_name].load()

    if model_name == "external":
        model = get_model_from_directory_or_github_url(model_path)
    else:
        model_class = get_model(model_name)
        model = model_class()

    predictions = do_forecast(model, dataset, n_months * delta_month)

    return [
        plot_forecast_from_summaries(
            prediction.data(), dataset.get_location(location).data()
        )
        for location, prediction in predictions.items()
    ]
chap_core/api_types.py ADDED
@@ -0,0 +1,55 @@
1
+ from pydantic import BaseModel
2
+ from pydantic_geojson import (
3
+ FeatureCollectionModel as _FeatureCollectionModel,
4
+ FeatureModel as _FeatureModel,
5
+ )
6
+
7
+
8
class FeatureModel(_FeatureModel):
    """``pydantic_geojson`` feature model extended with a string ``id`` field."""

    id: str
10
+
11
+
12
class FeatureCollectionModel(_FeatureCollectionModel):
    """Feature collection whose features are this module's ``FeatureModel``."""

    features: list[FeatureModel]
14
+
15
+
16
class DataElement(BaseModel):
    """Pydantic model for a single data value."""

    # presumably DHIS2-style period ("pe") and org unit ("ou") ids - confirm
    pe: str
    ou: str
    value: float
20
+
21
+
22
class DataList(BaseModel):
    """Pydantic model grouping ``DataElement`` values under two string ids."""

    featureId: str
    dhis2Id: str
    data: list[DataElement]
26
+
27
+
28
class RequestV1(BaseModel):
    """Request body: a geojson feature collection plus a list of ``DataList``."""

    orgUnitsGeoJson: FeatureCollectionModel
    features: list[DataList]
31
+
32
class RequestV2(RequestV1):
    """``RequestV1`` plus a ``model_id`` that defaults to ``'chap_ewars'``."""

    model_id: str = 'chap_ewars'
34
+
35
class PredictionRequest(RequestV2):
    """``RequestV2`` plus an ``n_periods`` count that defaults to 3."""

    n_periods: int = 3
37
+
38
+
39
class PeriodObservation(BaseModel):
    """Pydantic model with a single string field, ``time_period``."""

    time_period: str
41
+
42
+
43
+ # class Geometry:
44
+ # type: str
45
+ # coordinates: list[list[float]]
46
+ #
47
+ #
48
+ # class GeoJSONObject(BaseModel):
49
+ # id: str
50
+ # geometry: dict
51
+ #
52
+ #
53
+ # class GeoJSON(BaseModel):
54
+ # type: str
55
+ # features: list[dict]
File without changes
@@ -0,0 +1,141 @@
1
+ from typing import Iterable, Tuple, Protocol, Optional, Type
2
+
3
+ from chap_core._legacy_dataset import IsSpatioTemporalDataSet
4
+ from chap_core.climate_predictor import FutureWeatherFetcher
5
+ from chap_core.datatypes import ClimateHealthData, ClimateData
6
+ from chap_core.spatio_temporal_data.temporal_dataclass import DataSet
7
+ from chap_core.time_period import Month, TimePeriod
8
+ from chap_core.time_period.relationships import previous
9
+
10
+
11
def split_period_on_resolution(param, param1, resolution) -> Iterable[Month]:
    # Unimplemented stub: the body is empty, so this returns None despite the
    # ``Iterable[Month]`` annotation.
    pass
13
+
14
+
15
def extend_to(period, future_length):
    # Unimplemented stub: returns None. ``train_test_split`` in this module
    # calls a ``extend_to`` *method* on the period, not this function.
    pass
17
+
18
+
19
class IsTimeDelta(Protocol):
    # Marker protocol with no members; used only in type annotations in this
    # module.
    pass
21
+
22
+
23
def split_test_train_on_period(
    data_set: IsSpatioTemporalDataSet,
    split_points: Iterable[TimePeriod],
    future_length: Optional[IsTimeDelta] = None,
    include_future_weather: bool = False,
    future_weather_class: Type[ClimateData] = ClimateData,
):
    """Lazily split ``data_set`` once per split point.

    Returns:
        A generator with one result per entry of ``split_points``: the output
        of ``train_test_split_with_weather`` when ``include_future_weather``
        is true, otherwise the output of ``train_test_split``.
    """
    if not include_future_weather:
        return (
            train_test_split(data_set, split_point, future_length)
            for split_point in split_points
        )
    return (
        train_test_split_with_weather(
            data_set, split_point, future_length, future_weather_class
        )
        for split_point in split_points
    )
40
+
41
+
42
def split_train_test_with_future_weather(
    data_set: IsSpatioTemporalDataSet,
    split_points: Iterable[TimePeriod],
    future_length: Optional[IsTimeDelta] = None,
):
    """Lazily yield ``train_test_split`` results, one per split point.

    NOTE(review): despite the name, this delegates to ``train_test_split``
    and produces no future-weather element - confirm whether
    ``train_test_split_with_weather`` was intended.
    """
    splits = (
        train_test_split(data_set, split_point, future_length)
        for split_point in split_points
    )
    return splits
50
+
51
+
52
+ # Should we index on split-timestamp, first time period, or complete time?
53
def train_test_split(
    data_set: IsSpatioTemporalDataSet,
    prediction_start_period: TimePeriod,
    extension: Optional[IsTimeDelta] = None,
    restrict_test=True,
):
    """Split ``data_set`` into train and test parts around a start period.

    Training data runs up to the period before ``prediction_start_period``.
    Test data starts at ``prediction_start_period`` and, when ``extension``
    is given, ends at ``prediction_start_period.extend_to(extension)``.
    With ``restrict_test=False`` the whole ``data_set`` is returned as the
    test part.

    Returns:
        ``(train_data, test_data)``.
    """
    train_window = slice(None, previous(prediction_start_period))
    train_data = data_set.restrict_time_period(train_window)

    end_period = (
        None if extension is None else prediction_start_period.extend_to(extension)
    )

    if not restrict_test:
        return train_data, data_set

    test_window = slice(prediction_start_period, end_period)
    return train_data, data_set.restrict_time_period(test_window)
72
+
73
+
74
def train_test_generator(
    dataset: DataSet,
    prediction_length: int,
    n_test_sets: int = 1,
    future_weather_provider: Optional[FutureWeatherFetcher] = None,
) -> tuple[DataSet, Iterable[tuple[DataSet, DataSet, DataSet]]]:
    """Generate a train set and an iterator of test triples.

    Each test triple is ``(historic_data, masked_future_data, future_data)``:
    full data up to a split point, the following ``prediction_length`` periods
    without the target variable, and those same periods with it. The return
    annotation used to describe 2-tuples; it now matches the 3-tuples that
    are actually produced.

    Args:
        dataset: Full data set to split.
        prediction_length: Number of periods in each future window.
        n_test_sets: Number of consecutive split points to generate.
        future_weather_provider: When given, it is called with the historic
            data and its ``get_future_weather`` supplies the masked future
            data. Otherwise ``disease_cases`` is removed from the true
            future data.

    Returns:
        ``(train_set, test_triples)``, where ``train_set`` ends at the first
        split point.
    """
    periods = dataset.period_range
    # Negative index of the first split point, counted from the end.
    split_idx = -(prediction_length + n_test_sets)
    train_set = dataset.restrict_time_period(slice(None, periods[split_idx]))
    historic_data = [
        dataset.restrict_time_period(slice(None, periods[split_idx + i]))
        for i in range(n_test_sets)
    ]
    future_data = [
        dataset.restrict_time_period(
            slice(
                periods[split_idx + i + 1],
                periods[split_idx + i + prediction_length],
            )
        )
        for i in range(n_test_sets)
    ]
    if future_weather_provider is not None:
        masked_future_data = [
            future_weather_provider(hd).get_future_weather(fd.period_range)
            for hd, fd in zip(historic_data, future_data)
        ]
    else:
        # Kept lazy; the loop variable no longer shadows the ``dataset`` argument.
        masked_future_data = (
            future.remove_field("disease_cases") for future in future_data
        )
    return train_set, zip(historic_data, masked_future_data, future_data)
103
+
104
+
105
def train_test_split_with_weather(
    data_set: DataSet,
    prediction_start_period: TimePeriod,
    extension: Optional[IsTimeDelta] = None,
    future_weather_class: Type[ClimateData] = ClimateData,
):
    """Split ``data_set`` and derive future weather from the test part.

    Future weather is the test set with the ``disease_cases`` field removed.
    The unused ``tmp_values`` generator from the previous version is gone.

    Args:
        data_set: Data to split.
        prediction_start_period: First period of the test part.
        extension: Optional length of the test part, passed to
            ``train_test_split``.
        future_weather_class: Accepted for interface compatibility; not used
            in this function body.

    Returns:
        ``(train_set, test_set, future_weather)``.

    Raises:
        AssertionError: If any period occurs in both the train set and the
            future weather.
    """
    train_set, test_set = train_test_split(data_set, prediction_start_period, extension)
    future_weather = test_set.remove_field("disease_cases")

    # Periods are compared through their string form.
    train_periods = {
        str(period) for data in train_set.data() for period in data.data().time_period
    }
    future_periods = {
        str(period)
        for data in future_weather.data()
        for period in data.data().time_period
    }
    overlap = train_periods & future_periods
    assert overlap == set(), f"Train and future weather data overlap: {overlap}"
    return train_set, test_set, future_weather
128
+
129
+
130
def get_split_points_for_data_set(
    data_set: IsSpatioTemporalDataSet, max_splits: int, start_offset=1
) -> list[TimePeriod]:
    """Pick split points from the time periods of the first location.

    Only the first location's periods are inspected; all locations are
    assumed to share the same time axis.
    """
    first_location_data = next(iter(data_set.data()))
    periods = first_location_data.data().time_period
    return get_split_points_for_period_range(max_splits, periods, start_offset)
137
+
138
+
139
def get_split_points_for_period_range(max_splits, periods, start_offset):
    """Choose up to ``max_splits`` evenly spaced split points from ``periods``.

    The spacing is ``(len(periods) - 1 - start_offset) // (max_splits + 1)``.
    The first split point is at index ``start_offset + spacing``.

    When there are too few periods the spacing is zero or negative. That used
    to raise ``ValueError`` (zero slice step) or walk backwards through the
    list. The spacing is now clamped to 1, so the available periods after
    ``start_offset`` are returned instead.

    Args:
        max_splits: Maximum number of split points to return.
        periods: Sized iterable of periods.
        start_offset: Number of leading periods to skip.

    Returns:
        A list of at most ``max_splits`` periods.
    """
    delta = max((len(periods) - 1 - start_offset) // (max_splits + 1), 1)
    return list(periods)[start_offset + delta :: delta][:max_splits]