PyPI - chap-core - Versions diffs - 0.0.8__py2.py3-none-any.whl - Mend

chap-core 0.0.8__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (138) hide show

chap_core/__init__.py +8 -0
chap_core/_legacy/__init__.py +0 -0
chap_core/_legacy/file_io.py +50 -0
chap_core/_legacy_dataset.py +101 -0
chap_core/adaptors/__init__.py +0 -0
chap_core/adaptors/gluonts.py +50 -0
chap_core/alarms.py +18 -0
chap_core/api.py +264 -0
chap_core/api_types.py +55 -0
chap_core/assessment/__init__.py +0 -0
chap_core/assessment/dataset_splitting.py +141 -0
chap_core/assessment/forecast.py +83 -0
chap_core/assessment/multi_location_evaluator.py +124 -0
chap_core/assessment/prediction_evaluator.py +353 -0
chap_core/chap_cli.py +133 -0
chap_core/cli.py +277 -0
chap_core/climate_data/__init__.py +14 -0
chap_core/climate_data/external.py +4 -0
chap_core/climate_data/gee_legacy.py +97 -0
chap_core/climate_data/gridded_data.py +45 -0
chap_core/climate_data/meteostat_wrapper.py +242 -0
chap_core/climate_data/seasonal_forecasts.py +42 -0
chap_core/climate_health.py +1 -0
chap_core/climate_predictor.py +84 -0
chap_core/data/__init__.py +4 -0
chap_core/data/adaptors.py +3 -0
chap_core/data/datasets.py +7 -0
chap_core/data/gluonts_adaptor/__init__.py +0 -0
chap_core/data/gluonts_adaptor/dataset.py +147 -0
chap_core/data/gluonts_adaptor/model.py +15 -0
chap_core/data_wrangling/__init__.py +0 -0
chap_core/data_wrangling/flows.py +132 -0
chap_core/data_wrangling/tasks.py +35 -0
chap_core/database/__init__.py +0 -0
chap_core/database/database.py +42 -0
chap_core/database/local_db_cache.py +49 -0
chap_core/dataset_protocols.py +73 -0
chap_core/datatypes.py +386 -0
chap_core/dhis2_interface/ChapProgram.py +126 -0
chap_core/dhis2_interface/__init__.py +0 -0
chap_core/dhis2_interface/json_parsing.py +120 -0
chap_core/dhis2_interface/periods.py +14 -0
chap_core/dhis2_interface/pydantic_to_spatiotemporal.py +23 -0
chap_core/dhis2_interface/src/Config.py +15 -0
chap_core/dhis2_interface/src/HttpRequest.py +11 -0
chap_core/dhis2_interface/src/PullAnalytics.py +36 -0
chap_core/dhis2_interface/src/PullClimateData.py +2 -0
chap_core/dhis2_interface/src/PushResult.py +49 -0
chap_core/dhis2_interface/src/__init__.py +0 -0
chap_core/dhis2_interface/src/create_data_element_if_not_exists.py +79 -0
chap_core/dhis2_interface/src/dhis_json_parser.py +0 -0
chap_core/docker_helper_functions.py +57 -0
chap_core/external/__init__.py +0 -0
chap_core/external/external_model.py +497 -0
chap_core/external/mlflow.py +259 -0
chap_core/external/python_model.py +40 -0
chap_core/external/r_description.py +32 -0
chap_core/external/r_model.py +60 -0
chap_core/external/r_models.py +16 -0
chap_core/external/spes.py +2 -0
chap_core/fetch/__init__.py +4 -0
chap_core/file_io/__init__.py +1 -0
chap_core/file_io/cleaners.py +62 -0
chap_core/file_io/example_data_set.py +59 -0
chap_core/file_io/external_file.py +17 -0
chap_core/file_io/file_paths.py +13 -0
chap_core/file_io/load.py +6 -0
chap_core/geo_coding/__init__.py +0 -0
chap_core/geo_coding/location_lookup.py +111 -0
chap_core/geojson.py +59 -0
chap_core/geometry.py +148 -0
chap_core/google_earth_engine/__init__.py +0 -0
chap_core/google_earth_engine/_refactor_gee_era5.py +140 -0
chap_core/google_earth_engine/gee_era5.py +246 -0
chap_core/google_earth_engine/gee_raw.py +131 -0
chap_core/internal_state.py +49 -0
chap_core/main.py +85 -0
chap_core/model_spec.py +97 -0
chap_core/omnipy_lib.py +29 -0
chap_core/pandas_adaptors.py +12 -0
chap_core/plotting/__init__.py +1 -0
chap_core/plotting/plotting.py +70 -0
chap_core/plotting/prediction_plot.py +179 -0
chap_core/predictor/__init__.py +29 -0
chap_core/predictor/feature_spec.py +30 -0
chap_core/predictor/naive_estimator.py +50 -0
chap_core/predictor/naive_predictor.py +131 -0
chap_core/predictor/poisson.py +24 -0
chap_core/predictor/protocol.py +50 -0
chap_core/reports/__init__.py +106 -0
chap_core/rest_api.py +169 -0
chap_core/rest_api_src/__init__.py +0 -0
chap_core/rest_api_src/_legacy.py +57 -0
chap_core/rest_api_src/data_models.py +27 -0
chap_core/rest_api_src/generate_rest_api.py +57 -0
chap_core/rest_api_src/worker_functions.py +139 -0
chap_core/runners/__init__.py +0 -0
chap_core/runners/command_line_runner.py +44 -0
chap_core/runners/conda_runner.py +17 -0
chap_core/runners/docker_runner.py +48 -0
chap_core/runners/runner.py +20 -0
chap_core/services/__init__.py +0 -0
chap_core/services/cache_manager.py +18 -0
chap_core/simulation/__init__.py +0 -0
chap_core/simulation/random_noise_simulator.py +22 -0
chap_core/simulation/seasonal_simulator.py +76 -0
chap_core/simulation/simulator.py +23 -0
chap_core/spatio_temporal_data/__init__.py +0 -0
chap_core/spatio_temporal_data/multi_country_dataset.py +88 -0
chap_core/spatio_temporal_data/omnipy_spatio_temporal_dataset.py +82 -0
chap_core/spatio_temporal_data/temporal_dataclass.py +400 -0
chap_core/testing/__init__.py +0 -0
chap_core/testing/estimators.py +18 -0
chap_core/testing/external_model.py +14 -0
chap_core/time_period/__init__.py +15 -0
chap_core/time_period/_legacy_implementation.py +90 -0
chap_core/time_period/dataclasses.py +78 -0
chap_core/time_period/date_util_wrapper.py +706 -0
chap_core/time_period/delta.py +10 -0
chap_core/time_period/multi_resolution.py +15 -0
chap_core/time_period/pandas_wrappers.py +9 -0
chap_core/time_period/period_range.py +65 -0
chap_core/time_period/protocols.py +42 -0
chap_core/time_period/relationships.py +12 -0
chap_core/training_control.py +48 -0
chap_core/transformations/__init__.py +0 -0
chap_core/transformations/covid_mask.py +26 -0
chap_core/util.py +39 -0
chap_core/worker/__init__.py +0 -0
chap_core/worker/background_tasks_worker.py +73 -0
chap_core/worker/interface.py +25 -0
chap_core/worker/rq_worker.py +76 -0
chap_core-0.0.8.dist-info/LICENSE +22 -0
chap_core-0.0.8.dist-info/METADATA +54 -0
chap_core-0.0.8.dist-info/RECORD +138 -0
chap_core-0.0.8.dist-info/WHEEL +6 -0
chap_core-0.0.8.dist-info/entry_points.txt +3 -0
chap_core-0.0.8.dist-info/top_level.txt +1 -0

chap_core/__init__.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""Top-level package for chap-core."""
+__author__ = """Sandvelab"""
+__email__ = "knutdrand@gmail.com"
+__version__ = "0.0.8"
+from . import fetch
+from . import data

chap_core/_legacy/__init__.py ADDED Viewed

File without changes

chap_core/_legacy/file_io.py ADDED Viewed

@@ -0,0 +1,50 @@
+from typing import List
+import chap_core.time_period.dataclasses as dc
+from chap_core.time_period import TimePeriod, Month, Day, Year
+def parse_period_string(time_string: str) -> TimePeriod:
+    period = TimePeriod.parse(time_string)
+    return period
+def write_time_series_data(data):
+    """Define two local helper functions and return None.
+    NOTE(review): the nested helpers below are never called or returned and
+    ``data`` is never used, so calling this function has no effect. The
+    helpers take ``self`` and look like methods lifted out of a class --
+    confirm the intended behaviour before relying on this function.
+    """
+    # NOTE(review): ``pd`` is not imported in the visible part of this module,
+    # so ``topandas`` would presumably raise NameError if called -- confirm.
+    def topandas(self):
+        # Build a DataFrame with one column per field read from ``self``.
+        data = pd.DataFrame(
+            {
+                "time_period": self.time_period.topandas(),
+                "rainfall": self.rainfall,
+                "mean_temperature": self.mean_temperature,
+                "disease_cases": self.disease_cases,
+            }
+        )
+        return data
+    # Local alias for ``topandas``.
+    to_pandas = topandas
+    def to_csv(self, csv_file: str, **kwargs):
+        """Write data to a csv file."""
+        data = self.to_pandas()
+        # Extra keyword arguments are forwarded to ``DataFrame.to_csv``.
+        data.to_csv(csv_file, index=False, **kwargs)
+def parse_periods_strings(time_strings: List[str]) -> dc.Period:
+    periods = [parse_period_string(time_string) for time_string in time_strings]
+    if not periods:
+        return dc.Period.empty()
+    t = type(periods[0])
+    assert all(type(period) == t for period in periods), periods
+    if t == Year:
+        return dc.Year([period.year for period in periods])
+    if t == Month:
+        return dc.Month(
+            [period.year for period in periods], [period.month for period in periods]
+        )
+    elif t == Day:
+        return dc.Day(
+            [period.year for period in periods],
+            [period.month for period in periods],
+            [period.day for period in periods],
+        )

chap_core/_legacy_dataset.py ADDED Viewed

@@ -0,0 +1,101 @@
+from typing import Protocol, TypeAlias, Union, Iterable, TypeVar, Tuple
+import pandas as pd
+from pydantic import BaseModel
+from chap_core.datatypes import Location
+from chap_core.time_period.dataclasses import Period
+SpatialIndexType: TypeAlias = Union[str, Location]
+TemporalIndexType: TypeAlias = Union[Period, Iterable[Period], slice]
+FeaturesT = TypeVar("FeaturesT")
+class ClimateData(BaseModel):
+    temperature: float
+    rainfall: float
+    humidity: float
+class DataType(BaseModel):
+    disease_cases: int
+    climate_data: ClimateData
+class IsTemporalDataSet(Protocol[FeaturesT]):
+    """Structural interface for a data set indexed by time period."""
+    def restrict_time_period(
+        self, start_period: Period = None, end_period: Period = None
+    ) -> "IsTemporalDataSet[FeaturesT]": ...
+    def to_tidy_dataframe(self) -> pd.DataFrame: ...
+    # The alternate constructor returns this protocol's own type, matching the
+    # ``from_tidy_dataframe`` declarations on the sibling protocols.
+    @classmethod
+    def from_tidy_dataframe(cls, df: pd.DataFrame) -> "IsTemporalDataSet[FeaturesT]": ...
+class TemporalArray:
+    def __init__(self, time_index, data):
+        self._time_index = time_index
+        self._data = data
+    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
+        return ufunc
+class IsSpatialDataSet(Protocol[FeaturesT]):
+    def get_locations(
+        self, location: Iterable[SpatialIndexType]
+    ) -> "IsSpatialDataSet[FeaturesT]": ...
+    def get_location(self, location: SpatialIndexType) -> FeaturesT: ...
+    def locations(self) -> Iterable[SpatialIndexType]: ...
+    def data(self) -> Iterable[FeaturesT]: ...
+    def location_items(self) -> Iterable[Tuple[SpatialIndexType, FeaturesT]]: ...
+    def to_tidy_dataframe(self) -> pd.DataFrame: ...
+    @classmethod
+    def from_tidy_dataframe(cls, df: pd.DataFrame) -> "IsSpatialDataSet[FeaturesT]": ...
+class IsSpatioTemporalDataSet(Protocol[FeaturesT]):
+    """Structural interface for a data set indexed by both location and time period."""
+    dataclass = ...
+    def get_data_for_locations(
+        self, location: Iterable[SpatialIndexType]
+    ) -> "IsSpatioTemporalDataSet[FeaturesT]": ...
+    def get_data_for_location(self, location: SpatialIndexType) -> FeaturesT: ...
+    def restrict_time_period(
+        self, start_period: Period = None, end_period: Period = None
+    ) -> "IsSpatioTemporalDataSet[FeaturesT]": ...
+    def start_time(self) -> Period: ...
+    def end_time(self) -> Period: ...
+    def locations(self) -> Iterable[SpatialIndexType]: ...
+    def data(self) -> Iterable[FeaturesT]: ...
+    def location_items(
+        self,
+    ) -> Iterable[Tuple[SpatialIndexType, IsTemporalDataSet[FeaturesT]]]: ...
+    def to_tidy_dataframe(self) -> pd.DataFrame: ...
+    @classmethod
+    def from_tidy_dataframe(
+        cls, df: pd.DataFrame
+    ) -> "IsSpatioTemporalDataSet[FeaturesT]": ...
+    def to_csv(self, file_name: str): ...
+    # A classmethod receives the class, so its first parameter is named ``cls``.
+    @classmethod
+    def from_csv(cls, file_name: str) -> "IsSpatioTemporalDataSet[FeaturesT]": ...

chap_core/adaptors/__init__.py ADDED Viewed

File without changes

chap_core/adaptors/gluonts.py ADDED Viewed

@@ -0,0 +1,50 @@
+from gluonts.model.estimator import Estimator
+from gluonts.dataset.common import ListDataset
+from gluonts.model.predictor import Predictor
+from ..data import DataSet
+from ..data.gluonts_adaptor.dataset import DataSetAdaptor
+from ..datatypes import Samples
+from ..time_period import PeriodRange
+from pathlib import Path
+from dataclasses import dataclass
+@dataclass
+class GluonTSPredictor:
+    """Adapter exposing a GluonTS ``Predictor`` through ``DataSet`` inputs and outputs."""
+    gluonts_predictor: Predictor
+    def predict(
+        self, history: DataSet, future_data: DataSet, num_samples=100
+    ) -> DataSet:
+        """Return sampled forecasts keyed by the locations of ``history``."""
+        gluonts_dataset = DataSetAdaptor.to_gluonts_testinstances(
+            history, future_data, self.gluonts_predictor.prediction_length
+        )
+        forecasts = self.gluonts_predictor.predict(
+            gluonts_dataset, num_samples=num_samples
+        )
+        # NOTE(review): pairing by position assumes the forecasts come back in
+        # the same order as ``history.keys()`` -- confirm against DataSetAdaptor.
+        data = {
+            location: Samples(
+                PeriodRange.from_pandas(forecast.index), forecast.samples.T
+            )
+            for location, forecast in zip(history.keys(), forecasts)
+        }
+        return DataSet(data)
+    def save(self, filename: str):
+        """Serialize the wrapped predictor into the directory ``filename``, creating it if needed."""
+        filepath = Path(filename)
+        filepath.mkdir(exist_ok=True, parents=True)
+        self.gluonts_predictor.serialize(filepath)
+    @classmethod
+    def load(cls, filename: str):
+        """Deserialize a predictor from the directory ``filename`` and wrap it."""
+        # Build via ``cls`` so a subclass gets an instance of its own type.
+        return cls(Predictor.deserialize(Path(filename)))
+@dataclass
+class GluonTSEstimator:
+    gluont_ts_estimator: Estimator
+    def train(self, dataset: DataSet) -> GluonTSPredictor:
+        gluonts_dataset = DataSetAdaptor.to_gluonts(dataset)
+        ds = ListDataset(gluonts_dataset, freq="m")
+        return GluonTSPredictor(self.gluont_ts_estimator.train(ds))

chap_core/alarms.py ADDED Viewed

@@ -0,0 +1,18 @@
+from typing import Iterable
+import numpy as np
+from pydantic import BaseModel
+# Epidemiological week: different from calendar week
+# https://www.cdc.gov/flu/weekly/overview.htm
+class OutbreakParameters(BaseModel):
+    endemic_factor: float
+    probability_threshold: float
+def outbreak_prediction(
+    parameters: OutbreakParameters, case_samples: Iterable[float]
+) -> bool:
+    """Presumably meant to decide from ``case_samples`` whether an outbreak is predicted.
+    NOTE(review): the body is unfinished. Neither argument is used, and
+    ``np.mean()`` is called without its required array argument, so calling
+    this function raises ``TypeError``. The intended formula is not visible
+    here -- confirm before implementing.
+    """
+    return np.mean()

chap_core/api.py ADDED Viewed

@@ -0,0 +1,264 @@
+import logging
+import json
+from .assessment.forecast import forecast as do_forecast
+import zipfile
+from pathlib import Path
+from typing import Optional, List
+import numpy as np
+from .assessment.dataset_splitting import train_test_split_with_weather
+from .datatypes import (
+    HealthData,
+    ClimateData,
+    HealthPopulationData,
+    SimpleClimateData,
+    FullData,
+)
+from .dhis2_interface.json_parsing import (
+    predictions_to_datavalue,
+    parse_disease_data,
+    json_to_pandas,
+    parse_population_data,
+)
+from .external.external_model import get_model_from_directory_or_github_url
+from .file_io.example_data_set import DataSetType, datasets
+from .geojson import geojson_to_graph, NeighbourGraph
+from .plotting.prediction_plot import plot_forecast_from_summaries
+from .predictor import get_model
+from .spatio_temporal_data.temporal_dataclass import DataSet
+import dataclasses
+from .time_period.date_util_wrapper import delta_month, Month
+from .transformations.covid_mask import mask_covid_data
+logger = logging.getLogger(__name__)
+class DummyControl:
+    def set_status(self, status):
+        pass
+    @property
+    def current_control(self):
+        return None
+@dataclasses.dataclass
+class AreaPolygons:
+    shape_file: str
+@dataclasses.dataclass
+class PredictionData:
+    area_polygons: AreaPolygons = None
+    health_data: DataSet[HealthData] = None
+    climate_data: DataSet[ClimateData] = None
+    population_data: DataSet[HealthPopulationData] = None
+    disease_id: Optional[str] = None
+    features: List[object] = None
+def extract_disease_name(health_data: dict) -> str:
+    return health_data["rows"][0][0]
+def read_zip_folder(zip_file_path: str) -> PredictionData:
+    """Read a zip archive of JSON/GeoJSON files into a ``PredictionData`` object.
+    The archive must contain ``orgUnits.geojson``, ``disease.json``,
+    ``population.json``, ``temperature.json`` and ``precipitation.json``.
+    Raises ``AssertionError`` when the temperature and precipitation tables
+    do not have identical time periods and locations.
+    """
+    logger.debug("Reading prediction data from %s", zip_file_path)
+    expected_files = {
+        "area_polygons": "orgUnits.geojson",
+        "disease": "disease.json",
+        "population": "population.json",
+        "temperature": "temperature.json",
+        "precipitation": "precipitation.json",
+    }
+    # The context managers close the archive and its members even when parsing fails.
+    with zipfile.ZipFile(zip_file_path) as ziparchive:
+        def load_json(key):
+            """Parse the archive member registered under ``key``."""
+            with ziparchive.open(expected_files[key]) as member:
+                return json.load(member)
+        disease_json = load_json("disease")
+        temperature_json = load_json("temperature")
+        # Must come from precipitation.json; reading temperature.json here
+        # would copy the temperatures into the rainfall column.
+        precipitation_json = load_json("precipitation")
+        population_json = load_json("population")
+        features = load_json("area_polygons")["features"]
+        # NOTE(review): assumes ``from_geojson_file`` reads the file eagerly,
+        # since the member is closed when this block exits -- confirm.
+        with ziparchive.open(expected_files["area_polygons"]) as polygons_file:
+            graph = NeighbourGraph.from_geojson_file(polygons_file)
+    logger.debug("Neighbour graph: %s", graph)
+    # The integers are presumably column positions within each JSON row.
+    name_mapping = {"time_period": 2, "disease_cases": 3, "location": 1}
+    disease = parse_disease_data(disease_json, name_mapping=name_mapping)
+    disease_id = extract_disease_name(disease_json)
+    name_mapping = {"time_period": 2, "mean_temperature": 3, "location": 1}
+    temperature = json_to_pandas(temperature_json, name_mapping)
+    name_mapping = {"time_period": 2, "precipitation": 3, "location": 1}
+    precipitation = json_to_pandas(precipitation_json, name_mapping)
+    # The two climate tables are merged column-wise, so their rows must line up.
+    assert np.all(precipitation.time_period == temperature.time_period)
+    assert np.all(precipitation.location == temperature.location)
+    temperature["rainfall"] = precipitation["precipitation"]
+    temperature["rainfall"] = temperature["rainfall"].astype(float)
+    temperature["mean_temperature"] = temperature["mean_temperature"].astype(float)
+    climate = DataSet.from_pandas(temperature, dataclass=SimpleClimateData)
+    population = parse_population_data(population_json)
+    return PredictionData(
+        health_data=disease,
+        climate_data=climate,
+        population_data=population,
+        area_polygons=graph,
+        disease_id=disease_id,
+        features=features,
+    )
+#    ...
+def dhis_zip_flow(
+    zip_file_path: str,
+    out_json: Optional[str] = None,
+    model_name=None,
+    n_months=4,
+    docker_filename: Optional[str] = None,
+) -> dict | None:
+    """Read a zip archive, train and forecast, then return or save the result.
+    The result is the dict produced by ``train_on_prediction_data`` (keys
+    ``"diseaseId"`` and ``"dataValues"``). When ``out_json`` is given, the
+    dict is written to that path as JSON and ``None`` is returned; otherwise
+    the dict itself is returned.
+    """
+    data: PredictionData = read_zip_folder(zip_file_path)
+    json_body = train_on_prediction_data(data, model_name, n_months, docker_filename)
+    if out_json is None:
+        return json_body
+    with open(out_json, "w") as f:
+        json.dump(json_body, f)
+    return None
+def train_on_prediction_data(
+    data,
+    model_name=None,
+    n_months=4,
+    docker_filename=None,
+    model_path=None,
+    control=None,
+):
+    if control is None:
+        control = DummyControl()
+    control.set_status("Preprocessing")
+    if model_name == "external":
+        model = get_model_from_directory_or_github_url(model_path)
+    else:
+        model = get_model(model_name)()
+    start_timestamp = min(
+        data.health_data.start_timestamp, data.climate_data.start_timestamp
+    )
+    end_timestamp = max(data.health_data.end_timestamp, data.climate_data.end_timestamp)
+    new_dict = {}
+    for location in data.health_data.locations():
+        health = data.health_data.get_location(location).fill_to_range(
+            start_timestamp, end_timestamp
+        )
+        climate = data.climate_data.get_location(location).fill_to_range(
+            start_timestamp, end_timestamp
+        )
+        assert (
+            location in data.population_data
+        ), f"Location {location} not in population data: {data.population_data.keys()}"
+        population = data.population_data[location]
+        new_dict[location] = FullData.combine(health.data(), climate.data(), population)
+    climate_health_data = DataSet(new_dict)
+    prediction_start = Month(climate_health_data.end_timestamp) - n_months * delta_month
+    train_data, _, future_weather = train_test_split_with_weather(
+        climate_health_data, prediction_start
+    )
+    logger.info(f"Training model {model_name} on {len(train_data.items())} locations")
+    control.set_status("Training")
+    if hasattr(model, "set_training_control"):
+        model.set_training_control(control.current_control)
+    if hasattr(model, "set_graph"):
+        model.set_graph(data.area_polygons)
+    model.train(train_data)  # , extra_args=data.area_polygons)
+    logger.info(
+        f"Forecasting using {model_name} on {len(train_data.items())} locations"
+    )
+    control.set_status("Forecasting")
+    predictions = model.forecast(future_weather, forecast_delta=n_months * delta_month)
+    attrs = ["median", "quantile_high", "quantile_low"]
+    logger.info("Converting predictions to json")
+    control.set_status("Postprocessing")
+    data_values = predictions_to_datavalue(
+        predictions, attribute_mapping=dict(zip(attrs, attrs))
+    )
+    json_body = [dataclasses.asdict(element) for element in data_values]
+    diseaseId = data.disease_id
+    return {"diseaseId": diseaseId, "dataValues": json_body}
+    # return json_body
+def train_with_validation(model_name, dataset_name, n_months=12):
+    dataset = datasets[dataset_name].load()
+    # assert not np.any(np.any(np.isnan(data.to_array()[:, 1:])) for data in dataset.values()), "Dataset contains NaN values"
+    # assert not any(np.any(np.isnan(data.mean_temperature) | np.isnan(data.rainfall)) for data in dataset.values()), "Dataset contains NaN values"
+    dataset = mask_covid_data(dataset)
+    model = get_model(model_name)(n_iter=32000)
+    # split_point = dataset.end_timestamp - n_months * delta_month
+    # train_data, test_data, future_weather = train_test_split_with_weather(dataset, split_point)
+    prediction_length = n_months * delta_month
+    split_point = dataset.end_timestamp - prediction_length
+    split_period = Month(split_point.year, split_point.month)
+    train_data, test_set, future_weather = train_test_split_with_weather(
+        dataset, split_period
+    )
+    model.set_validation_data(test_set)
+    model.train(train_data)
+    predictions = model.forecast(
+        future_weather, forecast_delta=n_months * delta_month, n_samples=100
+    )
+    # plot predictions
+    figs = []
+    for location, prediction in predictions.items():
+        fig = plot_forecast_from_summaries(
+            prediction.data(), dataset.get_location(location).data()
+        )  # , lambda x: np.log(x+1))
+        figs.append(fig)
+    return figs
+def forecast(
+    model_name: str,
+    dataset_name: DataSetType,
+    n_months: int,
+    model_path: Optional[str] = None,
+):
+    logging.basicConfig(level=logging.INFO)
+    dataset = datasets[dataset_name].load()
+    if model_name == "external":
+        model = get_model_from_directory_or_github_url(model_path)
+    else:
+        model = get_model(model_name)
+        model = model()
+    # model = get_model(model_name)()
+    predictions = do_forecast(model, dataset, n_months * delta_month)
+    figs = []
+    for location, prediction in predictions.items():
+        fig = plot_forecast_from_summaries(
+            prediction.data(), dataset.get_location(location).data()
+        )  # , lambda x: np.log(x+1))
+        figs.append(fig)
+    return figs

chap_core/api_types.py ADDED Viewed

@@ -0,0 +1,55 @@
+from pydantic import BaseModel
+from pydantic_geojson import (
+    FeatureCollectionModel as _FeatureCollectionModel,
+    FeatureModel as _FeatureModel,
+)
+class FeatureModel(_FeatureModel):
+    id: str
+class FeatureCollectionModel(_FeatureCollectionModel):
+    features: list[FeatureModel]
+class DataElement(BaseModel):
+    pe: str
+    ou: str
+    value: float
+class DataList(BaseModel):
+    featureId: str
+    dhis2Id: str
+    data: list[DataElement]
+class RequestV1(BaseModel):
+    orgUnitsGeoJson: FeatureCollectionModel
+    features: list[DataList]
+class RequestV2(RequestV1):
+    model_id: str = 'chap_ewars'
+class PredictionRequest(RequestV2):
+    n_periods: int = 3
+class PeriodObservation(BaseModel):
+    time_period: str
+# class Geometry:
+#     type: str
+#     coordinates: list[list[float]]
+#
+#
+# class GeoJSONObject(BaseModel):
+#     id: str
+#     geometry: dict
+#
+#
+# class GeoJSON(BaseModel):
+#     type: str
+#     features: list[dict]

chap_core/assessment/__init__.py ADDED Viewed

File without changes

chap_core/assessment/dataset_splitting.py ADDED Viewed

@@ -0,0 +1,141 @@
+from typing import Iterable, Tuple, Protocol, Optional, Type
+from chap_core._legacy_dataset import IsSpatioTemporalDataSet
+from chap_core.climate_predictor import FutureWeatherFetcher
+from chap_core.datatypes import ClimateHealthData, ClimateData
+from chap_core.spatio_temporal_data.temporal_dataclass import DataSet
+from chap_core.time_period import Month, TimePeriod
+from chap_core.time_period.relationships import previous
+def split_period_on_resolution(param, param1, resolution) -> Iterable[Month]:
+    pass
+def extend_to(period, future_length):
+    pass
+class IsTimeDelta(Protocol):
+    pass
+def split_test_train_on_period(
+    data_set: IsSpatioTemporalDataSet,
+    split_points: Iterable[TimePeriod],
+    future_length: Optional[IsTimeDelta] = None,
+    include_future_weather: bool = False,
+    future_weather_class: Type[ClimateData] = ClimateData,
+):
+    """Lazily split ``data_set`` once per period in ``split_points``.
+    Returns a generator. With ``include_future_weather`` each item is the
+    result of ``train_test_split_with_weather`` (train, test, future weather);
+    otherwise each item is the result of ``train_test_split`` (train, test).
+    ``future_length`` is passed on as the ``extension`` argument of the
+    underlying split function.
+    """
+    if include_future_weather:
+        return (
+            train_test_split_with_weather(
+                data_set, period, future_length, future_weather_class
+            )
+            for period in split_points
+        )
+    return (
+        train_test_split(data_set, period, future_length) for period in split_points
+    )
+def split_train_test_with_future_weather(
+    data_set: IsSpatioTemporalDataSet,
+    split_points: Iterable[TimePeriod],
+    future_length: Optional[IsTimeDelta] = None,
+):
+    """Lazily yield one ``train_test_split`` result per period in ``split_points``.
+    NOTE(review): despite the name, this delegates to ``train_test_split`` and
+    therefore yields (train, test) pairs with no future-weather element;
+    confirm whether ``train_test_split_with_weather`` was intended.
+    """
+    return (
+        train_test_split(data_set, period, future_length) for period in split_points
+    )
+# Should we index on split-timestamp, first time period, or complete time?
+def train_test_split(
+    data_set: IsSpatioTemporalDataSet,
+    prediction_start_period: TimePeriod,
+    extension: Optional[IsTimeDelta] = None,
+    restrict_test=True,
+):
+    last_train_period = previous(prediction_start_period)
+    train_data = data_set.restrict_time_period(slice(None, last_train_period))
+    if extension is not None:
+        end_period = prediction_start_period.extend_to(extension)
+    else:
+        end_period = None
+    if restrict_test:
+        test_data = data_set.restrict_time_period(
+            slice(prediction_start_period, end_period)
+        )
+    else:
+        test_data = data_set
+    return train_data, test_data
+def train_test_generator(
+    dataset: DataSet, prediction_length: int, n_test_sets: int = 1, future_weather_provider: Optional[FutureWeatherFetcher] = None
+) -> tuple[DataSet, Iterable[tuple[DataSet, DataSet, DataSet]]]:
+    """
+    Generate a train set along with an iterator of test cases.
+    Each test case is a tuple ``(historic_data, masked_future_data, future_data)``:
+    the full data up until a split point, the following ``prediction_length``
+    periods without the target variable, and those same periods unmasked.
+    When ``future_weather_provider`` is given, the masked future data is what
+    ``future_weather_provider(historic_data).get_future_weather(...)`` returns;
+    otherwise it is ``future_data`` with the ``disease_cases`` field removed.
+    """
+    # Negative index: the last ``prediction_length + n_test_sets`` periods are
+    # kept out of the training set.
+    split_idx = -(prediction_length + n_test_sets)
+    train_set = dataset.restrict_time_period(
+        slice(None, dataset.period_range[split_idx])
+    )
+    # Test set ``i`` moves the split point ``i`` periods later.
+    historic_data = [
+        dataset.restrict_time_period(slice(None, dataset.period_range[split_idx + i]))
+        for i in range(n_test_sets)
+    ]
+    future_data = [
+        dataset.restrict_time_period(
+            slice(
+                dataset.period_range[split_idx + i + 1],
+                dataset.period_range[split_idx + i + prediction_length],
+            )
+        )
+        for i in range(n_test_sets)
+    ]
+    if future_weather_provider is not None:
+        masked_future_data = [future_weather_provider(hd).get_future_weather(fd.period_range) for (hd, fd) in zip(historic_data, future_data)]
+    else:
+        # ``fd`` avoids shadowing the ``dataset`` parameter.
+        masked_future_data = (fd.remove_field("disease_cases") for fd in future_data)
+    return train_set, zip(historic_data, masked_future_data, future_data)
+def train_test_split_with_weather(
+    data_set: DataSet,
+    prediction_start_period: TimePeriod,
+    extension: Optional[IsTimeDelta] = None,
+    future_weather_class: Type[ClimateData] = ClimateData,
+):
+    train_set, test_set = train_test_split(data_set, prediction_start_period, extension)
+    tmp_values: Iterable[Tuple[str, ClimateHealthData]] = (
+        (loc, temporal_data.data()) for loc, temporal_data in test_set.items()
+    )
+    future_weather = test_set.remove_field("disease_cases")  # SpatioTemporalDict(
+    train_periods = {
+        str(period) for data in train_set.data() for period in data.data().time_period
+    }
+    future_periods = {
+        str(period)
+        for data in future_weather.data()
+        for period in data.data().time_period
+    }
+    assert (
+        train_periods & future_periods == set()
+    ), f"Train and future weather data overlap: {train_periods & future_periods}"
+    return train_set, test_set, future_weather
+def get_split_points_for_data_set(
+    data_set: IsSpatioTemporalDataSet, max_splits: int, start_offset=1
+) -> list[TimePeriod]:
+    periods = (
+        next(iter(data_set.data())).data().time_period
+    )  # Uses the time for the first location, assumes it to be the same for all!
+    return get_split_points_for_period_range(max_splits, periods, start_offset)
+def get_split_points_for_period_range(max_splits, periods, start_offset):
+    """Pick up to ``max_splits`` evenly spaced split points from ``periods``.
+    The first ``start_offset`` periods are skipped and the remainder is
+    sampled at a fixed stride. Fewer than ``max_splits`` points (possibly
+    none) are returned when ``periods`` is too short.
+    """
+    # Clamp the stride to 1: with too few periods the integer division gives
+    # 0 (an invalid slice step, ValueError) or a negative, backwards stride.
+    delta = max(1, (len(periods) - 1 - start_offset) // (max_splits + 1))
+    return list(periods)[start_offset + delta :: delta][:max_splits]