cavapy 1.1.0__tar.gz → 1.1.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cavapy might be problematic. Click here for more details.
- {cavapy-1.1.0 → cavapy-1.1.5}/PKG-INFO +46 -1
- {cavapy-1.1.0 → cavapy-1.1.5}/README.md +63 -18
- cavapy-1.1.5/cava_bias.py +73 -0
- cavapy-1.1.5/cava_config.py +65 -0
- cavapy-1.1.5/cava_download.py +450 -0
- cavapy-1.1.5/cava_plot.py +204 -0
- cavapy-1.1.5/cava_validation.py +359 -0
- cavapy-1.1.5/cavapy.py +523 -0
- {cavapy-1.1.0 → cavapy-1.1.5}/pyproject.toml +2 -1
- cavapy-1.1.0/cavapy.py +0 -1177
- {cavapy-1.1.0 → cavapy-1.1.5}/LICENSE +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cavapy
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.1.5
|
|
4
4
|
Summary: CAVA Python package. Retrieve climate data.
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -100,6 +100,13 @@ The get_climate_data function performs automatically:
|
|
|
100
100
|
- Convert into a Gregorian calendar (CORDEX-CORE models do not have a full 365 days calendar) through linear interpolation
|
|
101
101
|
- Bias correction using the empirical quantile mapping (optional)
|
|
102
102
|
|
|
103
|
+
### Parallelization strategy
|
|
104
|
+
- If you request a single model/RCP combination, cavapy parallelizes **across variables** (one process per variable).
|
|
105
|
+
- If you request multiple models and/or RCPs, cavapy parallelizes **across combo-variable tasks** (one process per variable per model/RCP combination), capped by the global process limit.
|
|
106
|
+
- If `num_processes <= 1` or only one variable is requested, variables run sequentially (even for a single combo).
|
|
107
|
+
- By default, up to **12 total processes** are used (capped by number of combo-variable tasks).
|
|
108
|
+
- Inside each process, a thread pool handles per-variable downloads and observation/model fetches concurrently.
|
|
109
|
+
|
|
103
110
|
## Example usage
|
|
104
111
|
|
|
105
112
|
Depending on the interest, downloading climate data can be done in a few different ways. Note that GCM stands for General Circulation Model while RCM stands for Regional Climate Model. As the climate data comes from the CORDEX-CORE initiative, users can choose between 3 different GCMs downscaled with two RCMs. In total, there are six simulations for any given domain (except for CAS-22 where only three are available).
|
|
@@ -238,6 +245,44 @@ import cavapy
|
|
|
238
245
|
Togo_climate_data = cavapy.get_climate_data(country="Togo", variables=["tasmax", "pr"], obs=True, years_obs=range(1980,2019))
|
|
239
246
|
```
|
|
240
247
|
|
|
248
|
+
### Multiple models and/or RCPs
|
|
249
|
+
|
|
250
|
+
You can pass lists (or None) to `rcp`, `gcm`, and `rcm`. If multiple combinations are requested,
|
|
251
|
+
the return structure becomes nested:
|
|
252
|
+
|
|
253
|
+
```
|
|
254
|
+
results[rcp][f"{gcm}-{rcm}"][variable] -> DataArray
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
Example: all models and both RCPs for Togo (AFR-22):
|
|
258
|
+
```
|
|
259
|
+
import cavapy
|
|
260
|
+
|
|
261
|
+
data = cavapy.get_climate_data(
|
|
262
|
+
country="Togo",
|
|
263
|
+
cordex_domain="AFR-22",
|
|
264
|
+
rcp=None, # all RCPs
|
|
265
|
+
gcm=None, # all GCMs
|
|
266
|
+
rcm=None, # all RCMs
|
|
267
|
+
years_up_to=2030,
|
|
268
|
+
historical=True,
|
|
269
|
+
dataset="CORDEX-CORE",
|
|
270
|
+
)
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
Example: specific models and RCPs:
|
|
274
|
+
```
|
|
275
|
+
data = cavapy.get_climate_data(
|
|
276
|
+
country="Togo",
|
|
277
|
+
cordex_domain="AFR-22",
|
|
278
|
+
rcp=["rcp26", "rcp85"],
|
|
279
|
+
gcm=["MPI", "MOHC"],
|
|
280
|
+
rcm=["Reg", "REMO"],
|
|
281
|
+
years_up_to=2030,
|
|
282
|
+
historical=True,
|
|
283
|
+
)
|
|
284
|
+
```
|
|
285
|
+
|
|
241
286
|
## Plotting Functionality
|
|
242
287
|
|
|
243
288
|
`cavapy` now includes built-in plotting functions to easily visualize your climate data as maps and time series. The plotting functions work seamlessly with the data returned by `get_climate_data()`. **However, if your main goal is visualisation, we strongly encourage you to check out [CAVAanalytics](https://risk-team.github.io/CAVAanalytics/), our R package**.
|
|
@@ -63,13 +63,20 @@ conda activate test
|
|
|
63
63
|
pip install cavapy
|
|
64
64
|
```
|
|
65
65
|
|
|
66
|
-
## Process
|
|
67
|
-
|
|
68
|
-
The get_climate_data function performs automatically:
|
|
69
|
-
- Data retrieval in parallel
|
|
70
|
-
- Unit conversion
|
|
71
|
-
- Convert into a Gregorian calendar (CORDEX-CORE models do not have a full 365 days calendar) through linear interpolation
|
|
72
|
-
- Bias correction using the empirical quantile mapping (optional)
|
|
66
|
+
## Process
|
|
67
|
+
|
|
68
|
+
The get_climate_data function performs automatically:
|
|
69
|
+
- Data retrieval in parallel
|
|
70
|
+
- Unit conversion
|
|
71
|
+
- Convert into a Gregorian calendar (CORDEX-CORE models do not have a full 365 days calendar) through linear interpolation
|
|
72
|
+
- Bias correction using the empirical quantile mapping (optional)
|
|
73
|
+
|
|
74
|
+
### Parallelization strategy
|
|
75
|
+
- If you request a single model/RCP combination, cavapy parallelizes **across variables** (one process per variable).
|
|
76
|
+
- If you request multiple models and/or RCPs, cavapy parallelizes **across combo-variable tasks** (one process per variable per model/RCP combination), capped by the global process limit.
|
|
77
|
+
- If `num_processes <= 1` or only one variable is requested, variables run sequentially (even for a single combo).
|
|
78
|
+
- By default, up to **12 total processes** are used (capped by number of combo-variable tasks).
|
|
79
|
+
- Inside each process, a thread pool handles per-variable downloads and observation/model fetches concurrently.
|
|
73
80
|
|
|
74
81
|
## Example usage
|
|
75
82
|
|
|
@@ -77,9 +84,9 @@ Depending on the interest, downloading climate data can be done in a few differe
|
|
|
77
84
|
Since bias-correction requires both the historical run of the CORDEX model and the observational dataset (in this case ERA5), even when the historical argument is set to False, the historical run will be used for learning the bias correction factor.
|
|
78
85
|
|
|
79
86
|
|
|
80
|
-
### Bias-corrected climate projections
|
|
81
|
-
|
|
82
|
-
**Option 1: Use pre-bias-corrected ISIMIP data (Recommended)**
|
|
87
|
+
### Bias-corrected climate projections
|
|
88
|
+
|
|
89
|
+
**Option 1: Use pre-bias-corrected ISIMIP data (Recommended)**
|
|
83
90
|
|
|
84
91
|
*Example with AFR-22 domain:*
|
|
85
92
|
```
|
|
@@ -129,7 +136,7 @@ Togo_climate_data = cavapy.get_climate_data(
|
|
|
129
136
|
dataset="CORDEX-CORE" # Original data with on-the-fly bias correction
|
|
130
137
|
)
|
|
131
138
|
```
|
|
132
|
-
### Non bias-corrected climate projections (Original CORDEX-CORE data)
|
|
139
|
+
### Non bias-corrected climate projections (Original CORDEX-CORE data)
|
|
133
140
|
|
|
134
141
|
```
|
|
135
142
|
import cavapy
|
|
@@ -145,7 +152,7 @@ Togo_climate_data = cavapy.get_climate_data(
|
|
|
145
152
|
dataset="CORDEX-CORE" # Original data, no bias correction
|
|
146
153
|
)
|
|
147
154
|
```
|
|
148
|
-
### Climate projections plus historical run
|
|
155
|
+
### Climate projections plus historical run
|
|
149
156
|
|
|
150
157
|
This is useful when assessing changes from the historical period.
|
|
151
158
|
|
|
@@ -202,12 +209,50 @@ Togo_climate_data = cavapy.get_climate_data(
|
|
|
202
209
|
dataset="CORDEX-CORE"
|
|
203
210
|
)
|
|
204
211
|
```
|
|
205
|
-
### Observations only (ERA5)
|
|
206
|
-
|
|
207
|
-
```
|
|
208
|
-
import cavapy
|
|
209
|
-
Togo_climate_data = cavapy.get_climate_data(country="Togo", variables=["tasmax", "pr"], obs=True, years_obs=range(1980,2019))
|
|
210
|
-
```
|
|
212
|
+
### Observations only (ERA5)
|
|
213
|
+
|
|
214
|
+
```
|
|
215
|
+
import cavapy
|
|
216
|
+
Togo_climate_data = cavapy.get_climate_data(country="Togo", variables=["tasmax", "pr"], obs=True, years_obs=range(1980,2019))
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
### Multiple models and/or RCPs
|
|
220
|
+
|
|
221
|
+
You can pass lists (or None) to `rcp`, `gcm`, and `rcm`. If multiple combinations are requested,
|
|
222
|
+
the return structure becomes nested:
|
|
223
|
+
|
|
224
|
+
```
|
|
225
|
+
results[rcp][f"{gcm}-{rcm}"][variable] -> DataArray
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
Example: all models and both RCPs for Togo (AFR-22):
|
|
229
|
+
```
|
|
230
|
+
import cavapy
|
|
231
|
+
|
|
232
|
+
data = cavapy.get_climate_data(
|
|
233
|
+
country="Togo",
|
|
234
|
+
cordex_domain="AFR-22",
|
|
235
|
+
rcp=None, # all RCPs
|
|
236
|
+
gcm=None, # all GCMs
|
|
237
|
+
rcm=None, # all RCMs
|
|
238
|
+
years_up_to=2030,
|
|
239
|
+
historical=True,
|
|
240
|
+
dataset="CORDEX-CORE",
|
|
241
|
+
)
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
Example: specific models and RCPs:
|
|
245
|
+
```
|
|
246
|
+
data = cavapy.get_climate_data(
|
|
247
|
+
country="Togo",
|
|
248
|
+
cordex_domain="AFR-22",
|
|
249
|
+
rcp=["rcp26", "rcp85"],
|
|
250
|
+
gcm=["MPI", "MOHC"],
|
|
251
|
+
rcm=["Reg", "REMO"],
|
|
252
|
+
years_up_to=2030,
|
|
253
|
+
historical=True,
|
|
254
|
+
)
|
|
255
|
+
```
|
|
211
256
|
|
|
212
257
|
## Plotting Functionality
|
|
213
258
|
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"""Bias-correction utilities for CORDEX data using xsdba."""
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import xsdba as sdba
|
|
5
|
+
import xarray as xr
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _leave_one_out_bias_correction(ref, hist, variable, log):
    """
    Bias-correct ``hist`` with leave-one-out cross-validation over calendar years.

    For every calendar year present in ``hist``, an empirical quantile mapping
    is trained (grouped by month) on all *other* years of ``ref`` and ``hist``
    and then applied to the held-out year only, so no year is adjusted with a
    mapping that was trained on that same year.

    Args:
        ref: Reference (observational) data with a ``time`` coordinate.
        hist: Historical model data with a ``time`` coordinate.
        variable: Variable name. "pr", "rsds" and "sfcWind" use a
            multiplicative ("*") correction, every other name an additive
            ("+") one. "hurs" results are additionally bounded to [0, 100].
        log: Logger instance used for progress messages.

    Returns:
        xr.DataArray: Bias-corrected historical data, concatenated along
        ``time`` and sorted by ``time``.
    """
    log.info("Starting leave-one-out cross-validation for bias correction")

    # Loop-invariant values: the year of each time step and the correction kind.
    hist_year = hist.time.dt.year
    ref_year = ref.time.dt.year
    kind = "*" if variable in ("pr", "rsds", "sfcWind") else "+"

    # One corrected DataArray per held-out year, concatenated at the end.
    corrected_years = []

    for leave_out_year in np.unique(hist_year.values):
        log.info(f"Processing leave-out year: {leave_out_year}")

        # Training data: every year except the held-out one.
        hist_train = hist.sel(time=hist_year != leave_out_year)
        ref_train = ref.sel(time=ref_year != leave_out_year)
        # Test data: only the held-out year of the model run.
        hist_test = hist.sel(time=hist_year == leave_out_year)

        # Train the quantile mapping on the training years only.
        qm_leave_out = sdba.EmpiricalQuantileMapping.train(
            ref_train,
            hist_train,
            group="time.month",
            kind=kind,
        )

        # Apply the mapping to the held-out year.
        hist_corrected_year = qm_leave_out.adjust(
            hist_test, extrapolation="constant", interp="linear"
        )

        if variable == "hurs":
            # Bound to [0, 100]. ``clip`` leaves NaN cells untouched, whereas
            # ``x.where(x <= 100, 100)`` would turn every NaN into 100 because
            # NaN fails the comparison.
            hist_corrected_year = hist_corrected_year.clip(
                min=0, max=100, keep_attrs=True
            )

        corrected_years.append(hist_corrected_year)

    # Concatenate all corrected years and sort by time.
    hist_bs = xr.concat(corrected_years, dim="time").sortby("time")

    log.info("Leave-one-out cross-validation bias correction completed")
    return hist_bs
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Configuration constants and logging setup for cavapy."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import logging
|
|
5
|
+
import warnings
|
|
6
|
+
|
|
7
|
+
# Suppress cartopy download warnings for Natural Earth data.
try:
    from cartopy.io import DownloadWarning
except ImportError:
    # DownloadWarning could not be imported: fall back to suppressing all
    # UserWarnings raised from cartopy.io.
    warnings.filterwarnings("ignore", category=UserWarning, module="cartopy.io")
else:
    warnings.filterwarnings("ignore", category=DownloadWarning)

# Logger named "climate" with exactly one stream handler attached.
logger = logging.getLogger("climate")
# Remove any handlers already attached to this logger (e.g. from an earlier
# import or reload) so each message is emitted only once by this module.
for hdlr in logger.handlers[:]:
    logger.removeHandler(hdlr)
handler = logging.StreamHandler()
formatter = logging.Formatter(
    "%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    datefmt="%H:%M:%S",
)
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)

# Maps each supported variable name to a second name for the same variable.
# NOTE(review): the values look like the variable names of the ERA5
# observational dataset - confirm against the download code.
VARIABLES_MAP = {
    "pr": "tp",
    "tasmax": "t2mx",
    "tasmin": "t2mn",
    "hurs": "hurs",
    "sfcWind": "sfcwind",
    "rsds": "ssrd",
}
# Accepted variable names: the keys of VARIABLES_MAP, in definition order.
VALID_VARIABLES = list(VARIABLES_MAP)
# NOTE(review): the README mentions a CAS-22 domain, which is not listed
# here - confirm whether that omission is intentional.
VALID_DOMAINS = [
    "NAM-22",
    "EUR-22",
    "AFR-22",
    "EAS-22",
    "SEA-22",
    "WAS-22",
    "AUS-22",
    "SAM-22",
    "CAM-22",
]
VALID_RCPS = ["rcp26", "rcp85"]
VALID_GCM = ["MOHC", "MPI", "NCC"]
VALID_RCM = ["REMO", "Reg"]
VALID_DATASETS = ["CORDEX-CORE", "CORDEX-CORE-BC"]

# Data inventory: remote URL plus a path below the current user's home
# directory.
INVENTORY_DATA_REMOTE_URL = (
    "https://hub.ipcc.ifca.es/thredds/fileServer/inventories/cava.csv"
)
INVENTORY_DATA_LOCAL_PATH = os.path.join(
    os.path.expanduser("~"), "shared/inventories/cava/inventory.csv"
)
# ERA5 data: remote URL plus a path below the current user's home directory.
ERA5_DATA_REMOTE_URL = (
    "https://hub.ipcc.ifca.es/thredds/dodsC/fao/observations/ERA5/0.25/ERA5_025.ncml"
)
ERA5_DATA_LOCAL_PATH = os.path.join(
    os.path.expanduser("~"), "shared/data/observations/ERA5/0.25/ERA5_025.ncml"
)
# Default observation years: 1980 through 2005 (the range end is exclusive).
DEFAULT_YEARS_OBS = range(1980, 2006)
|