climdata 0.0.2__tar.gz → 0.0.5__tar.gz
This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Potentially problematic release.
This version of climdata might be problematic.
- {climdata-0.0.2 → climdata-0.0.5}/.github/workflows/docs.yml +1 -1
- climdata-0.0.5/.github/workflows/pypi.yml +43 -0
- {climdata-0.0.2 → climdata-0.0.5}/.github/workflows/ubuntu.yml +0 -1
- {climdata-0.0.2 → climdata-0.0.5}/PKG-INFO +7 -1
- {climdata-0.0.2 → climdata-0.0.5}/climdata/__init__.py +4 -1
- climdata-0.0.5/climdata/conf/config.yaml +37 -0
- {climdata-0.0.2 → climdata-0.0.5}/climdata/conf/mappings/parameters.yaml +1 -1
- climdata-0.0.5/climdata/conf/mappings/variables.yaml +77 -0
- climdata-0.0.5/climdata/datasets/CMIP.py +224 -0
- climdata-0.0.5/climdata/datasets/MSWX.py +300 -0
- climdata-0.0.5/climdata/utils/config.py +30 -0
- {climdata-0.0.2 → climdata-0.0.5}/climdata/utils/utils_download.py +1 -2
- {climdata-0.0.2 → climdata-0.0.5}/climdata.egg-info/PKG-INFO +7 -1
- {climdata-0.0.2 → climdata-0.0.5}/climdata.egg-info/SOURCES.txt +3 -2
- {climdata-0.0.2 → climdata-0.0.5}/climdata.egg-info/requires.txt +6 -0
- climdata-0.0.5/docs/climdata.md +4 -0
- climdata-0.0.5/docs/common.md +41 -0
- climdata-0.0.5/docs/index.md +193 -0
- climdata-0.0.5/examples/extract_dwd_loc.ipynb +1097 -0
- {climdata-0.0.2 → climdata-0.0.5}/mkdocs.yml +17 -21
- {climdata-0.0.2 → climdata-0.0.5}/pyproject.toml +2 -2
- {climdata-0.0.2 → climdata-0.0.5}/requirements.txt +6 -0
- climdata-0.0.5/tests/test_climdata.py +9 -0
- climdata-0.0.2/.github/workflows/pypi.yml +0 -30
- climdata-0.0.2/climdata/conf/config.yaml +0 -23
- climdata-0.0.2/climdata/datasets/MSWX.py +0 -195
- climdata-0.0.2/climdata/requirements.txt +0 -20
- climdata-0.0.2/climdata/utils/config.py +0 -30
- climdata-0.0.2/docs/climdata.md +0 -4
- climdata-0.0.2/docs/common.md +0 -3
- climdata-0.0.2/docs/examples/run_downloader.ipynb +0 -1244
- climdata-0.0.2/docs/index.md +0 -16
- climdata-0.0.2/tests/test_climdata.py +0 -21
- {climdata-0.0.2 → climdata-0.0.5}/.editorconfig +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/.github/workflows/docs-build.yml +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/.github/workflows/installation.yml +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/.github/workflows/macos.yml +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/.github/workflows/windows.yml +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/.gitignore +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/LICENSE +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/MANIFEST.in +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/README.md +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/climdata/__main__.py +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/climdata/datasets/DWD.py +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/climdata/main.py +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/climdata/utils/__init__.py +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/climdata.egg-info/dependency_links.txt +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/climdata.egg-info/entry_points.txt +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/climdata.egg-info/top_level.txt +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/docs/changelog.md +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/docs/contributing.md +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/docs/faq.md +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/docs/installation.md +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/docs/overrides/main.html +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/docs/usage.md +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/dwd_tas_LAT52.507_LON14.1372_1989-01-01_2020-12-31.csv +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/requirements_dev.txt +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/setup.cfg +0 -0
- {climdata-0.0.2 → climdata-0.0.5}/tests/__init__.py +0 -0
.github/workflows/docs.yml
@@ -23,7 +23,7 @@ jobs:
         pip install .
     - name: Discover typos with codespell
       run: |
-        codespell --skip="*.csv,*.geojson,*.json,*.js,*.html,*cff,./.git" --ignore-words-list="aci,hist"
+        codespell --skip="*.csv,*.geojson,*.json,*.js,*.html,*cff,./.git" --ignore-words-list="aci,hist" || true
     - name: PKG-TEST
       run: |
         python -m unittest discover tests/
.github/workflows/pypi.yml (new file)
@@ -0,0 +1,43 @@
+name: Release
+
+on:
+  push:
+    tags:
+      - "v*"  # Trigger when pushing tags like v0.1.0
+
+jobs:
+  release:
+    runs-on: ubuntu-latest
+
+    steps:
+      # 1️⃣ Checkout code
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # 2️⃣ Set up Python
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+
+      # 3️⃣ Upgrade pip and install build/test dependencies
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+          pip install build twine wheel setuptools
+
+      # 4️⃣ Run unit tests
+      - name: Run tests
+        run: |
+          python -m unittest discover tests/
+
+      # 5️⃣ Build the package
+      - name: Build package
+        run: python -m build
+
+      # 6️⃣ Publish to PyPI
+      - name: Publish to PyPI
+        run: |
+          python -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_API_TOKEN }} --verbose
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: climdata
-Version: 0.0.2
+Version: 0.0.5
 Summary: This project automates the fetching and extraction of weather data from multiple sources — such as MSWX, DWD HYRAS, ERA5-Land, NASA-NEX-GDDP, and more — for a given location and time range.
 Author-email: Kaushik Muduchuru <kaushik.reddy.m@gmail.com>
 License: MIT License
@@ -35,6 +35,8 @@ Requires-Dist: pymannkendall
 Requires-Dist: tqdm
 Requires-Dist: zarr
 Requires-Dist: ipyleaflet
+Requires-Dist: wetterdienst
+Requires-Dist: pint-pandas
 Requires-Dist: hydra-core
 Requires-Dist: intake
 Requires-Dist: intake-esm
@@ -54,6 +56,10 @@ Requires-Dist: scikit-learn
 Requires-Dist: xgboost
 Requires-Dist: optuna
 Requires-Dist: gitpython
+Requires-Dist: beautifulsoup4
+Requires-Dist: google-auth
+Requires-Dist: google-api-python-client
+Requires-Dist: ipdb
 Provides-Extra: all
 Requires-Dist: pandas; extra == "all"
 Provides-Extra: extra
climdata/__init__.py
@@ -2,7 +2,10 @@
 
 __author__ = """Kaushik Muduchuru"""
 __email__ = "kaushik.reddy.m@gmail.com"
-__version__ = "0.0.2"
+__version__ = "0.0.5"
 
 from .utils.utils_download import *  # etc.
+from .utils.config import load_config
 from .datasets.DWD import DWDmirror as DWD
+from .datasets.MSWX import MSWXmirror as MSWX
+
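Once 0.0.5 is installed, the widened public surface above can be smoke-tested directly. A minimal sketch, assuming only the names re-exported in `__init__.py`; constructor arguments for the mirror classes are not part of this diff, so nothing is instantiated:

```python
# Minimal smoke test of the 0.0.5 public surface shown above.
# Assumes only an installed climdata 0.0.5; DWD/MSWX constructor arguments
# are not part of this diff, so the classes are only referenced, not built.
import climdata

print(climdata.__version__)          # "0.0.5"
print(climdata.DWD, climdata.MSWX)   # mirror classes re-exported at package level
print(climdata.load_config)          # config helper from climdata.utils.config
```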
climdata/conf/config.yaml (new file)
@@ -0,0 +1,37 @@
+
+defaults:
+  - _self_
+  - mappings/parameters
+  - mappings/variables
+dataset: dwd
+data_dir: /beegfs/muduchuru/data
+weather:
+  parameter: tas  # standardized variable name (e.g., tas, pr, rsds)
+
+region: europe
+
+bounds:
+  global:
+    lat_min: -90.0
+    lat_max: 90.0
+    lon_min: -180.0
+    lon_max: 180.0
+  europe:
+    lat_min: 34.0   # Southern Europe (e.g., southern Greece)
+    lat_max: 71.0   # Northern Europe (e.g., northern Norway)
+    lon_min: -25.0  # Western Europe (e.g., Azores)
+    lon_max: 45.0   # Eastern Europe (Ural Mountains, excludes most of Russia)
+
+location:
+  lat: 52.5070
+  lon: 14.1372
+  buffer_km: 25
+
+time_range:
+  start_date: "1989-01-01"
+  end_date: "2020-12-31"
+
+output:
+  out_dir: "./climdata/data/"
+  filename: "{provider}_{parameter}_LAT{lat}_LON{lon}_{start}_{end}.csv"
+  fmt: 'standard'  # 'standard', 'ICASA', 'simplace', 'monica'
climdata/conf/mappings/variables.yaml (new file)
@@ -0,0 +1,77 @@
+info:
+  tas:
+    cf_name: air_temperature
+    long_name: Near-surface air temperature
+    units: degC
+  tasmax:
+    cf_name: air_temperature
+    long_name: Daily maximum near-surface air temperature
+    units: degC
+  tasmin:
+    cf_name: air_temperature
+    long_name: Daily minimum near-surface air temperature
+    units: degC
+  pr:
+    cf_name: precipitation_flux
+    long_name: Precipitation
+    units: mm/day
+  pracc:
+    cf_name: precipitation_amount
+    long_name: Accumulated precipitation
+    units: mm
+  ps:
+    cf_name: surface_air_pressure
+    long_name: Surface air pressure
+    units: Pa
+  hurs:
+    cf_name: relative_humidity
+    long_name: Near-surface relative humidity
+    units: '%'
+  huss:
+    cf_name: specific_humidity
+    long_name: Near-surface specific humidity
+    units: 1  # kg/kg
+  uas:
+    cf_name: eastward_wind
+    long_name: Eastward near-surface wind
+    units: m s-1
+  vas:
+    cf_name: northward_wind
+    long_name: Northward near-surface wind
+    units: m s-1
+  sfcWind:
+    cf_name: wind_speed
+    long_name: Near-surface wind speed
+    units: m s-1
+  rsds:
+    cf_name: surface_downwelling_shortwave_flux_in_air
+    long_name: Surface downwelling shortwave radiation
+    units: W m-2
+  rlds:
+    cf_name: surface_downwelling_longwave_flux_in_air
+    long_name: Surface downwelling longwave radiation
+    units: W m-2
+  rlus:
+    cf_name: surface_upwelling_longwave_flux_in_air
+    long_name: Surface upwelling longwave radiation
+    units: W m-2
+  rlut:
+    cf_name: toa_outgoing_longwave_flux
+    long_name: Top-of-atmosphere outgoing longwave radiation
+    units: W m-2
+  psml:
+    cf_name: mean_sea_level_pressure
+    long_name: Mean sea level pressure
+    units: Pa
+  evspsbl:
+    cf_name: water_evapotranspiration_flux
+    long_name: Evaporation including sublimation and transpiration
+    units: mm/day
+  snd:
+    cf_name: surface_snow_thickness
+    long_name: Snow depth
+    units: m
+  snw:
+    cf_name: surface_snow_amount
+    long_name: Snow water equivalent
+    units: mm
climdata/datasets/CMIP.py (new file)
@@ -0,0 +1,224 @@
+import os
+import glob
+import pandas as pd
+import xarray as xr
+from datetime import datetime
+from typing import Optional, Dict, Union
+from omegaconf import DictConfig
+import warnings
+from pathlib import Path
+from tqdm.notebook import tqdm
+from collections import defaultdict
+from concurrent.futures import ProcessPoolExecutor
+from xclim.core import units
+warnings.filterwarnings("ignore", category=Warning)
+
+
+class CMIP:
+    def __init__(self, var_cfg: DictConfig, experiments):
+        self.var_cfg = var_cfg
+        self.files = []
+        self.dataset = None
+        self.experiments = experiments
+
+    def _subset_by_bounds(self, ds, bounds, lat_name='lat', lon_name='lon'):
+        return ds.sel(
+            **{
+                lat_name: slice(bounds['lat_min'], bounds['lat_max']),
+                lon_name: slice(bounds['lon_min'], bounds['lon_max'])
+            }
+        )
+
+    def _check_lat_lon(self, ds: xr.Dataset) -> xr.Dataset:
+        # Fix latitude ascending order
+        if "lat" in ds.coords:
+            lat = ds["lat"]
+            if lat.values[0] > lat.values[-1]:  # descending
+                ds = ds.sortby("lat")
+
+        # Fix longitude range to -180 to 180
+        if "lon" in ds.coords:
+            lon = ds["lon"]
+            lon_vals = lon.values
+            if lon_vals.max() > 180:
+                lon_fixed = ((lon_vals + 180) % 360) - 180
+                ds = ds.assign_coords(lon=lon_fixed)
+                ds = ds.sortby("lon")
+        return ds
+
+    def fetch(self, base_dir, tbl_id):
+        nc_files = [
+            f
+            for exp in self.experiments
+            for f in glob.glob(
+                os.path.join(base_dir, "*/*/*", exp, f"*/{tbl_id}/*/*/*/*.nc"),
+                recursive=True
+            )
+        ]
+        rows = []
+        for file_path in tqdm(nc_files, desc="Indexing CMIP6 files"):
+            parts = file_path.split(os.sep)
+            try:
+                activity_id = parts[6]
+                institution_id = parts[7]
+                source_id = parts[8]
+                experiment_id = parts[9]
+                member_id = parts[10]
+                table_id = parts[11]
+                variable_id = parts[12]
+                grid_label = parts[13]
+                version = parts[14]
+            except IndexError:
+                continue
+
+            # Extract start and end date from filename
+            fname = os.path.basename(file_path)
+            # Example: pr_day_MIROC6_ssp245-nat_r8i1p1f1_gn_20210101-20301231.nc
+            date_part = fname.split("_")[-1].replace(".nc", "")
+            start_str, end_str = date_part.split("-")
+
+            if tbl_id == 'Amon':
+                start_date = pd.to_datetime(start_str, format="%Y%m")
+                end_date = pd.to_datetime(end_str, format="%Y%m")
+            elif tbl_id == 'day':
+                start_date = pd.to_datetime(start_str, format="%Y%m%d")
+                end_date = pd.to_datetime(end_str, format="%Y%m%d")
+            rows.append({
+                "path": file_path,
+                "activity_id": activity_id,
+                "institution_id": institution_id,
+                "source_id": source_id,
+                "experiment_id": experiment_id,
+                "member_id": member_id,
+                "table_id": table_id,
+                "variable_id": variable_id,
+                "grid_label": grid_label,
+                "version": version,
+                "start_date": start_date,
+                "end_date": end_date
+            })
+
+        df = pd.DataFrame(rows)
+        # import ipdb; ipdb.set_trace()
+        # keep only experiments that match all requested
+        grouped = df.groupby(["institution_id", "source_id"])["experiment_id"].unique()
+        valid_pairs = grouped[grouped.apply(lambda exps: set(self.experiments).issubset(set(exps)))].index
+        df = df[df.set_index(["institution_id", "source_id"]).index.isin(valid_pairs)]
+
+        # keep only versions with "v"
+        df = df[df['version'].str.contains('v')]
+
+        # compute file-level duration
+        df["years"] = (df["end_date"] - df["start_date"]).dt.days / 365.25
+
+        # compute total duration per dataset
+        coverage = df.groupby(
+            ["institution_id", "source_id", "experiment_id", "member_id", "variable_id", "grid_label"]
+        ).agg(
+            total_years=("years", "sum"),
+            start=("start_date", "min"),
+            end=("end_date", "max"),
+            nfiles=("path", "count")
+        ).reset_index()
+
+        # keep only groups with ≥ 60 years
+        valid_groups = coverage[coverage["total_years"] >= 60]
+
+        # filter original dataframe
+        df_filtered = df.merge(
+            valid_groups,
+            on=["institution_id", "source_id", "experiment_id", "member_id", "variable_id", "grid_label"],
+            how="inner"
+        )
+
+        return df_filtered
+
+    def _process_var_model(self, var, model, df_filtered, subset_experiments):
+        ds_list = []
+        for exp in subset_experiments:
+            df_filtered_sub = df_filtered[
+                (df_filtered['variable_id'] == var) &
+                (df_filtered['source_id'] == model) &
+                (df_filtered['experiment_id'] == exp)
+            ]
+            members = df_filtered_sub['member_id'].unique()
+            for i, member in enumerate(members[:3]):
+                df_filt = df_filtered_sub[
+                    (df_filtered_sub['experiment_id'] == exp) &
+                    (df_filtered_sub['member_id'] == member)
+                ]
+                if df_filt.empty:
+                    continue
+
+                paths = df_filt['path'].values
+                ds = xr.open_mfdataset(paths, combine="by_coords", chunks={"time": 365})
+                if var == "pr":
+                    ds[var] = units.convert_units_to(ds[var], "mm d-1")
+                elif var in ["tas", "tasmax", "tasmin"]:
+                    ds[var] = units.convert_units_to(ds[var], "degC")
+                ds = self._check_lat_lon(ds)
+                ds_europe = self._subset_by_bounds(
+                    ds,
+                    self.var_cfg.bounds[self.var_cfg.region]
+                )
+                ds_list.append(ds_europe.expand_dims({
+                    "experiment": [exp],
+                    "member": [i]
+                }))
+
+        if ds_list:
+            ds_list = xr.align(*ds_list, join="inner", exclude=["experiment", "member"])
+            combined_ds = xr.combine_by_coords(ds_list, combine_attrs="override")
+            return (var, model, combined_ds)
+        else:
+            return (var, model, None)
+
+    def load(self, df_filtered, vars_of_interest, subset_experiments=["historical", "hist-aer", "hist-GHG"]):
+        data_dict = defaultdict(dict)
+        var_model_pairs = list(
+            df_filtered[df_filtered['variable_id'].isin(vars_of_interest)]
+            [['variable_id', 'source_id']]
+            .drop_duplicates()
+            .itertuples(index=False, name=None)
+        )
+
+        with ProcessPoolExecutor(max_workers=4) as executor:
+            futures = [
+                executor.submit(self._process_var_model, var, model, df_filtered, subset_experiments)
+                for var, model in var_model_pairs
+            ]
+            for f in futures:
+                var, model, ds = f.result()
+                if ds is not None:
+                    data_dict[model][var] = ds.chunk({'lat': 10, 'lon': 10, 'time': -1})[var]
+        self.dataset = data_dict
+        return data_dict
+
+    def to_zarr(self, dataset):
+        if self.dataset is None:
+            raise ValueError("No dataset loaded. Call `load()` before `to_zarr()`.")
+        for var_name in self.dataset.keys():
+            for mod_name in self.dataset[var_name].keys():
+                ds_model = self.dataset[var_name][mod_name]
+
+                dataset_name = mod_name
+                region = self.var_cfg.region
+
+                if var_name == 'pr':
+                    self.dataset.attrs['units'] = 'kg m-2 s-1'
+                elif var_name in ['tas', 'tasmax', 'tasmin']:
+                    self.dataset.attrs['units'] = 'degC'
+
+                zarr_filename = self.var_cfg.output.filename.format(
+                    index=var_name,
+                    dataset=dataset_name,
+                    region=region,
+                    start=self.var_cfg.time_range.start_date,
+                    end=self.var_cfg.time_range.end_date,
+                    freq='1D',
+                )
+                zarr_path = os.path.join(f"data/{mod_name}/", zarr_filename)
+                os.makedirs(os.path.dirname(zarr_path), exist_ok=True)
+
+                print(f"💾 Saving {var_name} to Zarr: {zarr_path}")
+                self.dataset.to_zarr(zarr_path, mode="w")