climdata 0.0.6__tar.gz → 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of climdata might be problematic. Click here for more details.
- {climdata-0.0.6 → climdata-0.1.0}/PKG-INFO +1 -1
- {climdata-0.0.6 → climdata-0.1.0}/climdata/__init__.py +3 -1
- {climdata-0.0.6 → climdata-0.1.0}/climdata/conf/config.yaml +2 -1
- climdata-0.1.0/climdata/datasets/CMIPCloud.py +149 -0
- climdata-0.0.6/climdata/datasets/CMIP.py → climdata-0.1.0/climdata/datasets/CMIPlocal.py +1 -1
- {climdata-0.0.6 → climdata-0.1.0}/climdata.egg-info/PKG-INFO +1 -1
- {climdata-0.0.6 → climdata-0.1.0}/climdata.egg-info/SOURCES.txt +2 -1
- climdata-0.1.0/examples/extract_dwd_loc.ipynb +2429 -0
- {climdata-0.0.6 → climdata-0.1.0}/pyproject.toml +2 -2
- climdata-0.0.6/examples/extract_dwd_loc.ipynb +0 -310
- {climdata-0.0.6 → climdata-0.1.0}/.editorconfig +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/.github/workflows/docs-build.yml +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/.github/workflows/docs.yml +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/.github/workflows/installation.yml +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/.github/workflows/macos.yml +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/.github/workflows/pypi.yml +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/.github/workflows/ubuntu.yml +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/.github/workflows/windows.yml +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/.gitignore +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/LICENSE +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/MANIFEST.in +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/README.md +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/climdata/__main__.py +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/climdata/conf/mappings/parameters.yaml +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/climdata/conf/mappings/variables.yaml +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/climdata/datasets/DWD.py +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/climdata/datasets/ERA5.py +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/climdata/datasets/MSWX.py +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/climdata/main.py +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/climdata/utils/__init__.py +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/climdata/utils/config.py +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/climdata/utils/utils_download.py +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/climdata.egg-info/dependency_links.txt +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/climdata.egg-info/entry_points.txt +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/climdata.egg-info/requires.txt +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/climdata.egg-info/top_level.txt +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/docs/changelog.md +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/docs/climdata.md +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/docs/common.md +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/docs/contributing.md +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/docs/faq.md +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/docs/index.md +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/docs/installation.md +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/docs/overrides/main.html +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/docs/usage.md +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/dwd_tas_LAT52.507_LON14.1372_1989-01-01_2020-12-31.csv +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/examples/zarr_tas_data/metadata.json +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/mkdocs.yml +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/requirements.txt +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/requirements_dev.txt +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/setup.cfg +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/tests/__init__.py +0 -0
- {climdata-0.0.6 → climdata-0.1.0}/tests/test_climdata.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: climdata
|
|
3
|
-
Version: 0.0
|
|
3
|
+
Version: 0.1.0
|
|
4
4
|
Summary: This project automates the fetching and extraction of weather data from multiple sources — such as MSWX, DWD HYRAS, ERA5-Land, NASA-NEX-GDDP, and more — for a given location and time range.
|
|
5
5
|
Author-email: Kaushik Muduchuru <kaushik.reddy.m@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -2,11 +2,13 @@
|
|
|
2
2
|
|
|
3
3
|
__author__ = """Kaushik Muduchuru"""
|
|
4
4
|
__email__ = "kaushik.reddy.m@gmail.com"
|
|
5
|
-
__version__ = "0.0
|
|
5
|
+
__version__ = "0.1.0"
|
|
6
6
|
|
|
7
7
|
from .utils.utils_download import * # etc.
|
|
8
8
|
from .utils.config import load_config
|
|
9
9
|
from .datasets.DWD import DWDmirror as DWD
|
|
10
10
|
from .datasets.MSWX import MSWXmirror as MSWX
|
|
11
11
|
from .datasets.ERA5 import ERA5Mirror as ERA5
|
|
12
|
+
from .datasets.CMIPlocal import CMIPmirror as CMIPlocal
|
|
13
|
+
from .datasets.CMIPCloud import CMIPCloud as CMIP
|
|
12
14
|
|
|
@@ -33,5 +33,6 @@ time_range:
|
|
|
33
33
|
|
|
34
34
|
output:
|
|
35
35
|
out_dir: "./climdata/data/"
|
|
36
|
-
|
|
36
|
+
filename_csv: "{provider}_{parameter}_LAT_{lat}_LON_{lon}_{start}_{end}.csv"
|
|
37
|
+
filename_zarr: "{provider}_{parameter}_LAT{lat_range}_LON{lon_range}_{start}_{end}.zarr"
|
|
37
38
|
fmt: 'standard' # 'standard', 'ICASA', 'simplace', 'monica'
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import intake
|
|
2
|
+
import xarray as xr
|
|
3
|
+
import pandas as pd
|
|
4
|
+
|
|
5
|
+
class CMIPCloud:
    """Fetch, subset, and export CMIP6 data from the Pangeo Google Cloud archive.

    Typical workflow: ``fetch()`` -> ``load()`` -> ``extract(...)`` ->
    ``save_netcdf()`` / ``save_zarr()`` / ``save_csv()``.

    Relies on module-level imports of ``intake`` and ``xarray as xr``;
    ``geopandas``/``shapely``/``rioxarray`` are imported lazily inside
    ``extract`` so they are only required for shapefile clipping.
    """

    def __init__(self, experiment_id, source_id, table_id, variables, region_bounds=None):
        """Store the CMIP6 query parameters.

        Args:
            experiment_id: CMIP6 experiment (e.g. ``"ssp585"``).
            source_id: model source id(s) passed to the catalog search.
            table_id: CMIP6 table (e.g. ``"day"``).
            variables: iterable of variable_ids to fetch (e.g. ``["tas"]``).
            region_bounds: optional region specification.
                NOTE(review): currently stored but never read by any method —
                confirm whether it should feed into ``extract``.
        """
        self.experiment_id = experiment_id
        self.source_id = source_id
        self.table_id = table_id
        self.variables = variables
        self.region_bounds = region_bounds
        # Catalog search results, one entry per variable found (set by fetch()).
        self.col_subsets = []
        # The working dataset; progressively narrowed by load()/extract()/_subset_time().
        self.ds = None

    def fetch(self):
        """Search the Pangeo CMIP6 intake-esm catalog, one query per variable.

        Variables with no matching catalog entry are silently skipped.

        Returns:
            list: the collected intake-esm catalog subsets (also stored on
            ``self.col_subsets``).
        """
        col = intake.open_esm_datastore("https://storage.googleapis.com/cmip6/pangeo-cmip6.json")
        self.col_subsets = []
        for var in self.variables:
            query = dict(
                experiment_id=[self.experiment_id],
                source_id=self.source_id,
                table_id=self.table_id,
                variable_id=var,
            )
            col_subset = col.search(require_all_on=["source_id"], **query)
            if len(col_subset.df) == 0:
                # No catalog entry for this variable/model combination.
                continue
            self.col_subsets.append(col_subset)
        return self.col_subsets

    def load(self):
        """Open each collected zarr store and merge them into one dataset.

        Only the first catalog row per subset is used. The ``gs://`` URI is
        rewritten to its public HTTPS equivalent so no GCS credentials are
        needed.

        Returns:
            xarray.Dataset or None: merged dataset (also stored on ``self.ds``),
            or None when ``fetch()`` found nothing.
        """
        datasets = []
        for col_subset in self.col_subsets:
            # 'gs://bucket/...' -> 'https://storage.googleapis.com/bucket/...'
            zstore_path = col_subset.df.zstore.values[0].replace('gs:/', "https://storage.googleapis.com")
            ds_var = xr.open_zarr(zstore_path)
            datasets.append(ds_var)
        if datasets:
            self.ds = xr.merge(datasets)
        else:
            self.ds = None
        return self.ds

    def extract(self, *, point=None, box=None, shapefile=None, buffer_km=0.0):
        """Extract a spatial subset by point, bounding box, or shapefile.

        Exactly one of ``point``, ``box``, or ``shapefile`` must be given.

        Args:
            point: ``(lon, lat)`` tuple. With ``buffer_km > 0`` a square
                window of roughly that radius is selected (1 deg ~= 111 km);
                otherwise the nearest grid cell is taken.
            box: dict with keys ``lat_min``, ``lat_max``, ``lon_min``,
                ``lon_max``. NOTE(review): assumes lat/lon coordinates are
                ascending — confirm for each model grid.
            shapefile: path or GeoDataFrame; geometry is optionally buffered
                (in EPSG:3857 metres) and used to clip via rioxarray.
            buffer_km: buffer radius in kilometres for point/shapefile modes.

        Returns:
            xarray.Dataset: the subset (also replaces ``self.ds``).

        Raises:
            ValueError: if no dataset is loaded or no selector was provided.
        """
        import geopandas as gpd
        from shapely.geometry import mapping

        if self.ds is None:
            raise ValueError("No dataset loaded. Call `load()` first.")

        ds = self.ds

        if point is not None:
            lon, lat = point
            if buffer_km > 0:
                # Approximate conversion: 1 degree of latitude ~ 111 km.
                buffer_deg = buffer_km / 111
                ds_subset = ds.sel(
                    lon=slice(lon - buffer_deg, lon + buffer_deg),
                    lat=slice(lat - buffer_deg, lat + buffer_deg)
                )
            else:
                ds_subset = ds.sel(lon=lon, lat=lat, method="nearest")

        elif box is not None:
            # Accept dict: {'lat_min': ..., 'lat_max': ..., 'lon_min': ..., 'lon_max': ...}
            ds_subset = ds.sel(
                lon=slice(box['lon_min'], box['lon_max']),
                lat=slice(box['lat_min'], box['lat_max'])
            )

        elif shapefile is not None:
            if isinstance(shapefile, str):
                gdf = gpd.read_file(shapefile)
            else:
                gdf = shapefile
            if buffer_km > 0:
                # Buffer in a metric CRS so buffer_km*1000 is metres, then go back.
                gdf = gdf.to_crs(epsg=3857)
                gdf["geometry"] = gdf.buffer(buffer_km * 1000)
                gdf = gdf.to_crs(epsg=4326)
            geom = [mapping(g) for g in gdf.geometry]
            import rioxarray
            ds = ds.rio.write_crs("EPSG:4326", inplace=False)
            ds_subset = ds.rio.clip(geom, gdf.crs, drop=True)

        else:
            raise ValueError("Must provide either point, box, or shapefile.")
        self.ds = ds_subset
        return ds_subset

    def _subset_time(self, start_date, end_date):
        """Subset the dataset by time range (in place).

        Dates should be strings in 'YYYY-MM-DD' format.

        Returns:
            xarray.Dataset or None: the time-sliced dataset (also stored on
            ``self.ds``), or None if no dataset is loaded.
        """
        if self.ds is None:
            return None
        ds_time = self.ds.sel(time=slice(start_date, end_date))
        self.ds = ds_time
        return ds_time

    def save_netcdf(self, filename):
        """Write ``self.ds`` to a NetCDF file; no-op when nothing is loaded."""
        if self.ds is not None:
            if "time" in self.ds.variables:
                # Drop inherited zarr time encoding that can break NetCDF output.
                self.ds["time"].encoding.clear()
            self.ds.to_netcdf(filename)
            # BUG FIX: the f-string previously had no placeholder and printed
            # a literal "(unknown)" instead of the target path.
            print(f"Saved NetCDF to {filename}")

    def save_zarr(self, store_path):
        """Write ``self.ds`` to a zarr store; no-op when nothing is loaded."""
        if self.ds is not None:
            self.ds.to_zarr(store_path, mode="w")
            print(f"Saved Zarr to {store_path}")

    def _format(self, df):
        """
        Format the dataframe for standardized output:
        - Adds source_id, experiment_id, table_id, variable, value, units columns.
        - Stacks variables into long format.
        """
        # Melt the dataframe to long format: variable, value
        value_vars = [v for v in self.variables if v in df.columns]
        id_vars = [c for c in df.columns if c not in value_vars]
        df_long = df.melt(id_vars=id_vars, value_vars=value_vars,
                          var_name="variable", value_name="value")

        # Add units column (taken from each variable's attrs on the dataset).
        df_long["units"] = df_long["variable"].map(
            lambda v: self.ds[v].attrs.get("units", "unknown") if v in self.ds.data_vars else "unknown"
        )

        # Add metadata columns if missing
        df_long["source_id"] = self.source_id
        df_long["experiment_id"] = self.experiment_id
        df_long["table_id"] = self.table_id

        # Reorder columns (only those actually present survive).
        cols = ["source_id", "experiment_id", "table_id", "time", "lat", "lon", "variable", "value", "units"]
        df_long = df_long[[c for c in cols if c in df_long.columns]]

        return df_long

    def save_csv(self, filename):
        """Flatten ``self.ds`` to a long-format CSV; no-op when nothing is loaded."""
        if self.ds is not None:
            df = self.ds.to_dataframe().reset_index()
            df = self._format(df)
            df.to_csv(filename, index=False)
            # BUG FIX: the f-string previously had no placeholder and printed
            # a literal "(unknown)" instead of the target path.
            print(f"Saved CSV to {filename}")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: climdata
|
|
3
|
-
Version: 0.0
|
|
3
|
+
Version: 0.1.0
|
|
4
4
|
Summary: This project automates the fetching and extraction of weather data from multiple sources — such as MSWX, DWD HYRAS, ERA5-Land, NASA-NEX-GDDP, and more — for a given location and time range.
|
|
5
5
|
Author-email: Kaushik Muduchuru <kaushik.reddy.m@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -30,7 +30,8 @@ climdata.egg-info/top_level.txt
|
|
|
30
30
|
climdata/conf/config.yaml
|
|
31
31
|
climdata/conf/mappings/parameters.yaml
|
|
32
32
|
climdata/conf/mappings/variables.yaml
|
|
33
|
-
climdata/datasets/
|
|
33
|
+
climdata/datasets/CMIPCloud.py
|
|
34
|
+
climdata/datasets/CMIPlocal.py
|
|
34
35
|
climdata/datasets/DWD.py
|
|
35
36
|
climdata/datasets/ERA5.py
|
|
36
37
|
climdata/datasets/MSWX.py
|