climdata 0.0.6__tar.gz → 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of climdata might be problematic. Click here for more details.

Files changed (56)
  1. {climdata-0.0.6 → climdata-0.1.0}/PKG-INFO +1 -1
  2. {climdata-0.0.6 → climdata-0.1.0}/climdata/__init__.py +3 -1
  3. {climdata-0.0.6 → climdata-0.1.0}/climdata/conf/config.yaml +2 -1
  4. climdata-0.1.0/climdata/datasets/CMIPCloud.py +149 -0
  5. climdata-0.0.6/climdata/datasets/CMIP.py → climdata-0.1.0/climdata/datasets/CMIPlocal.py +1 -1
  6. {climdata-0.0.6 → climdata-0.1.0}/climdata.egg-info/PKG-INFO +1 -1
  7. {climdata-0.0.6 → climdata-0.1.0}/climdata.egg-info/SOURCES.txt +2 -1
  8. climdata-0.1.0/examples/extract_dwd_loc.ipynb +2429 -0
  9. {climdata-0.0.6 → climdata-0.1.0}/pyproject.toml +2 -2
  10. climdata-0.0.6/examples/extract_dwd_loc.ipynb +0 -310
  11. {climdata-0.0.6 → climdata-0.1.0}/.editorconfig +0 -0
  12. {climdata-0.0.6 → climdata-0.1.0}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  13. {climdata-0.0.6 → climdata-0.1.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  14. {climdata-0.0.6 → climdata-0.1.0}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  15. {climdata-0.0.6 → climdata-0.1.0}/.github/workflows/docs-build.yml +0 -0
  16. {climdata-0.0.6 → climdata-0.1.0}/.github/workflows/docs.yml +0 -0
  17. {climdata-0.0.6 → climdata-0.1.0}/.github/workflows/installation.yml +0 -0
  18. {climdata-0.0.6 → climdata-0.1.0}/.github/workflows/macos.yml +0 -0
  19. {climdata-0.0.6 → climdata-0.1.0}/.github/workflows/pypi.yml +0 -0
  20. {climdata-0.0.6 → climdata-0.1.0}/.github/workflows/ubuntu.yml +0 -0
  21. {climdata-0.0.6 → climdata-0.1.0}/.github/workflows/windows.yml +0 -0
  22. {climdata-0.0.6 → climdata-0.1.0}/.gitignore +0 -0
  23. {climdata-0.0.6 → climdata-0.1.0}/LICENSE +0 -0
  24. {climdata-0.0.6 → climdata-0.1.0}/MANIFEST.in +0 -0
  25. {climdata-0.0.6 → climdata-0.1.0}/README.md +0 -0
  26. {climdata-0.0.6 → climdata-0.1.0}/climdata/__main__.py +0 -0
  27. {climdata-0.0.6 → climdata-0.1.0}/climdata/conf/mappings/parameters.yaml +0 -0
  28. {climdata-0.0.6 → climdata-0.1.0}/climdata/conf/mappings/variables.yaml +0 -0
  29. {climdata-0.0.6 → climdata-0.1.0}/climdata/datasets/DWD.py +0 -0
  30. {climdata-0.0.6 → climdata-0.1.0}/climdata/datasets/ERA5.py +0 -0
  31. {climdata-0.0.6 → climdata-0.1.0}/climdata/datasets/MSWX.py +0 -0
  32. {climdata-0.0.6 → climdata-0.1.0}/climdata/main.py +0 -0
  33. {climdata-0.0.6 → climdata-0.1.0}/climdata/utils/__init__.py +0 -0
  34. {climdata-0.0.6 → climdata-0.1.0}/climdata/utils/config.py +0 -0
  35. {climdata-0.0.6 → climdata-0.1.0}/climdata/utils/utils_download.py +0 -0
  36. {climdata-0.0.6 → climdata-0.1.0}/climdata.egg-info/dependency_links.txt +0 -0
  37. {climdata-0.0.6 → climdata-0.1.0}/climdata.egg-info/entry_points.txt +0 -0
  38. {climdata-0.0.6 → climdata-0.1.0}/climdata.egg-info/requires.txt +0 -0
  39. {climdata-0.0.6 → climdata-0.1.0}/climdata.egg-info/top_level.txt +0 -0
  40. {climdata-0.0.6 → climdata-0.1.0}/docs/changelog.md +0 -0
  41. {climdata-0.0.6 → climdata-0.1.0}/docs/climdata.md +0 -0
  42. {climdata-0.0.6 → climdata-0.1.0}/docs/common.md +0 -0
  43. {climdata-0.0.6 → climdata-0.1.0}/docs/contributing.md +0 -0
  44. {climdata-0.0.6 → climdata-0.1.0}/docs/faq.md +0 -0
  45. {climdata-0.0.6 → climdata-0.1.0}/docs/index.md +0 -0
  46. {climdata-0.0.6 → climdata-0.1.0}/docs/installation.md +0 -0
  47. {climdata-0.0.6 → climdata-0.1.0}/docs/overrides/main.html +0 -0
  48. {climdata-0.0.6 → climdata-0.1.0}/docs/usage.md +0 -0
  49. {climdata-0.0.6 → climdata-0.1.0}/dwd_tas_LAT52.507_LON14.1372_1989-01-01_2020-12-31.csv +0 -0
  50. {climdata-0.0.6 → climdata-0.1.0}/examples/zarr_tas_data/metadata.json +0 -0
  51. {climdata-0.0.6 → climdata-0.1.0}/mkdocs.yml +0 -0
  52. {climdata-0.0.6 → climdata-0.1.0}/requirements.txt +0 -0
  53. {climdata-0.0.6 → climdata-0.1.0}/requirements_dev.txt +0 -0
  54. {climdata-0.0.6 → climdata-0.1.0}/setup.cfg +0 -0
  55. {climdata-0.0.6 → climdata-0.1.0}/tests/__init__.py +0 -0
  56. {climdata-0.0.6 → climdata-0.1.0}/tests/test_climdata.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: climdata
3
- Version: 0.0.6
3
+ Version: 0.1.0
4
4
  Summary: This project automates the fetching and extraction of weather data from multiple sources — such as MSWX, DWD HYRAS, ERA5-Land, NASA-NEX-GDDP, and more — for a given location and time range.
5
5
  Author-email: Kaushik Muduchuru <kaushik.reddy.m@gmail.com>
6
6
  License: MIT License
@@ -2,11 +2,13 @@
2
2
 
3
3
  __author__ = """Kaushik Muduchuru"""
4
4
  __email__ = "kaushik.reddy.m@gmail.com"
5
- __version__ = "0.0.6"
5
+ __version__ = "0.1.0"
6
6
 
7
7
  from .utils.utils_download import * # etc.
8
8
  from .utils.config import load_config
9
9
  from .datasets.DWD import DWDmirror as DWD
10
10
  from .datasets.MSWX import MSWXmirror as MSWX
11
11
  from .datasets.ERA5 import ERA5Mirror as ERA5
12
+ from .datasets.CMIPlocal import CMIPmirror as CMIPlocal
13
+ from .datasets.CMIPCloud import CMIPCloud as CMIP
12
14
 
@@ -33,5 +33,6 @@ time_range:
33
33
 
34
34
  output:
35
35
  out_dir: "./climdata/data/"
36
- filename: "{provider}_{parameter}_LAT{lat}_LON{lon}_{start}_{end}.csv"
36
+ filename_csv: "{provider}_{parameter}_LAT_{lat}_LON_{lon}_{start}_{end}.csv"
37
+ filename_zarr: "{provider}_{parameter}_LAT{lat_range}_LON{lon_range}_{start}_{end}.zarr"
37
38
  fmt: 'standard' # 'standard', 'ICASA', 'simplace', 'monica'
@@ -0,0 +1,149 @@
1
+ import intake
2
+ import xarray as xr
3
+ import pandas as pd
4
+
5
+ class CMIPCloud:
6
+ def __init__(self, experiment_id, source_id, table_id, variables, region_bounds=None):
7
+ self.experiment_id = experiment_id
8
+ self.source_id = source_id
9
+ self.table_id = table_id
10
+ self.variables = variables
11
+ self.region_bounds = region_bounds
12
+ self.col_subsets = []
13
+ self.ds = None
14
+
15
+ def fetch(self):
16
+ """Collect intake catalog subsets for each variable."""
17
+ col = intake.open_esm_datastore("https://storage.googleapis.com/cmip6/pangeo-cmip6.json")
18
+ self.col_subsets = []
19
+ for var in self.variables:
20
+ query = dict(
21
+ experiment_id=[self.experiment_id],
22
+ source_id=self.source_id,
23
+ table_id=self.table_id,
24
+ variable_id=var,
25
+ )
26
+ col_subset = col.search(require_all_on=["source_id"], **query)
27
+ if len(col_subset.df) == 0:
28
+ continue
29
+ self.col_subsets.append(col_subset)
30
+ return self.col_subsets
31
+
32
+ def load(self):
33
+ """Load and merge datasets from collected col_subsets."""
34
+ datasets = []
35
+ for col_subset in self.col_subsets:
36
+ zstore_path = col_subset.df.zstore.values[0].replace('gs:/', "https://storage.googleapis.com")
37
+ ds_var = xr.open_zarr(zstore_path)
38
+ datasets.append(ds_var)
39
+ if datasets:
40
+ self.ds = xr.merge(datasets)
41
+ else:
42
+ self.ds = None
43
+ return self.ds
44
+
45
+ def extract(self, *, point=None, box=None, shapefile=None, buffer_km=0.0):
46
+ """
47
+ Extract a subset of the dataset by point, bounding box (dict), or shapefile.
48
+ """
49
+ import geopandas as gpd
50
+ from shapely.geometry import mapping
51
+
52
+ if self.ds is None:
53
+ raise ValueError("No dataset loaded. Call `load()` first.")
54
+
55
+ ds = self.ds
56
+
57
+ if point is not None:
58
+ lon, lat = point
59
+ if buffer_km > 0:
60
+ buffer_deg = buffer_km / 111
61
+ ds_subset = ds.sel(
62
+ lon=slice(lon-buffer_deg, lon+buffer_deg),
63
+ lat=slice(lat-buffer_deg, lat+buffer_deg)
64
+ )
65
+ else:
66
+ ds_subset = ds.sel(lon=lon, lat=lat, method="nearest")
67
+
68
+ elif box is not None:
69
+ # Accept dict: {'lat_min': ..., 'lat_max': ..., 'lon_min': ..., 'lon_max': ...}
70
+ ds_subset = ds.sel(
71
+ lon=slice(box['lon_min'], box['lon_max']),
72
+ lat=slice(box['lat_min'], box['lat_max'])
73
+ )
74
+
75
+ elif shapefile is not None:
76
+ if isinstance(shapefile, str):
77
+ gdf = gpd.read_file(shapefile)
78
+ else:
79
+ gdf = shapefile
80
+ if buffer_km > 0:
81
+ gdf = gdf.to_crs(epsg=3857)
82
+ gdf["geometry"] = gdf.buffer(buffer_km * 1000)
83
+ gdf = gdf.to_crs(epsg=4326)
84
+ geom = [mapping(g) for g in gdf.geometry]
85
+ import rioxarray
86
+ ds = ds.rio.write_crs("EPSG:4326", inplace=False)
87
+ ds_subset = ds.rio.clip(geom, gdf.crs, drop=True)
88
+
89
+ else:
90
+ raise ValueError("Must provide either point, box, or shapefile.")
91
+ self.ds = ds_subset
92
+ return ds_subset
93
+ def _subset_time(self, start_date, end_date):
94
+ """
95
+ Subset the dataset by time range.
96
+ Dates should be strings in 'YYYY-MM-DD' format.
97
+ """
98
+ if self.ds is None:
99
+ return None
100
+ ds_time = self.ds.sel(time=slice(start_date, end_date))
101
+ self.ds = ds_time
102
+ return ds_time
103
+
104
+ def save_netcdf(self, filename):
105
+ if self.ds is not None:
106
+ if "time" in self.ds.variables:
107
+ self.ds["time"].encoding.clear()
108
+ self.ds.to_netcdf(filename)
109
+ print(f"Saved NetCDF to {filename}")
110
+
111
+ def save_zarr(self, store_path):
112
+ if self.ds is not None:
113
+ self.ds.to_zarr(store_path, mode="w")
114
+ print(f"Saved Zarr to {store_path}")
115
+
116
+ def _format(self, df):
117
+ """
118
+ Format the dataframe for standardized output:
119
+ - Adds source_id, experiment_id, table_id, variable, value, units columns.
120
+ - Stacks variables into long format.
121
+ """
122
+ # Melt the dataframe to long format: variable, value
123
+ value_vars = [v for v in self.variables if v in df.columns]
124
+ id_vars = [c for c in df.columns if c not in value_vars]
125
+ df_long = df.melt(id_vars=id_vars, value_vars=value_vars,
126
+ var_name="variable", value_name="value")
127
+
128
+ # Add units column (from attrs)
129
+ df_long["units"] = df_long["variable"].map(
130
+ lambda v: self.ds[v].attrs.get("units", "unknown") if v in self.ds.data_vars else "unknown"
131
+ )
132
+
133
+ # Add metadata columns if missing
134
+ df_long["source_id"] = self.source_id
135
+ df_long["experiment_id"] = self.experiment_id
136
+ df_long["table_id"] = self.table_id
137
+
138
+ # Reorder columns
139
+ cols = ["source_id", "experiment_id", "table_id", "time", "lat", "lon", "variable", "value", "units"]
140
+ df_long = df_long[[c for c in cols if c in df_long.columns]]
141
+
142
+ return df_long
143
+
144
+ def save_csv(self, filename):
145
+ if self.ds is not None:
146
+ df = self.ds.to_dataframe().reset_index()
147
+ df = self._format(df)
148
+ df.to_csv(filename, index=False)
149
+ print(f"Saved CSV to {filename}")
@@ -14,7 +14,7 @@ from xclim.core import units
14
14
  warnings.filterwarnings("ignore", category=Warning)
15
15
 
16
16
 
17
- class CMIP:
17
+ class CMIPmirror:
18
18
  def __init__(self, var_cfg: DictConfig, experiments):
19
19
  self.var_cfg = var_cfg
20
20
  self.files = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: climdata
3
- Version: 0.0.6
3
+ Version: 0.1.0
4
4
  Summary: This project automates the fetching and extraction of weather data from multiple sources — such as MSWX, DWD HYRAS, ERA5-Land, NASA-NEX-GDDP, and more — for a given location and time range.
5
5
  Author-email: Kaushik Muduchuru <kaushik.reddy.m@gmail.com>
6
6
  License: MIT License
@@ -30,7 +30,8 @@ climdata.egg-info/top_level.txt
30
30
  climdata/conf/config.yaml
31
31
  climdata/conf/mappings/parameters.yaml
32
32
  climdata/conf/mappings/variables.yaml
33
- climdata/datasets/CMIP.py
33
+ climdata/datasets/CMIPCloud.py
34
+ climdata/datasets/CMIPlocal.py
34
35
  climdata/datasets/DWD.py
35
36
  climdata/datasets/ERA5.py
36
37
  climdata/datasets/MSWX.py