climdata 0.0.2__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {climdata-0.0.2 → climdata-0.3.4}/.github/workflows/docs.yml +1 -1
- climdata-0.3.4/.github/workflows/pypi.yml +43 -0
- {climdata-0.0.2 → climdata-0.3.4}/.github/workflows/ubuntu.yml +0 -1
- {climdata-0.0.2 → climdata-0.3.4}/.gitignore +3 -1
- {climdata-0.0.2 → climdata-0.3.4}/MANIFEST.in +2 -0
- climdata-0.3.4/PKG-INFO +304 -0
- climdata-0.3.4/README.md +235 -0
- climdata-0.3.4/climdata/__init__.py +16 -0
- climdata-0.3.4/climdata/conf/config.yaml +43 -0
- climdata-0.3.4/climdata/conf/mappings/indices.yaml +338 -0
- climdata-0.3.4/climdata/conf/mappings/parameters.yaml +218 -0
- climdata-0.3.4/climdata/conf/mappings/variables.yaml +76 -0
- climdata-0.3.4/climdata/datasets/CMIPCloud.py +186 -0
- climdata-0.3.4/climdata/datasets/CMIPlocal.py +224 -0
- climdata-0.3.4/climdata/datasets/DWD.py +170 -0
- climdata-0.3.4/climdata/datasets/ERA5.py +322 -0
- climdata-0.3.4/climdata/datasets/HYRAS.py +497 -0
- climdata-0.3.4/climdata/datasets/ISD_down.sh +90 -0
- climdata-0.3.4/climdata/datasets/MSWX.py +269 -0
- climdata-0.3.4/climdata/extremes/calc_extremes.py +22 -0
- climdata-0.3.4/climdata/extremes/indices.py +101 -0
- climdata-0.3.4/climdata/impute/brits.py +100 -0
- climdata-0.3.4/climdata/utils/config.py +30 -0
- climdata-0.3.4/climdata/utils/utils_download.py +250 -0
- climdata-0.3.4/climdata/utils/wrapper.py +263 -0
- climdata-0.3.4/climdata.egg-info/PKG-INFO +304 -0
- {climdata-0.0.2 → climdata-0.3.4}/climdata.egg-info/SOURCES.txt +18 -6
- {climdata-0.0.2 → climdata-0.3.4}/climdata.egg-info/requires.txt +7 -0
- climdata-0.3.4/docs/climdata.md +4 -0
- climdata-0.3.4/docs/common.md +41 -0
- climdata-0.0.2/README.md → climdata-0.3.4/docs/index.md +3 -1
- climdata-0.3.4/examples/climdata_cli.py +49 -0
- climdata-0.3.4/examples/extremes.ipynb +904 -0
- climdata-0.3.4/examples/wrapper.ipynb +330 -0
- {climdata-0.0.2 → climdata-0.3.4}/mkdocs.yml +17 -21
- {climdata-0.0.2 → climdata-0.3.4}/pyproject.toml +8 -2
- {climdata-0.0.2 → climdata-0.3.4}/requirements.txt +7 -0
- climdata-0.3.4/tests/test_climdata.py +9 -0
- climdata-0.3.4/usecase/extremes_custom.ipynb +610 -0
- climdata-0.3.4/usecase/impute.ipynb +564 -0
- climdata-0.3.4/usecase/simplace_dataprep.ipynb +1639 -0
- climdata-0.0.2/.github/workflows/pypi.yml +0 -30
- climdata-0.0.2/PKG-INFO +0 -253
- climdata-0.0.2/climdata/__init__.py +0 -8
- climdata-0.0.2/climdata/__main__.py +0 -5
- climdata-0.0.2/climdata/conf/config.yaml +0 -23
- climdata-0.0.2/climdata/conf/mappings/parameters.yaml +0 -172
- climdata-0.0.2/climdata/datasets/DWD.py +0 -73
- climdata-0.0.2/climdata/datasets/MSWX.py +0 -195
- climdata-0.0.2/climdata/main.py +0 -56
- climdata-0.0.2/climdata/requirements.txt +0 -20
- climdata-0.0.2/climdata/utils/config.py +0 -30
- climdata-0.0.2/climdata/utils/utils_download.py +0 -976
- climdata-0.0.2/climdata.egg-info/PKG-INFO +0 -253
- climdata-0.0.2/docs/climdata.md +0 -4
- climdata-0.0.2/docs/common.md +0 -3
- climdata-0.0.2/docs/examples/run_downloader.ipynb +0 -1244
- climdata-0.0.2/docs/index.md +0 -16
- climdata-0.0.2/dwd_tas_LAT52.507_LON14.1372_1989-01-01_2020-12-31.csv +0 -11506
- climdata-0.0.2/tests/test_climdata.py +0 -21
- {climdata-0.0.2 → climdata-0.3.4}/.editorconfig +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/.github/workflows/docs-build.yml +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/.github/workflows/installation.yml +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/.github/workflows/macos.yml +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/.github/workflows/windows.yml +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/LICENSE +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/climdata/utils/__init__.py +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/climdata.egg-info/dependency_links.txt +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/climdata.egg-info/entry_points.txt +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/climdata.egg-info/top_level.txt +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/docs/changelog.md +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/docs/contributing.md +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/docs/faq.md +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/docs/installation.md +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/docs/overrides/main.html +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/docs/usage.md +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/requirements_dev.txt +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/setup.cfg +0 -0
- {climdata-0.0.2 → climdata-0.3.4}/tests/__init__.py +0 -0
|
@@ -23,7 +23,7 @@ jobs:
|
|
|
23
23
|
pip install .
|
|
24
24
|
- name: Discover typos with codespell
|
|
25
25
|
run: |
|
|
26
|
-
codespell --skip="*.csv,*.geojson,*.json,*.js,*.html,*cff,./.git" --ignore-words-list="aci,hist"
|
|
26
|
+
codespell --skip="*.csv,*.geojson,*.json,*.js,*.html,*cff,./.git" --ignore-words-list="aci,hist" || true
|
|
27
27
|
- name: PKG-TEST
|
|
28
28
|
run: |
|
|
29
29
|
python -m unittest discover tests/
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*" # Trigger when pushing tags like v0.1.0
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
release:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
|
|
12
|
+
steps:
|
|
13
|
+
# 1️⃣ Checkout code
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
with:
|
|
16
|
+
fetch-depth: 0
|
|
17
|
+
|
|
18
|
+
# 2️⃣ Set up Python
|
|
19
|
+
- name: Set up Python
|
|
20
|
+
uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: "3.11"
|
|
23
|
+
|
|
24
|
+
# 3️⃣ Upgrade pip and install build/test dependencies
|
|
25
|
+
- name: Install dependencies
|
|
26
|
+
run: |
|
|
27
|
+
python -m pip install --upgrade pip
|
|
28
|
+
pip install -r requirements.txt
|
|
29
|
+
pip install build twine wheel setuptools
|
|
30
|
+
|
|
31
|
+
# 4️⃣ Run unit tests
|
|
32
|
+
- name: Run tests
|
|
33
|
+
run: |
|
|
34
|
+
python -m unittest discover tests/
|
|
35
|
+
|
|
36
|
+
# 5️⃣ Build the package
|
|
37
|
+
- name: Build package
|
|
38
|
+
run: python -m build
|
|
39
|
+
|
|
40
|
+
# 6️⃣ Publish to PyPI
|
|
41
|
+
- name: Publish to PyPI
|
|
42
|
+
run: |
|
|
43
|
+
python -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_API_TOKEN }} --verbose
|
climdata-0.3.4/PKG-INFO
ADDED
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: climdata
|
|
3
|
+
Version: 0.3.4
|
|
4
|
+
Summary: This project automates the fetching and extraction of weather data from multiple sources — such as MSWX, DWD HYRAS, ERA5-Land, NASA-NEX-GDDP, and more — for a given location and time range.
|
|
5
|
+
Author-email: Kaushik Muduchuru <kaushik.reddy.m@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
Project-URL: Homepage, https://github.com/Kaushikreddym/climdata
|
|
8
|
+
Keywords: climdata
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Natural Language :: English
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Requires-Python: >=3.8
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: xarray
|
|
21
|
+
Requires-Dist: xesmf
|
|
22
|
+
Requires-Dist: ipython
|
|
23
|
+
Requires-Dist: jupyter
|
|
24
|
+
Requires-Dist: openpyxl
|
|
25
|
+
Requires-Dist: pandas
|
|
26
|
+
Requires-Dist: geopandas
|
|
27
|
+
Requires-Dist: rioxarray
|
|
28
|
+
Requires-Dist: dask[complete]
|
|
29
|
+
Requires-Dist: xclim
|
|
30
|
+
Requires-Dist: cartopy
|
|
31
|
+
Requires-Dist: colormaps
|
|
32
|
+
Requires-Dist: h5netcdf
|
|
33
|
+
Requires-Dist: netCDF4
|
|
34
|
+
Requires-Dist: pymannkendall
|
|
35
|
+
Requires-Dist: tqdm
|
|
36
|
+
Requires-Dist: zarr
|
|
37
|
+
Requires-Dist: ipyleaflet
|
|
38
|
+
Requires-Dist: wetterdienst
|
|
39
|
+
Requires-Dist: pint-pandas
|
|
40
|
+
Requires-Dist: cdsapi
|
|
41
|
+
Requires-Dist: hydra-core
|
|
42
|
+
Requires-Dist: intake
|
|
43
|
+
Requires-Dist: intake-esm
|
|
44
|
+
Requires-Dist: aiohttp
|
|
45
|
+
Requires-Dist: requests
|
|
46
|
+
Requires-Dist: gcsfs
|
|
47
|
+
Requires-Dist: dask-jobqueue
|
|
48
|
+
Requires-Dist: seaborn
|
|
49
|
+
Requires-Dist: earthengine-api
|
|
50
|
+
Requires-Dist: geemap
|
|
51
|
+
Requires-Dist: xsdba
|
|
52
|
+
Requires-Dist: xclim
|
|
53
|
+
Requires-Dist: pyarrow
|
|
54
|
+
Requires-Dist: fastparquet
|
|
55
|
+
Requires-Dist: mlflow
|
|
56
|
+
Requires-Dist: scikit-learn
|
|
57
|
+
Requires-Dist: xgboost
|
|
58
|
+
Requires-Dist: optuna
|
|
59
|
+
Requires-Dist: gitpython
|
|
60
|
+
Requires-Dist: beautifulsoup4
|
|
61
|
+
Requires-Dist: google-auth
|
|
62
|
+
Requires-Dist: google-api-python-client
|
|
63
|
+
Requires-Dist: ipdb
|
|
64
|
+
Provides-Extra: all
|
|
65
|
+
Requires-Dist: pandas; extra == "all"
|
|
66
|
+
Provides-Extra: extra
|
|
67
|
+
Requires-Dist: pandas; extra == "extra"
|
|
68
|
+
Dynamic: license-file
|
|
69
|
+
|
|
70
|
+
# climdata
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
[climdata on PyPI](https://pypi.python.org/pypi/climdata)
|
|
74
|
+
[climdata on conda-forge](https://anaconda.org/conda-forge/climdata)
|
|
75
|
+
|
|
76
|
+
`climdata` is a Python package designed to automate fetching, extraction, and processing of climate data from various sources, including MSWX, DWD HYRAS, ERA5-Land, and NASA-NEX-GDDP. It provides tools to retrieve data for specific locations and time ranges, facilitating climate analysis and research.
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## Key features
|
|
81
|
+
- Fetch and load datasets: MSWX, CMIP (cloud via intake), DWD, HYRAS
|
|
82
|
+
- Spatial extraction: point, box (region via config bounds), or shapefile (GeoJSON/Feature)
|
|
83
|
+
- Temporal subsetting via config or programmatic call
|
|
84
|
+
- Multi-format export: NetCDF, Zarr, CSV (standardized long format: variable, value, units)
|
|
85
|
+
- Hydra configuration + easy CLI overrides
|
|
86
|
+
- Helper to normalize AOI (GeoJSON → point / bbox / polygon)
|
|
87
|
+
- Provenance-friendly workflow (designed to be used with CI/CD workflows)
|
|
88
|
+
|
|
89
|
+
## Install (development)
|
|
90
|
+
1. Clone repository
|
|
91
|
+
```bash
|
|
92
|
+
git clone <repo-url>
|
|
93
|
+
cd climdata
|
|
94
|
+
```
|
|
95
|
+
2. Create virtualenv and install deps
|
|
96
|
+
```bash
|
|
97
|
+
python -m venv .venv
|
|
98
|
+
source .venv/bin/activate
|
|
99
|
+
pip install -U pip
|
|
100
|
+
pip install -e ".[dev]" # or pip install -r requirements.txt
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Quick CLI (Hydra) usage
|
|
104
|
+
Hydra reads configs from `climdata/conf/`. Any config value can be overridden on the command line.
|
|
105
|
+
|
|
106
|
+
Examples:
|
|
107
|
+
```bash
|
|
108
|
+
# Region extraction (saves NetCDF by default when region is used)
|
|
109
|
+
python examples/climdata_cli.py dataset=CMIP region=europe time_range.start_date=2010-01-01 time_range.end_date=2010-12-31
|
|
110
|
+
|
|
111
|
+
# Point extraction (saves CSV)
|
|
112
|
+
python examples/climdata_cli.py dataset=MSWX lat=52.5 lon=13.4 variables=['tas','pr'] time_range.start_date=2000-01-01
|
|
113
|
+
|
|
114
|
+
# HYRAS / DWD (point only)
|
|
115
|
+
python examples/climdata_cli.py dataset=HYRAS lat=52 lon=10
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Notes:
|
|
119
|
+
- Use `dataset=<MSWX|CMIP|DWD|HYRAS>` in CLI.
|
|
120
|
+
- Override any config key: e.g. `time_range.start_date=2000-01-01`.
|
|
121
|
+
- DWD/HYRAS: region (box) extraction is not supported — script will raise an error if attempted.
|
|
122
|
+
|
|
123
|
+
## Programmatic usage
|
|
124
|
+
Use the wrapper to compose configs, preprocess AOI, extract, and save.
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
from climdata.utils.wrapper import extract_data
|
|
128
|
+
|
|
129
|
+
# returns (cfg, filename, ds, index) when save_to_file=True
|
|
130
|
+
cfg, filename, ds, index = extract_data(cfg_name="config", overrides=["dataset=MSWX","lat=52.5","lon=13.4"])
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Or use the dataset classes directly:
|
|
134
|
+
```python
|
|
135
|
+
import climdata, xarray as xr
|
|
136
|
+
cmip = climdata.CMIP(cfg)
|
|
137
|
+
cmip.fetch()
|
|
138
|
+
cmip.load()
|
|
139
|
+
cmip.extract(box=cfg.bounds[cfg.region])
|
|
140
|
+
cmip.save_netcdf("output.nc")
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Configs
|
|
144
|
+
- Config files live in `climdata/conf/`. There are dataset-specific config entry points e.g. `config_cmip`, `config_mswx`, etc.
|
|
145
|
+
- Filename templates are configurable in `cfg.output`:
|
|
146
|
+
- `cfg.output.filename_nc`
|
|
147
|
+
- `cfg.output.filename_csv`
|
|
148
|
+
- `cfg.output.filename_zarr`
|
|
149
|
+
|
|
150
|
+
The wrapper generates filenames via `get_output_filename(cfg, output_type, ...)` using `cfg.bounds`, `cfg.time_range`, etc.
|
|
151
|
+
|
|
152
|
+
## Output CSV format
|
|
153
|
+
CSV produced by `save_csv` is standardized to the long form with columns (where available):
|
|
154
|
+
- source_id, experiment_id, table_id, time, lat, lon, variable, value, units
|
|
155
|
+
|
|
156
|
+
This ensures a single `value` column and a `variable` column for stacked variables.
|
|
157
|
+
|
|
158
|
+
## Common issues & tips
|
|
159
|
+
- NetCDF write ValueError (datetime encoding): call `ds["time"].encoding.clear()` before `to_netcdf()` (wrapper handles this).
|
|
160
|
+
- PermissionError writing files: ensure output directory is writable or write to `/tmp/` (or adjust permissions).
|
|
161
|
+
- CMIP cloud access requires network access — use the Pangeo intake catalog URL already referenced in code.
|
|
162
|
+
|
|
163
|
+
## AOI handling
|
|
164
|
+
`preprocess_aoi(cfg)` accepts:
|
|
165
|
+
- GeoJSON strings / Feature / FeatureCollection
|
|
166
|
+
- Point → sets `cfg.lat`, `cfg.lon`
|
|
167
|
+
- Polygon or bbox → sets `cfg.bounds['custom']` and `cfg.region='custom'`
|
|
168
|
+
|
|
169
|
+
## HYRAS support
|
|
170
|
+
HYRAS class mirrors MSWX design:
|
|
171
|
+
- `fetch()` / `load()` / `extract(point=...)` / `save_csv()` / `save_netcdf()`
|
|
172
|
+
- HYRAS currently supports point extraction only; attempting to extract a region raises an error.
|
|
173
|
+
|
|
174
|
+
## Development & provenance
|
|
175
|
+
- CI: add GitHub Actions workflows to run tests and build/publish to PyPI.
|
|
176
|
+
- Keep config and runtime overrides in Hydra to enable reproducible runs.
|
|
177
|
+
- Include `CITATION.cff`, license, and a changelog for FAIR discoverability.
|
|
178
|
+
|
|
179
|
+
## Contributing
|
|
180
|
+
- Run tests: `pytest`
|
|
181
|
+
- Style: follow repository linting config
|
|
182
|
+
- Open PRs against `main` with tests and a short changelog entry
|
|
183
|
+
|
|
184
|
+
## License
|
|
185
|
+
Released under the MIT License; see `LICENSE` for the full text.
|
|
186
|
+
|
|
187
|
+
---
|
|
188
|
+
|
|
189
|
+
For further examples, see `examples/` and the `docs/` folder (usage, installation, faq).
|
|
190
|
+
# climdata
|
|
191
|
+
|
|
192
|
+
Lightweight toolkit to fetch, subset and export climate data (MSWX, CMIP, DWD, HYRAS).
|
|
193
|
+
Provides a Hydra-driven CLI, programmatic wrapper, cloud-native CMIP access, local dataset handling, and standardized CSV/NetCDF/Zarr exports.
|
|
194
|
+
|
|
195
|
+
## Key features
|
|
196
|
+
- Fetch and load datasets: MSWX, CMIP (cloud via intake), DWD, HYRAS
|
|
197
|
+
- Spatial extraction: point, box (region via config bounds), or shapefile (GeoJSON/Feature)
|
|
198
|
+
- Temporal subsetting via config or programmatic call
|
|
199
|
+
- Multi-format export: NetCDF, Zarr, CSV (standardized long format: variable, value, units)
|
|
200
|
+
- Hydra configuration + easy CLI overrides
|
|
201
|
+
- Helper to normalize AOI (GeoJSON → point / bbox / polygon)
|
|
202
|
+
- Provenance-friendly workflow (designed to be used with CI/CD workflows)
|
|
203
|
+
|
|
204
|
+
## Install (development)
|
|
205
|
+
1. Clone repository
|
|
206
|
+
```bash
|
|
207
|
+
git clone <repo-url>
|
|
208
|
+
cd climdata
|
|
209
|
+
```
|
|
210
|
+
2. Create virtualenv and install deps
|
|
211
|
+
```bash
|
|
212
|
+
python -m venv .venv
|
|
213
|
+
source .venv/bin/activate
|
|
214
|
+
pip install -U pip
|
|
215
|
+
pip install -e ".[dev]" # or pip install -r requirements.txt
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
## Quick CLI (Hydra) usage
|
|
219
|
+
Hydra reads configs from `climdata/conf/`. Any config value can be overridden on the command line.
|
|
220
|
+
|
|
221
|
+
Examples:
|
|
222
|
+
```bash
|
|
223
|
+
# Region extraction (saves NetCDF by default when region is used)
|
|
224
|
+
python examples/climdata_cli.py dataset=CMIP region=europe time_range.start_date=2010-01-01 time_range.end_date=2010-12-31
|
|
225
|
+
|
|
226
|
+
# Point extraction (saves CSV)
|
|
227
|
+
python examples/climdata_cli.py dataset=MSWX lat=52.5 lon=13.4 variables=['tas','pr'] time_range.start_date=2000-01-01
|
|
228
|
+
|
|
229
|
+
# HYRAS / DWD (point only)
|
|
230
|
+
python examples/climdata_cli.py dataset=HYRAS lat=52 lon=10
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
Notes:
|
|
234
|
+
- Use `dataset=<MSWX|CMIP|DWD|HYRAS>` in CLI.
|
|
235
|
+
- Override any config key: e.g. `time_range.start_date=2000-01-01`.
|
|
236
|
+
- DWD/HYRAS: region (box) extraction is not supported — script will raise an error if attempted.
|
|
237
|
+
|
|
238
|
+
## Programmatic usage
|
|
239
|
+
Use the wrapper to compose configs, preprocess AOI, extract, and save.
|
|
240
|
+
|
|
241
|
+
```python
|
|
242
|
+
from climdata.utils.wrapper import extract_data
|
|
243
|
+
|
|
244
|
+
# returns (cfg, filename, ds, index) when save_to_file=True
|
|
245
|
+
cfg, filename, ds, index = extract_data(cfg_name="config", overrides=["dataset=MSWX","lat=52.5","lon=13.4"])
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
Or use the dataset classes directly:
|
|
249
|
+
```python
|
|
250
|
+
import climdata, xarray as xr
|
|
251
|
+
cmip = climdata.CMIP(cfg)
|
|
252
|
+
cmip.fetch()
|
|
253
|
+
cmip.load()
|
|
254
|
+
cmip.extract(box=cfg.bounds[cfg.region])
|
|
255
|
+
cmip.save_netcdf("output.nc")
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## Configs
|
|
259
|
+
- Config files live in `climdata/conf/`. There are dataset-specific config entry points e.g. `config_cmip`, `config_mswx`, etc.
|
|
260
|
+
- Filename templates are configurable in `cfg.output`:
|
|
261
|
+
- `cfg.output.filename_nc`
|
|
262
|
+
- `cfg.output.filename_csv`
|
|
263
|
+
- `cfg.output.filename_zarr`
|
|
264
|
+
|
|
265
|
+
The wrapper generates filenames via `get_output_filename(cfg, output_type, ...)` using `cfg.bounds`, `cfg.time_range`, etc.
|
|
266
|
+
|
|
267
|
+
## Output CSV format
|
|
268
|
+
CSV produced by `save_csv` is standardized to the long form with columns (where available):
|
|
269
|
+
- source_id, experiment_id, table_id, time, lat, lon, variable, value, units
|
|
270
|
+
|
|
271
|
+
This ensures a single `value` column and a `variable` column for stacked variables.
|
|
272
|
+
|
|
273
|
+
## Common issues & tips
|
|
274
|
+
- NetCDF write ValueError (datetime encoding): call `ds["time"].encoding.clear()` before `to_netcdf()` (wrapper handles this).
|
|
275
|
+
- PermissionError writing files: ensure output directory is writable or write to `/tmp/` (or adjust permissions).
|
|
276
|
+
- CMIP cloud access requires network access — use the Pangeo intake catalog URL already referenced in code.
|
|
277
|
+
|
|
278
|
+
## AOI handling
|
|
279
|
+
`preprocess_aoi(cfg)` accepts:
|
|
280
|
+
- GeoJSON strings / Feature / FeatureCollection
|
|
281
|
+
- Point → sets `cfg.lat`, `cfg.lon`
|
|
282
|
+
- Polygon or bbox → sets `cfg.bounds['custom']` and `cfg.region='custom'`
|
|
283
|
+
|
|
284
|
+
## HYRAS support
|
|
285
|
+
HYRAS class mirrors MSWX design:
|
|
286
|
+
- `fetch()` / `load()` / `extract(point=...)` / `save_csv()` / `save_netcdf()`
|
|
287
|
+
- HYRAS currently supports point extraction only; attempting to extract a region raises an error.
|
|
288
|
+
|
|
289
|
+
## Development & provenance
|
|
290
|
+
- CI: add GitHub Actions workflows to run tests and build/publish to PyPI.
|
|
291
|
+
- Keep config and runtime overrides in Hydra to enable reproducible runs.
|
|
292
|
+
- Include `CITATION.cff`, license, and a changelog for FAIR discoverability.
|
|
293
|
+
|
|
294
|
+
## Contributing
|
|
295
|
+
- Run tests: `pytest`
|
|
296
|
+
- Style: follow repository linting config
|
|
297
|
+
- Open PRs against `main` with tests and a short changelog entry
|
|
298
|
+
|
|
299
|
+
## License
|
|
300
|
+
Released under the MIT License; see `LICENSE` for the full text.
|
|
301
|
+
|
|
302
|
+
---
|
|
303
|
+
|
|
304
|
+
For further examples, see `examples/` and the `docs/` folder (usage, installation, faq).
|
climdata-0.3.4/README.md
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# climdata
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
[climdata on PyPI](https://pypi.python.org/pypi/climdata)
|
|
5
|
+
[climdata on conda-forge](https://anaconda.org/conda-forge/climdata)
|
|
6
|
+
|
|
7
|
+
`climdata` is a Python package designed to automate fetching, extraction, and processing of climate data from various sources, including MSWX, DWD HYRAS, ERA5-Land, and NASA-NEX-GDDP. It provides tools to retrieve data for specific locations and time ranges, facilitating climate analysis and research.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Key features
|
|
12
|
+
- Fetch and load datasets: MSWX, CMIP (cloud via intake), DWD, HYRAS
|
|
13
|
+
- Spatial extraction: point, box (region via config bounds), or shapefile (GeoJSON/Feature)
|
|
14
|
+
- Temporal subsetting via config or programmatic call
|
|
15
|
+
- Multi-format export: NetCDF, Zarr, CSV (standardized long format: variable, value, units)
|
|
16
|
+
- Hydra configuration + easy CLI overrides
|
|
17
|
+
- Helper to normalize AOI (GeoJSON → point / bbox / polygon)
|
|
18
|
+
- Provenance-friendly workflow (designed to be used with CI/CD workflows)
|
|
19
|
+
|
|
20
|
+
## Install (development)
|
|
21
|
+
1. Clone repository
|
|
22
|
+
```bash
|
|
23
|
+
git clone <repo-url>
|
|
24
|
+
cd climdata
|
|
25
|
+
```
|
|
26
|
+
2. Create virtualenv and install deps
|
|
27
|
+
```bash
|
|
28
|
+
python -m venv .venv
|
|
29
|
+
source .venv/bin/activate
|
|
30
|
+
pip install -U pip
|
|
31
|
+
pip install -e ".[dev]" # or pip install -r requirements.txt
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Quick CLI (Hydra) usage
|
|
35
|
+
Hydra reads configs from `climdata/conf/`. Any config value can be overridden on the command line.
|
|
36
|
+
|
|
37
|
+
Examples:
|
|
38
|
+
```bash
|
|
39
|
+
# Region extraction (saves NetCDF by default when region is used)
|
|
40
|
+
python examples/climdata_cli.py dataset=CMIP region=europe time_range.start_date=2010-01-01 time_range.end_date=2010-12-31
|
|
41
|
+
|
|
42
|
+
# Point extraction (saves CSV)
|
|
43
|
+
python examples/climdata_cli.py dataset=MSWX lat=52.5 lon=13.4 variables=['tas','pr'] time_range.start_date=2000-01-01
|
|
44
|
+
|
|
45
|
+
# HYRAS / DWD (point only)
|
|
46
|
+
python examples/climdata_cli.py dataset=HYRAS lat=52 lon=10
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Notes:
|
|
50
|
+
- Use `dataset=<MSWX|CMIP|DWD|HYRAS>` in CLI.
|
|
51
|
+
- Override any config key: e.g. `time_range.start_date=2000-01-01`.
|
|
52
|
+
- DWD/HYRAS: region (box) extraction is not supported — script will raise an error if attempted.
|
|
53
|
+
|
|
54
|
+
## Programmatic usage
|
|
55
|
+
Use the wrapper to compose configs, preprocess AOI, extract, and save.
|
|
56
|
+
|
|
57
|
+
```python
|
|
58
|
+
from climdata.utils.wrapper import extract_data
|
|
59
|
+
|
|
60
|
+
# returns (cfg, filename, ds, index) when save_to_file=True
|
|
61
|
+
cfg, filename, ds, index = extract_data(cfg_name="config", overrides=["dataset=MSWX","lat=52.5","lon=13.4"])
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Or use the dataset classes directly:
|
|
65
|
+
```python
|
|
66
|
+
import climdata, xarray as xr
|
|
67
|
+
cmip = climdata.CMIP(cfg)
|
|
68
|
+
cmip.fetch()
|
|
69
|
+
cmip.load()
|
|
70
|
+
cmip.extract(box=cfg.bounds[cfg.region])
|
|
71
|
+
cmip.save_netcdf("output.nc")
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Configs
|
|
75
|
+
- Config files live in `climdata/conf/`. There are dataset-specific config entry points e.g. `config_cmip`, `config_mswx`, etc.
|
|
76
|
+
- Filename templates are configurable in `cfg.output`:
|
|
77
|
+
- `cfg.output.filename_nc`
|
|
78
|
+
- `cfg.output.filename_csv`
|
|
79
|
+
- `cfg.output.filename_zarr`
|
|
80
|
+
|
|
81
|
+
The wrapper generates filenames via `get_output_filename(cfg, output_type, ...)` using `cfg.bounds`, `cfg.time_range`, etc.
|
|
82
|
+
|
|
83
|
+
## Output CSV format
|
|
84
|
+
CSV produced by `save_csv` is standardized to the long form with columns (where available):
|
|
85
|
+
- source_id, experiment_id, table_id, time, lat, lon, variable, value, units
|
|
86
|
+
|
|
87
|
+
This ensures a single `value` column and a `variable` column for stacked variables.
|
|
88
|
+
|
|
89
|
+
## Common issues & tips
|
|
90
|
+
- NetCDF write ValueError (datetime encoding): call `ds["time"].encoding.clear()` before `to_netcdf()` (wrapper handles this).
|
|
91
|
+
- PermissionError writing files: ensure output directory is writable or write to `/tmp/` (or adjust permissions).
|
|
92
|
+
- CMIP cloud access requires network access — use the Pangeo intake catalog URL already referenced in code.
|
|
93
|
+
|
|
94
|
+
## AOI handling
|
|
95
|
+
`preprocess_aoi(cfg)` accepts:
|
|
96
|
+
- GeoJSON strings / Feature / FeatureCollection
|
|
97
|
+
- Point → sets `cfg.lat`, `cfg.lon`
|
|
98
|
+
- Polygon or bbox → sets `cfg.bounds['custom']` and `cfg.region='custom'`
|
|
99
|
+
|
|
100
|
+
## HYRAS support
|
|
101
|
+
HYRAS class mirrors MSWX design:
|
|
102
|
+
- `fetch()` / `load()` / `extract(point=...)` / `save_csv()` / `save_netcdf()`
|
|
103
|
+
- HYRAS currently supports point extraction only; attempting to extract a region raises an error.
|
|
104
|
+
|
|
105
|
+
## Development & provenance
|
|
106
|
+
- CI: add GitHub Actions workflows to run tests and build/publish to PyPI.
|
|
107
|
+
- Keep config and runtime overrides in Hydra to enable reproducible runs.
|
|
108
|
+
- Include `CITATION.cff`, license, and a changelog for FAIR discoverability.
|
|
109
|
+
|
|
110
|
+
## Contributing
|
|
111
|
+
- Run tests: `pytest`
|
|
112
|
+
- Style: follow repository linting config
|
|
113
|
+
- Open PRs against `main` with tests and a short changelog entry
|
|
114
|
+
|
|
115
|
+
## License
|
|
116
|
+
Released under the MIT License; see `LICENSE` for the full text.
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
For further examples, see `examples/` and the `docs/` folder (usage, installation, faq).
|
|
121
|
+
# climdata
|
|
122
|
+
|
|
123
|
+
Lightweight toolkit to fetch, subset and export climate data (MSWX, CMIP, DWD, HYRAS).
|
|
124
|
+
Provides a Hydra-driven CLI, programmatic wrapper, cloud-native CMIP access, local dataset handling, and standardized CSV/NetCDF/Zarr exports.
|
|
125
|
+
|
|
126
|
+
## Key features
|
|
127
|
+
- Fetch and load datasets: MSWX, CMIP (cloud via intake), DWD, HYRAS
|
|
128
|
+
- Spatial extraction: point, box (region via config bounds), or shapefile (GeoJSON/Feature)
|
|
129
|
+
- Temporal subsetting via config or programmatic call
|
|
130
|
+
- Multi-format export: NetCDF, Zarr, CSV (standardized long format: variable, value, units)
|
|
131
|
+
- Hydra configuration + easy CLI overrides
|
|
132
|
+
- Helper to normalize AOI (GeoJSON → point / bbox / polygon)
|
|
133
|
+
- Provenance-friendly workflow (designed to be used with CI/CD workflows)
|
|
134
|
+
|
|
135
|
+
## Install (development)
|
|
136
|
+
1. Clone repository
|
|
137
|
+
```bash
|
|
138
|
+
git clone <repo-url>
|
|
139
|
+
cd climdata
|
|
140
|
+
```
|
|
141
|
+
2. Create virtualenv and install deps
|
|
142
|
+
```bash
|
|
143
|
+
python -m venv .venv
|
|
144
|
+
source .venv/bin/activate
|
|
145
|
+
pip install -U pip
|
|
146
|
+
pip install -e ".[dev]" # or pip install -r requirements.txt
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
## Quick CLI (Hydra) usage
|
|
150
|
+
Hydra reads configs from `climdata/conf/`. Any config value can be overridden on the command line.
|
|
151
|
+
|
|
152
|
+
Examples:
|
|
153
|
+
```bash
|
|
154
|
+
# Region extraction (saves NetCDF by default when region is used)
|
|
155
|
+
python examples/climdata_cli.py dataset=CMIP region=europe time_range.start_date=2010-01-01 time_range.end_date=2010-12-31
|
|
156
|
+
|
|
157
|
+
# Point extraction (saves CSV)
|
|
158
|
+
python examples/climdata_cli.py dataset=MSWX lat=52.5 lon=13.4 variables=['tas','pr'] time_range.start_date=2000-01-01
|
|
159
|
+
|
|
160
|
+
# HYRAS / DWD (point only)
|
|
161
|
+
python examples/climdata_cli.py dataset=HYRAS lat=52 lon=10
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Notes:
|
|
165
|
+
- Use `dataset=<MSWX|CMIP|DWD|HYRAS>` in CLI.
|
|
166
|
+
- Override any config key: e.g. `time_range.start_date=2000-01-01`.
|
|
167
|
+
- DWD/HYRAS: region (box) extraction is not supported — script will raise an error if attempted.
|
|
168
|
+
|
|
169
|
+
## Programmatic usage
|
|
170
|
+
Use the wrapper to compose configs, preprocess AOI, extract, and save.
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
from climdata.utils.wrapper import extract_data
|
|
174
|
+
|
|
175
|
+
# returns (cfg, filename, ds, index) when save_to_file=True
|
|
176
|
+
cfg, filename, ds, index = extract_data(cfg_name="config", overrides=["dataset=MSWX","lat=52.5","lon=13.4"])
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
Or use the dataset classes directly:
|
|
180
|
+
```python
|
|
181
|
+
import climdata, xarray as xr
|
|
182
|
+
cmip = climdata.CMIP(cfg)
|
|
183
|
+
cmip.fetch()
|
|
184
|
+
cmip.load()
|
|
185
|
+
cmip.extract(box=cfg.bounds[cfg.region])
|
|
186
|
+
cmip.save_netcdf("output.nc")
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
## Configs
|
|
190
|
+
- Config files live in `climdata/conf/`. There are dataset-specific config entry points e.g. `config_cmip`, `config_mswx`, etc.
|
|
191
|
+
- Filename templates are configurable in `cfg.output`:
|
|
192
|
+
- `cfg.output.filename_nc`
|
|
193
|
+
- `cfg.output.filename_csv`
|
|
194
|
+
- `cfg.output.filename_zarr`
|
|
195
|
+
|
|
196
|
+
The wrapper generates filenames via `get_output_filename(cfg, output_type, ...)` using `cfg.bounds`, `cfg.time_range`, etc.
|
|
197
|
+
|
|
198
|
+
## Output CSV format
|
|
199
|
+
CSV produced by `save_csv` is standardized to the long form with columns (where available):
|
|
200
|
+
- source_id, experiment_id, table_id, time, lat, lon, variable, value, units
|
|
201
|
+
|
|
202
|
+
This ensures a single `value` column and a `variable` column for stacked variables.
|
|
203
|
+
|
|
204
|
+
## Common issues & tips
|
|
205
|
+
- NetCDF write ValueError (datetime encoding): call `ds["time"].encoding.clear()` before `to_netcdf()` (wrapper handles this).
|
|
206
|
+
- PermissionError writing files: ensure output directory is writable or write to `/tmp/` (or adjust permissions).
|
|
207
|
+
- CMIP cloud access requires network access — use the Pangeo intake catalog URL already referenced in code.
|
|
208
|
+
|
|
209
|
+
## AOI handling
|
|
210
|
+
`preprocess_aoi(cfg)` accepts:
|
|
211
|
+
- GeoJSON strings / Feature / FeatureCollection
|
|
212
|
+
- Point → sets `cfg.lat`, `cfg.lon`
|
|
213
|
+
- Polygon or bbox → sets `cfg.bounds['custom']` and `cfg.region='custom'`
|
|
214
|
+
|
|
215
|
+
## HYRAS support
|
|
216
|
+
HYRAS class mirrors MSWX design:
|
|
217
|
+
- `fetch()` / `load()` / `extract(point=...)` / `save_csv()` / `save_netcdf()`
|
|
218
|
+
- HYRAS currently supports point extraction only; attempting to extract a region raises an error.
|
|
219
|
+
|
|
220
|
+
## Development & provenance
|
|
221
|
+
- CI: add GitHub Actions workflows to run tests and build/publish to PyPI.
|
|
222
|
+
- Keep config and runtime overrides in Hydra to enable reproducible runs.
|
|
223
|
+
- Include `CITATION.cff`, license, and a changelog for FAIR discoverability.
|
|
224
|
+
|
|
225
|
+
## Contributing
|
|
226
|
+
- Run tests: `pytest`
|
|
227
|
+
- Style: follow repository linting config
|
|
228
|
+
- Open PRs against `main` with tests and a short changelog entry
|
|
229
|
+
|
|
230
|
+
## License
|
|
231
|
+
Released under the MIT License; see `LICENSE` for the full text.
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
For further examples, see `examples/` and the `docs/` folder (usage, installation, faq).
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Top-level package for climdata."""
|
|
2
|
+
|
|
3
|
+
__author__ = """Kaushik Muduchuru"""
|
|
4
|
+
__email__ = "kaushik.reddy.m@gmail.com"
|
|
5
|
+
__version__ = "0.3.4"
|
|
6
|
+
|
|
7
|
+
from .utils.utils_download import * # etc.
|
|
8
|
+
from .utils.config import load_config
|
|
9
|
+
from .utils.wrapper import extract_data
|
|
10
|
+
from .datasets.DWD import DWDmirror as DWD
|
|
11
|
+
from .datasets.MSWX import MSWXmirror as MSWX
|
|
12
|
+
from .datasets.ERA5 import ERA5Mirror as ERA5
|
|
13
|
+
from .datasets.CMIPlocal import CMIPmirror as CMIPlocal
|
|
14
|
+
from .datasets.CMIPCloud import CMIPCloud as CMIP
|
|
15
|
+
from .datasets.HYRAS import HYRASmirror as HYRAS
|
|
16
|
+
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
defaults:
|
|
2
|
+
- _self_
|
|
3
|
+
- mappings/parameters@dsinfo
|
|
4
|
+
- mappings/variables@varinfo
|
|
5
|
+
- mappings/indices@extinfo
|
|
6
|
+
|
|
7
|
+
dataset: mswx
|
|
8
|
+
lat: null
|
|
9
|
+
lon: null
|
|
10
|
+
aoi: null
|
|
11
|
+
shapefile: null
|
|
12
|
+
|
|
13
|
+
variables: ["tasmin","tasmax","pr"]
|
|
14
|
+
index: null
|
|
15
|
+
data_dir: ./data
|
|
16
|
+
region: null
|
|
17
|
+
|
|
18
|
+
experiment_id: historical
|
|
19
|
+
source_id: MIROC6
|
|
20
|
+
table_id: day
|
|
21
|
+
|
|
22
|
+
bounds:
|
|
23
|
+
europe:
|
|
24
|
+
lat_min: 34.0
|
|
25
|
+
lat_max: 71.0
|
|
26
|
+
lon_min: -25.0
|
|
27
|
+
lon_max: 45.0
|
|
28
|
+
custom:
|
|
29
|
+
lat_min: null
|
|
30
|
+
lat_max: null
|
|
31
|
+
lon_min: null
|
|
32
|
+
lon_max: null
|
|
33
|
+
|
|
34
|
+
time_range:
|
|
35
|
+
start_date: "1989-01-01"
|
|
36
|
+
end_date: "2020-12-31"
|
|
37
|
+
|
|
38
|
+
output:
|
|
39
|
+
out_dir: "./climdata/data/"
|
|
40
|
+
filename_csv: "{provider}_{parameter}_LAT_{lat}_LON_{lon}_{start}_{end}.csv"
|
|
41
|
+
filename_zarr: "{provider}_{parameter}_LAT{lat_range}_LON{lon_range}_{start}_{end}.zarr"
|
|
42
|
+
filename_nc: "{provider}_{parameter}_LAT{lat_range}_LON{lon_range}_{start}_{end}.nc"
|
|
43
|
+
fmt: "standard"
|