climdata 0.0.2__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. {climdata-0.0.2 → climdata-0.3.4}/.github/workflows/docs.yml +1 -1
  2. climdata-0.3.4/.github/workflows/pypi.yml +43 -0
  3. {climdata-0.0.2 → climdata-0.3.4}/.github/workflows/ubuntu.yml +0 -1
  4. {climdata-0.0.2 → climdata-0.3.4}/.gitignore +3 -1
  5. {climdata-0.0.2 → climdata-0.3.4}/MANIFEST.in +2 -0
  6. climdata-0.3.4/PKG-INFO +304 -0
  7. climdata-0.3.4/README.md +235 -0
  8. climdata-0.3.4/climdata/__init__.py +16 -0
  9. climdata-0.3.4/climdata/conf/config.yaml +43 -0
  10. climdata-0.3.4/climdata/conf/mappings/indices.yaml +338 -0
  11. climdata-0.3.4/climdata/conf/mappings/parameters.yaml +218 -0
  12. climdata-0.3.4/climdata/conf/mappings/variables.yaml +76 -0
  13. climdata-0.3.4/climdata/datasets/CMIPCloud.py +186 -0
  14. climdata-0.3.4/climdata/datasets/CMIPlocal.py +224 -0
  15. climdata-0.3.4/climdata/datasets/DWD.py +170 -0
  16. climdata-0.3.4/climdata/datasets/ERA5.py +322 -0
  17. climdata-0.3.4/climdata/datasets/HYRAS.py +497 -0
  18. climdata-0.3.4/climdata/datasets/ISD_down.sh +90 -0
  19. climdata-0.3.4/climdata/datasets/MSWX.py +269 -0
  20. climdata-0.3.4/climdata/extremes/calc_extremes.py +22 -0
  21. climdata-0.3.4/climdata/extremes/indices.py +101 -0
  22. climdata-0.3.4/climdata/impute/brits.py +100 -0
  23. climdata-0.3.4/climdata/utils/config.py +30 -0
  24. climdata-0.3.4/climdata/utils/utils_download.py +250 -0
  25. climdata-0.3.4/climdata/utils/wrapper.py +263 -0
  26. climdata-0.3.4/climdata.egg-info/PKG-INFO +304 -0
  27. {climdata-0.0.2 → climdata-0.3.4}/climdata.egg-info/SOURCES.txt +18 -6
  28. {climdata-0.0.2 → climdata-0.3.4}/climdata.egg-info/requires.txt +7 -0
  29. climdata-0.3.4/docs/climdata.md +4 -0
  30. climdata-0.3.4/docs/common.md +41 -0
  31. climdata-0.0.2/README.md → climdata-0.3.4/docs/index.md +3 -1
  32. climdata-0.3.4/examples/climdata_cli.py +49 -0
  33. climdata-0.3.4/examples/extremes.ipynb +904 -0
  34. climdata-0.3.4/examples/wrapper.ipynb +330 -0
  35. {climdata-0.0.2 → climdata-0.3.4}/mkdocs.yml +17 -21
  36. {climdata-0.0.2 → climdata-0.3.4}/pyproject.toml +8 -2
  37. {climdata-0.0.2 → climdata-0.3.4}/requirements.txt +7 -0
  38. climdata-0.3.4/tests/test_climdata.py +9 -0
  39. climdata-0.3.4/usecase/extremes_custom.ipynb +610 -0
  40. climdata-0.3.4/usecase/impute.ipynb +564 -0
  41. climdata-0.3.4/usecase/simplace_dataprep.ipynb +1639 -0
  42. climdata-0.0.2/.github/workflows/pypi.yml +0 -30
  43. climdata-0.0.2/PKG-INFO +0 -253
  44. climdata-0.0.2/climdata/__init__.py +0 -8
  45. climdata-0.0.2/climdata/__main__.py +0 -5
  46. climdata-0.0.2/climdata/conf/config.yaml +0 -23
  47. climdata-0.0.2/climdata/conf/mappings/parameters.yaml +0 -172
  48. climdata-0.0.2/climdata/datasets/DWD.py +0 -73
  49. climdata-0.0.2/climdata/datasets/MSWX.py +0 -195
  50. climdata-0.0.2/climdata/main.py +0 -56
  51. climdata-0.0.2/climdata/requirements.txt +0 -20
  52. climdata-0.0.2/climdata/utils/config.py +0 -30
  53. climdata-0.0.2/climdata/utils/utils_download.py +0 -976
  54. climdata-0.0.2/climdata.egg-info/PKG-INFO +0 -253
  55. climdata-0.0.2/docs/climdata.md +0 -4
  56. climdata-0.0.2/docs/common.md +0 -3
  57. climdata-0.0.2/docs/examples/run_downloader.ipynb +0 -1244
  58. climdata-0.0.2/docs/index.md +0 -16
  59. climdata-0.0.2/dwd_tas_LAT52.507_LON14.1372_1989-01-01_2020-12-31.csv +0 -11506
  60. climdata-0.0.2/tests/test_climdata.py +0 -21
  61. {climdata-0.0.2 → climdata-0.3.4}/.editorconfig +0 -0
  62. {climdata-0.0.2 → climdata-0.3.4}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  63. {climdata-0.0.2 → climdata-0.3.4}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  64. {climdata-0.0.2 → climdata-0.3.4}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  65. {climdata-0.0.2 → climdata-0.3.4}/.github/workflows/docs-build.yml +0 -0
  66. {climdata-0.0.2 → climdata-0.3.4}/.github/workflows/installation.yml +0 -0
  67. {climdata-0.0.2 → climdata-0.3.4}/.github/workflows/macos.yml +0 -0
  68. {climdata-0.0.2 → climdata-0.3.4}/.github/workflows/windows.yml +0 -0
  69. {climdata-0.0.2 → climdata-0.3.4}/LICENSE +0 -0
  70. {climdata-0.0.2 → climdata-0.3.4}/climdata/utils/__init__.py +0 -0
  71. {climdata-0.0.2 → climdata-0.3.4}/climdata.egg-info/dependency_links.txt +0 -0
  72. {climdata-0.0.2 → climdata-0.3.4}/climdata.egg-info/entry_points.txt +0 -0
  73. {climdata-0.0.2 → climdata-0.3.4}/climdata.egg-info/top_level.txt +0 -0
  74. {climdata-0.0.2 → climdata-0.3.4}/docs/changelog.md +0 -0
  75. {climdata-0.0.2 → climdata-0.3.4}/docs/contributing.md +0 -0
  76. {climdata-0.0.2 → climdata-0.3.4}/docs/faq.md +0 -0
  77. {climdata-0.0.2 → climdata-0.3.4}/docs/installation.md +0 -0
  78. {climdata-0.0.2 → climdata-0.3.4}/docs/overrides/main.html +0 -0
  79. {climdata-0.0.2 → climdata-0.3.4}/docs/usage.md +0 -0
  80. {climdata-0.0.2 → climdata-0.3.4}/requirements_dev.txt +0 -0
  81. {climdata-0.0.2 → climdata-0.3.4}/setup.cfg +0 -0
  82. {climdata-0.0.2 → climdata-0.3.4}/tests/__init__.py +0 -0
{climdata-0.0.2 → climdata-0.3.4}/.github/workflows/docs.yml
@@ -23,7 +23,7 @@ jobs:
  pip install .
  - name: Discover typos with codespell
  run: |
- codespell --skip="*.csv,*.geojson,*.json,*.js,*.html,*cff,./.git" --ignore-words-list="aci,hist"
+ codespell --skip="*.csv,*.geojson,*.json,*.js,*.html,*cff,./.git" --ignore-words-list="aci,hist" || true
  - name: PKG-TEST
  run: |
  python -m unittest discover tests/
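With `|| true` appended, codespell findings are reported but no longer fail this workflow. The same check can be reproduced locally with the flags from the step above (a minimal sketch; assumes `codespell` is installed):

```bash
# Report typos without failing the shell, mirroring the updated CI step
codespell --skip="*.csv,*.geojson,*.json,*.js,*.html,*cff,./.git" \
    --ignore-words-list="aci,hist" || true
```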
climdata-0.3.4/.github/workflows/pypi.yml
@@ -0,0 +1,43 @@
+ name: Release
+
+ on:
+ push:
+ tags:
+ - "v*" # Trigger when pushing tags like v0.1.0
+
+ jobs:
+ release:
+ runs-on: ubuntu-latest
+
+ steps:
+ # 1️⃣ Checkout code
+ - uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ # 2️⃣ Set up Python
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.11"
+
+ # 3️⃣ Upgrade pip and install build/test dependencies
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install -r requirements.txt
+ pip install build twine wheel setuptools
+
+ # 4️⃣ Run unit tests
+ - name: Run tests
+ run: |
+ python -m unittest discover tests/
+
+ # 5️⃣ Build the package
+ - name: Build package
+ run: python -m build
+
+ # 6️⃣ Publish to PyPI
+ - name: Publish to PyPI
+ run: |
+ python -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_API_TOKEN }} --verbose
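The new release workflow runs only when a tag matching `v*` is pushed and expects a `PYPI_API_TOKEN` repository secret. Under those assumptions, a release would typically be cut like this (illustrative commands, not part of the package):

```bash
# Tag the release commit and push the tag to trigger the Release workflow
git tag v0.3.4
git push origin v0.3.4
```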
{climdata-0.0.2 → climdata-0.3.4}/.github/workflows/ubuntu.yml
@@ -17,7 +17,6 @@ jobs:
  fail-fast: false
  matrix:
  config:
- - { os: ubuntu-latest, py: "3.9" }
  - { os: ubuntu-latest, py: "3.10" }
  - { os: ubuntu-latest, py: "3.11" }
  - { os: ubuntu-latest, py: "3.12" }
{climdata-0.0.2 → climdata-0.3.4}/.gitignore
@@ -107,4 +107,6 @@ ENV/
  .vscode/
  climdata/conf/service.json
  outputs
- *.csv
+ *.csv
+ *.zarr
+ *.nc
{climdata-0.0.2 → climdata-0.3.4}/MANIFEST.in
@@ -5,3 +5,5 @@ include requirements.txt
  recursive-exclude * __pycache__
  recursive-exclude * *.py[co]
 
+ recursive-include climdata/conf *
+
climdata-0.3.4/PKG-INFO
@@ -0,0 +1,304 @@
+ Metadata-Version: 2.4
+ Name: climdata
+ Version: 0.3.4
+ Summary: This project automates the fetching and extraction of weather data from multiple sources — such as MSWX, DWD HYRAS, ERA5-Land, NASA-NEX-GDDP, and more — for a given location and time range.
+ Author-email: Kaushik Muduchuru <kaushik.reddy.m@gmail.com>
+ License: MIT License
+ Project-URL: Homepage, https://github.com/Kaushikreddym/climdata
+ Keywords: climdata
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Natural Language :: English
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: xarray
+ Requires-Dist: xesmf
+ Requires-Dist: ipython
+ Requires-Dist: jupyter
+ Requires-Dist: openpyxl
+ Requires-Dist: pandas
+ Requires-Dist: geopandas
+ Requires-Dist: rioxarray
+ Requires-Dist: dask[complete]
+ Requires-Dist: xclim
+ Requires-Dist: cartopy
+ Requires-Dist: colormaps
+ Requires-Dist: h5netcdf
+ Requires-Dist: netCDF4
+ Requires-Dist: pymannkendall
+ Requires-Dist: tqdm
+ Requires-Dist: zarr
+ Requires-Dist: ipyleaflet
+ Requires-Dist: wetterdienst
+ Requires-Dist: pint-pandas
+ Requires-Dist: cdsapi
+ Requires-Dist: hydra-core
+ Requires-Dist: intake
+ Requires-Dist: intake-esm
+ Requires-Dist: aiohttp
+ Requires-Dist: requests
+ Requires-Dist: gcsfs
+ Requires-Dist: dask-jobqueue
+ Requires-Dist: seaborn
+ Requires-Dist: earthengine-api
+ Requires-Dist: geemap
+ Requires-Dist: xsdba
+ Requires-Dist: xclim
+ Requires-Dist: pyarrow
+ Requires-Dist: fastparquet
+ Requires-Dist: mlflow
+ Requires-Dist: scikit-learn
+ Requires-Dist: xgboost
+ Requires-Dist: optuna
+ Requires-Dist: gitpython
+ Requires-Dist: beautifulsoup4
+ Requires-Dist: google-auth
+ Requires-Dist: google-api-python-client
+ Requires-Dist: ipdb
+ Provides-Extra: all
+ Requires-Dist: pandas; extra == "all"
+ Provides-Extra: extra
+ Requires-Dist: pandas; extra == "extra"
+ Dynamic: license-file
+
+ # climdata
+
+
+ [![image](https://img.shields.io/pypi/v/climdata.svg)](https://pypi.python.org/pypi/climdata)
+ [![image](https://img.shields.io/conda/vn/conda-forge/climdata.svg)](https://anaconda.org/conda-forge/climdata)
+
+ `climdata` is a Python package designed to automate fetching, extraction, and processing of climate data from various sources, including MSWX, DWD HYRAS, ERA5-Land, and NASA-NEX-GDDP. It provides tools to retrieve data for specific locations and time ranges, facilitating climate analysis and research.
+
+ ---
+
+ ## Key features
+ - Fetch and load datasets: MSWX, CMIP (cloud via intake), DWD, HYRAS
+ - Spatial extraction: point, box (region via config bounds), or shapefile (GeoJSON/Feature)
+ - Temporal subsetting via config or programmatic call
+ - Multi-format export: NetCDF, Zarr, CSV (standardized long format: variable, value, units)
+ - Hydra configuration + easy CLI overrides
+ - Helper to normalize AOI (GeoJSON → point / bbox / polygon)
+ - Provenance-friendly workflow (designed to be used with CI/CD workflows)
+
+ ## Install (development)
+ 1. Clone repository
+ ```bash
+ git clone <repo-url>
+ cd climdata
+ ```
+ 2. Create virtualenv and install deps
+ ```bash
+ python -m venv .venv
+ source .venv/bin/activate
+ pip install -U pip
+ pip install -e ".[dev]" # or pip install -r requirements.txt
+ ```
+
+ ## Quick CLI (Hydra) usage
+ Hydra reads configs from `conf/`. Override any config value on the CLI.
+
+ Examples:
+ ```bash
+ # Region extraction (saves NetCDF by default when region is used)
+ python examples/climdata_cli.py dataset=CMIP region=europe time_range.start_date=2010-01-01 time_range.end_date=2010-12-31
+
+ # Point extraction (saves CSV)
+ python examples/climdata_cli.py dataset=MSWX lat=52.5 lon=13.4 variables=['tas','pr'] time_range.start_date=2000-01-01
+
+ # HYRAS / DWD (point only)
+ python examples/climdata_cli.py dataset=HYRAS lat=52 lon=10
+ ```
+
+ Notes:
+ - Use `dataset=<MSWX|CMIP|DWD|HYRAS>` in CLI.
+ - Override any config key: e.g. `time_range.start_date=2000-01-01`.
+ - DWD/HYRAS: region (box) extraction is not supported — script will raise an error if attempted.
+
+ ## Programmatic usage
+ Use the wrapper to compose configs, preprocess AOI, extract, and save.
+
+ ```python
+ from climdata.utils.wrapper import extract_data
+
+ # returns (cfg, filename, ds, index) when save_to_file=True
+ cfg, filename, ds, index = extract_data(cfg_name="config", overrides=["dataset=MSWX","lat=52.5","lon=13.4"])
+ ```
+
+ Or use the dataset classes directly:
+ ```python
+ import climdata, xarray as xr
+ cmip = climdata.CMIP(cfg)
+ cmip.fetch()
+ cmip.load()
+ cmip.extract(box=cfg.bounds[cfg.region])
+ cmip.save_netcdf("output.nc")
+ ```
+
+ ## Configs
+ - Config files live in `climdata/conf/`. There are dataset-specific config entry points e.g. `config_cmip`, `config_mswx`, etc.
+ - Filename templates are configurable in `cfg.output`:
+ - `cfg.output.filename_nc`
+ - `cfg.output.filename_csv`
+ - `cfg.output.filename_zarr`
+
+ The wrapper generates filenames via `get_output_filename(cfg, output_type, ...)` using `cfg.bounds`, `cfg.time_range`, etc.
+
+ ## Output CSV format
+ CSV produced by `save_csv` is standardized to the long form with columns (where available):
+ - source_id, experiment_id, table_id, time, lat, lon, variable, value, units
+
+ This ensures a single `value` column and a `variable` column for stacked variables.
+
+ ## Common issues & tips
+ - NetCDF write ValueError (datetime encoding): call `ds["time"].encoding.clear()` before `to_netcdf()` (wrapper handles this).
+ - PermissionError writing files: ensure output directory is writable or write to `/tmp/` (or adjust permissions).
+ - CMIP cloud access requires network access — use the Pangeo intake catalog URL already referenced in code.
+
+ ## AOI handling
+ `preprocess_aoi(cfg)` accepts:
+ - GeoJSON strings / Feature / FeatureCollection
+ - Point → sets `cfg.lat`, `cfg.lon`
+ - Polygon or bbox → sets `cfg.bounds['custom']` and `cfg.region='custom'`
+
+ ## HYRAS support
+ HYRAS class mirrors MSWX design:
+ - `fetch()` / `load()` / `extract(point=...)` / `save_csv()` / `save_netcdf()`
+ - HYRAS extraction currently supports point extraction; attempt to use a region will raise an error.
+
+ ## Development & provenance
+ - CI: add GitHub Actions workflows to run tests and build/publish to PyPI.
+ - Keep config and runtime overrides in Hydra to enable reproducible runs.
+ - Include `CITATION.cff`, license, and a changelog for FAIR discoverability.
+
+ ## Contributing
+ - Run tests: `pytest`
+ - Style: follow repository linting config
+ - Open PRs against `main` with tests and a short changelog entry
+
+ ## License
+ Specify the license (e.g. MIT or Apache 2.0) in `LICENSE`.
+
+ ---
+
+ For further examples, see `examples/` and the `docs/` folder (usage, installation, faq).// filepath: /beegfs/muduchuru/pkgs_fnl/climdata/README.md
+ # climdata
+
+ Lightweight toolkit to fetch, subset and export climate data (MSWX, CMIP, DWD, HYRAS).
+ Provides a Hydra-driven CLI, programmatic wrapper, cloud-native CMIP access, local dataset handling, and standardized CSV/NetCDF/Zarr exports.
+
+ ## Key features
+ - Fetch and load datasets: MSWX, CMIP (cloud via intake), DWD, HYRAS
+ - Spatial extraction: point, box (region via config bounds), or shapefile (GeoJSON/Feature)
+ - Temporal subsetting via config or programmatic call
+ - Multi-format export: NetCDF, Zarr, CSV (standardized long format: variable, value, units)
+ - Hydra configuration + easy CLI overrides
+ - Helper to normalize AOI (GeoJSON → point / bbox / polygon)
+ - Provenance-friendly workflow (designed to be used with CI/CD workflows)
+
+ ## Install (development)
+ 1. Clone repository
+ ```bash
+ git clone <repo-url>
+ cd climdata
+ ```
+ 2. Create virtualenv and install deps
+ ```bash
+ python -m venv .venv
+ source .venv/bin/activate
+ pip install -U pip
+ pip install -e ".[dev]" # or pip install -r requirements.txt
+ ```
+
+ ## Quick CLI (Hydra) usage
+ Hydra reads configs from `conf/`. Override any config value on the CLI.
+
+ Examples:
+ ```bash
+ # Region extraction (saves NetCDF by default when region is used)
+ python examples/climdata_cli.py dataset=CMIP region=europe time_range.start_date=2010-01-01 time_range.end_date=2010-12-31
+
+ # Point extraction (saves CSV)
+ python examples/climdata_cli.py dataset=MSWX lat=52.5 lon=13.4 variables=['tas','pr'] time_range.start_date=2000-01-01
+
+ # HYRAS / DWD (point only)
+ python examples/climdata_cli.py dataset=HYRAS lat=52 lon=10
+ ```
+
+ Notes:
+ - Use `dataset=<MSWX|CMIP|DWD|HYRAS>` in CLI.
+ - Override any config key: e.g. `time_range.start_date=2000-01-01`.
+ - DWD/HYRAS: region (box) extraction is not supported — script will raise an error if attempted.
+
+ ## Programmatic usage
+ Use the wrapper to compose configs, preprocess AOI, extract, and save.
+
+ ```python
+ from climdata.utils.wrapper import extract_data
+
+ # returns (cfg, filename, ds, index) when save_to_file=True
+ cfg, filename, ds, index = extract_data(cfg_name="config", overrides=["dataset=MSWX","lat=52.5","lon=13.4"])
+ ```
+
+ Or use the dataset classes directly:
+ ```python
+ import climdata, xarray as xr
+ cmip = climdata.CMIP(cfg)
+ cmip.fetch()
+ cmip.load()
+ cmip.extract(box=cfg.bounds[cfg.region])
+ cmip.save_netcdf("output.nc")
+ ```
+
+ ## Configs
+ - Config files live in `climdata/conf/`. There are dataset-specific config entry points e.g. `config_cmip`, `config_mswx`, etc.
+ - Filename templates are configurable in `cfg.output`:
+ - `cfg.output.filename_nc`
+ - `cfg.output.filename_csv`
+ - `cfg.output.filename_zarr`
+
+ The wrapper generates filenames via `get_output_filename(cfg, output_type, ...)` using `cfg.bounds`, `cfg.time_range`, etc.
+
+ ## Output CSV format
+ CSV produced by `save_csv` is standardized to the long form with columns (where available):
+ - source_id, experiment_id, table_id, time, lat, lon, variable, value, units
+
+ This ensures a single `value` column and a `variable` column for stacked variables.
+
+ ## Common issues & tips
+ - NetCDF write ValueError (datetime encoding): call `ds["time"].encoding.clear()` before `to_netcdf()` (wrapper handles this).
+ - PermissionError writing files: ensure output directory is writable or write to `/tmp/` (or adjust permissions).
+ - CMIP cloud access requires network access — use the Pangeo intake catalog URL already referenced in code.
+
+ ## AOI handling
+ `preprocess_aoi(cfg)` accepts:
+ - GeoJSON strings / Feature / FeatureCollection
+ - Point → sets `cfg.lat`, `cfg.lon`
+ - Polygon or bbox → sets `cfg.bounds['custom']` and `cfg.region='custom'`
+
+ ## HYRAS support
+ HYRAS class mirrors MSWX design:
+ - `fetch()` / `load()` / `extract(point=...)` / `save_csv()` / `save_netcdf()`
+ - HYRAS extraction currently supports point extraction; attempt to use a region will raise an error.
+
+ ## Development & provenance
+ - CI: add GitHub Actions workflows to run tests and build/publish to PyPI.
+ - Keep config and runtime overrides in Hydra to enable reproducible runs.
+ - Include `CITATION.cff`, license, and a changelog for FAIR discoverability.
+
+ ## Contributing
+ - Run tests: `pytest`
+ - Style: follow repository linting config
+ - Open PRs against `main` with tests and a short changelog entry
+
+ ## License
+ Specify the license (e.g. MIT or Apache 2.0) in `LICENSE`.
+
+ ---
+
+ For further examples, see `examples/` and the `docs/` folder (usage, installation, faq).
climdata-0.3.4/README.md
@@ -0,0 +1,235 @@
+ # climdata
+
+
+ [![image](https://img.shields.io/pypi/v/climdata.svg)](https://pypi.python.org/pypi/climdata)
+ [![image](https://img.shields.io/conda/vn/conda-forge/climdata.svg)](https://anaconda.org/conda-forge/climdata)
+
+ `climdata` is a Python package designed to automate fetching, extraction, and processing of climate data from various sources, including MSWX, DWD HYRAS, ERA5-Land, and NASA-NEX-GDDP. It provides tools to retrieve data for specific locations and time ranges, facilitating climate analysis and research.
+
+ ---
+
+ ## Key features
+ - Fetch and load datasets: MSWX, CMIP (cloud via intake), DWD, HYRAS
+ - Spatial extraction: point, box (region via config bounds), or shapefile (GeoJSON/Feature)
+ - Temporal subsetting via config or programmatic call
+ - Multi-format export: NetCDF, Zarr, CSV (standardized long format: variable, value, units)
+ - Hydra configuration + easy CLI overrides
+ - Helper to normalize AOI (GeoJSON → point / bbox / polygon)
+ - Provenance-friendly workflow (designed to be used with CI/CD workflows)
+
+ ## Install (development)
+ 1. Clone repository
+ ```bash
+ git clone <repo-url>
+ cd climdata
+ ```
+ 2. Create virtualenv and install deps
+ ```bash
+ python -m venv .venv
+ source .venv/bin/activate
+ pip install -U pip
+ pip install -e ".[dev]" # or pip install -r requirements.txt
+ ```
+
+ ## Quick CLI (Hydra) usage
+ Hydra reads configs from `conf/`. Override any config value on the CLI.
+
+ Examples:
+ ```bash
+ # Region extraction (saves NetCDF by default when region is used)
+ python examples/climdata_cli.py dataset=CMIP region=europe time_range.start_date=2010-01-01 time_range.end_date=2010-12-31
+
+ # Point extraction (saves CSV)
+ python examples/climdata_cli.py dataset=MSWX lat=52.5 lon=13.4 variables=['tas','pr'] time_range.start_date=2000-01-01
+
+ # HYRAS / DWD (point only)
+ python examples/climdata_cli.py dataset=HYRAS lat=52 lon=10
+ ```
+
+ Notes:
+ - Use `dataset=<MSWX|CMIP|DWD|HYRAS>` in CLI.
+ - Override any config key: e.g. `time_range.start_date=2000-01-01`.
+ - DWD/HYRAS: region (box) extraction is not supported — script will raise an error if attempted.
+
+ ## Programmatic usage
+ Use the wrapper to compose configs, preprocess AOI, extract, and save.
+
+ ```python
+ from climdata.utils.wrapper import extract_data
+
+ # returns (cfg, filename, ds, index) when save_to_file=True
+ cfg, filename, ds, index = extract_data(cfg_name="config", overrides=["dataset=MSWX","lat=52.5","lon=13.4"])
+ ```
+
+ Or use the dataset classes directly:
+ ```python
+ import climdata, xarray as xr
+ cmip = climdata.CMIP(cfg)
+ cmip.fetch()
+ cmip.load()
+ cmip.extract(box=cfg.bounds[cfg.region])
+ cmip.save_netcdf("output.nc")
+ ```
+
+ ## Configs
+ - Config files live in `climdata/conf/`. There are dataset-specific config entry points e.g. `config_cmip`, `config_mswx`, etc.
+ - Filename templates are configurable in `cfg.output`:
+ - `cfg.output.filename_nc`
+ - `cfg.output.filename_csv`
+ - `cfg.output.filename_zarr`
+
+ The wrapper generates filenames via `get_output_filename(cfg, output_type, ...)` using `cfg.bounds`, `cfg.time_range`, etc.
+
+ ## Output CSV format
+ CSV produced by `save_csv` is standardized to the long form with columns (where available):
+ - source_id, experiment_id, table_id, time, lat, lon, variable, value, units
+
+ This ensures a single `value` column and a `variable` column for stacked variables.
+
+ ## Common issues & tips
+ - NetCDF write ValueError (datetime encoding): call `ds["time"].encoding.clear()` before `to_netcdf()` (wrapper handles this).
+ - PermissionError writing files: ensure output directory is writable or write to `/tmp/` (or adjust permissions).
+ - CMIP cloud access requires network access — use the Pangeo intake catalog URL already referenced in code.
+
+ ## AOI handling
+ `preprocess_aoi(cfg)` accepts:
+ - GeoJSON strings / Feature / FeatureCollection
+ - Point → sets `cfg.lat`, `cfg.lon`
+ - Polygon or bbox → sets `cfg.bounds['custom']` and `cfg.region='custom'`
+
+ ## HYRAS support
+ HYRAS class mirrors MSWX design:
+ - `fetch()` / `load()` / `extract(point=...)` / `save_csv()` / `save_netcdf()`
+ - HYRAS extraction currently supports point extraction; attempt to use a region will raise an error.
+
+ ## Development & provenance
+ - CI: add GitHub Actions workflows to run tests and build/publish to PyPI.
+ - Keep config and runtime overrides in Hydra to enable reproducible runs.
+ - Include `CITATION.cff`, license, and a changelog for FAIR discoverability.
+
+ ## Contributing
+ - Run tests: `pytest`
+ - Style: follow repository linting config
+ - Open PRs against `main` with tests and a short changelog entry
+
+ ## License
+ Specify the license (e.g. MIT or Apache 2.0) in `LICENSE`.
+
+ ---
+
+ For further examples, see `examples/` and the `docs/` folder (usage, installation, faq).// filepath: /beegfs/muduchuru/pkgs_fnl/climdata/README.md
+ # climdata
+
+ Lightweight toolkit to fetch, subset and export climate data (MSWX, CMIP, DWD, HYRAS).
+ Provides a Hydra-driven CLI, programmatic wrapper, cloud-native CMIP access, local dataset handling, and standardized CSV/NetCDF/Zarr exports.
+
+ ## Key features
+ - Fetch and load datasets: MSWX, CMIP (cloud via intake), DWD, HYRAS
+ - Spatial extraction: point, box (region via config bounds), or shapefile (GeoJSON/Feature)
+ - Temporal subsetting via config or programmatic call
+ - Multi-format export: NetCDF, Zarr, CSV (standardized long format: variable, value, units)
+ - Hydra configuration + easy CLI overrides
+ - Helper to normalize AOI (GeoJSON → point / bbox / polygon)
+ - Provenance-friendly workflow (designed to be used with CI/CD workflows)
+
+ ## Install (development)
+ 1. Clone repository
+ ```bash
+ git clone <repo-url>
+ cd climdata
+ ```
+ 2. Create virtualenv and install deps
+ ```bash
+ python -m venv .venv
+ source .venv/bin/activate
+ pip install -U pip
+ pip install -e ".[dev]" # or pip install -r requirements.txt
+ ```
+
+ ## Quick CLI (Hydra) usage
+ Hydra reads configs from `conf/`. Override any config value on the CLI.
+
+ Examples:
+ ```bash
+ # Region extraction (saves NetCDF by default when region is used)
+ python examples/climdata_cli.py dataset=CMIP region=europe time_range.start_date=2010-01-01 time_range.end_date=2010-12-31
+
+ # Point extraction (saves CSV)
+ python examples/climdata_cli.py dataset=MSWX lat=52.5 lon=13.4 variables=['tas','pr'] time_range.start_date=2000-01-01
+
+ # HYRAS / DWD (point only)
+ python examples/climdata_cli.py dataset=HYRAS lat=52 lon=10
+ ```
+
+ Notes:
+ - Use `dataset=<MSWX|CMIP|DWD|HYRAS>` in CLI.
+ - Override any config key: e.g. `time_range.start_date=2000-01-01`.
+ - DWD/HYRAS: region (box) extraction is not supported — script will raise an error if attempted.
+
+ ## Programmatic usage
+ Use the wrapper to compose configs, preprocess AOI, extract, and save.
+
+ ```python
+ from climdata.utils.wrapper import extract_data
+
+ # returns (cfg, filename, ds, index) when save_to_file=True
+ cfg, filename, ds, index = extract_data(cfg_name="config", overrides=["dataset=MSWX","lat=52.5","lon=13.4"])
+ ```
+
+ Or use the dataset classes directly:
+ ```python
+ import climdata, xarray as xr
+ cmip = climdata.CMIP(cfg)
+ cmip.fetch()
+ cmip.load()
+ cmip.extract(box=cfg.bounds[cfg.region])
+ cmip.save_netcdf("output.nc")
+ ```
+
+ ## Configs
+ - Config files live in `climdata/conf/`. There are dataset-specific config entry points e.g. `config_cmip`, `config_mswx`, etc.
+ - Filename templates are configurable in `cfg.output`:
+ - `cfg.output.filename_nc`
+ - `cfg.output.filename_csv`
+ - `cfg.output.filename_zarr`
+
+ The wrapper generates filenames via `get_output_filename(cfg, output_type, ...)` using `cfg.bounds`, `cfg.time_range`, etc.
+
+ ## Output CSV format
+ CSV produced by `save_csv` is standardized to the long form with columns (where available):
+ - source_id, experiment_id, table_id, time, lat, lon, variable, value, units
+
+ This ensures a single `value` column and a `variable` column for stacked variables.
+
+ ## Common issues & tips
+ - NetCDF write ValueError (datetime encoding): call `ds["time"].encoding.clear()` before `to_netcdf()` (wrapper handles this).
+ - PermissionError writing files: ensure output directory is writable or write to `/tmp/` (or adjust permissions).
+ - CMIP cloud access requires network access — use the Pangeo intake catalog URL already referenced in code.
+
+ ## AOI handling
+ `preprocess_aoi(cfg)` accepts:
+ - GeoJSON strings / Feature / FeatureCollection
+ - Point → sets `cfg.lat`, `cfg.lon`
+ - Polygon or bbox → sets `cfg.bounds['custom']` and `cfg.region='custom'`
+
+ ## HYRAS support
+ HYRAS class mirrors MSWX design:
+ - `fetch()` / `load()` / `extract(point=...)` / `save_csv()` / `save_netcdf()`
+ - HYRAS extraction currently supports point extraction; attempt to use a region will raise an error.
+
+ ## Development & provenance
+ - CI: add GitHub Actions workflows to run tests and build/publish to PyPI.
+ - Keep config and runtime overrides in Hydra to enable reproducible runs.
+ - Include `CITATION.cff`, license, and a changelog for FAIR discoverability.
+
+ ## Contributing
+ - Run tests: `pytest`
+ - Style: follow repository linting config
+ - Open PRs against `main` with tests and a short changelog entry
+
+ ## License
+ Specify the license (e.g. MIT or Apache 2.0) in `LICENSE`.
+
+ ---
+
+ For further examples, see `examples/` and the `docs/` folder (usage, installation, faq).
climdata-0.3.4/climdata/__init__.py
@@ -0,0 +1,16 @@
+ """Top-level package for climdata."""
+
+ __author__ = """Kaushik Muduchuru"""
+ __email__ = "kaushik.reddy.m@gmail.com"
+ __version__ = "0.3.4"
+
+ from .utils.utils_download import * # etc.
+ from .utils.config import load_config
+ from .utils.wrapper import extract_data
+ from .datasets.DWD import DWDmirror as DWD
+ from .datasets.MSWX import MSWXmirror as MSWX
+ from .datasets.ERA5 import ERA5Mirror as ERA5
+ from .datasets.CMIPlocal import CMIPmirror as CMIPlocal
+ from .datasets.CMIPCloud import CMIPCloud as CMIP
+ from .datasets.HYRAS import HYRASmirror as HYRAS
+
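The new `__init__` re-exports the wrapper and aliases the dataset mirrors at the package top level. A minimal sketch of how these names could be used, following the programmatic example in the README above (it assumes the other mirrors take a composed Hydra `cfg` the same way `CMIP` does):

```python
import climdata

# Compose a config and extract in one call via the re-exported wrapper
cfg, filename, ds, index = climdata.extract_data(
    cfg_name="config",
    overrides=["dataset=MSWX", "lat=52.5", "lon=13.4"],
)

# Dataset classes are also available under their top-level aliases
cmip = climdata.CMIP(cfg)    # CMIPCloud
hyras = climdata.HYRAS(cfg)  # HYRASmirror (constructor assumed to match CMIP)
era5 = climdata.ERA5(cfg)    # ERA5Mirror (constructor assumed to match CMIP)
```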
climdata-0.3.4/climdata/conf/config.yaml
@@ -0,0 +1,43 @@
+ defaults:
+ - _self_
+ - mappings/parameters@dsinfo
+ - mappings/variables@varinfo
+ - mappings/indices@extinfo
+
+ dataset: mswx
+ lat: null
+ lon: null
+ aoi: null
+ shapefile: null
+
+ variables: ["tasmin","tasmax","pr"]
+ index: null
+ data_dir: ./data
+ region: null
+
+ experiment_id: historical
+ source_id: MIROC6
+ table_id: day
+
+ bounds:
+ europe:
+ lat_min: 34.0
+ lat_max: 71.0
+ lon_min: -25.0
+ lon_max: 45.0
+ custom:
+ lat_min: null
+ lat_max: null
+ lon_min: null
+ lon_max: null
+
+ time_range:
+ start_date: "1989-01-01"
+ end_date: "2020-12-31"
+
+ output:
+ out_dir: "./climdata/data/"
+ filename_csv: "{provider}_{parameter}_LAT_{lat}_LON_{lon}_{start}_{end}.csv"
+ filename_zarr: "{provider}_{parameter}_LAT{lat_range}_LON{lon_range}_{start}_{end}.zarr"
+ filename_nc: "{provider}_{parameter}_LAT{lat_range}_LON{lon_range}_{start}_{end}.nc"
+ fmt: "standard"
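Every key in this default config can be overridden per run, either on the Hydra CLI or through the wrapper's `overrides` list. A short sketch against the keys above, following the README's `extract_data` example (the custom-bounds values are illustrative only):

```python
from climdata.utils.wrapper import extract_data

# Override the config.yaml defaults: dataset, custom region bounds and time range
cfg, filename, ds, index = extract_data(
    cfg_name="config",
    overrides=[
        "dataset=CMIP",
        "region=custom",
        "bounds.custom.lat_min=47.0",   # illustrative bounding box
        "bounds.custom.lat_max=55.0",
        "bounds.custom.lon_min=5.0",
        "bounds.custom.lon_max=15.0",
        "time_range.start_date=2010-01-01",
        "time_range.end_date=2010-12-31",
    ],
)
```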