climdata 0.0.6__tar.gz → 0.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of climdata might be problematic. Click here for more details.

Files changed (55) hide show
  1. {climdata-0.0.6 → climdata-0.0.7}/PKG-INFO +1 -1
  2. {climdata-0.0.6 → climdata-0.0.7}/climdata/__init__.py +3 -1
  3. {climdata-0.0.6 → climdata-0.0.7}/climdata/conf/config.yaml +2 -1
  4. climdata-0.0.7/climdata/datasets/CMIPCloud.py +120 -0
  5. climdata-0.0.6/climdata/datasets/CMIP.py → climdata-0.0.7/climdata/datasets/CMIPlocal.py +1 -1
  6. {climdata-0.0.6 → climdata-0.0.7}/climdata.egg-info/PKG-INFO +1 -1
  7. {climdata-0.0.6 → climdata-0.0.7}/climdata.egg-info/SOURCES.txt +2 -1
  8. {climdata-0.0.6 → climdata-0.0.7}/examples/extract_dwd_loc.ipynb +239 -0
  9. {climdata-0.0.6 → climdata-0.0.7}/pyproject.toml +2 -2
  10. {climdata-0.0.6 → climdata-0.0.7}/.editorconfig +0 -0
  11. {climdata-0.0.6 → climdata-0.0.7}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  12. {climdata-0.0.6 → climdata-0.0.7}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  13. {climdata-0.0.6 → climdata-0.0.7}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  14. {climdata-0.0.6 → climdata-0.0.7}/.github/workflows/docs-build.yml +0 -0
  15. {climdata-0.0.6 → climdata-0.0.7}/.github/workflows/docs.yml +0 -0
  16. {climdata-0.0.6 → climdata-0.0.7}/.github/workflows/installation.yml +0 -0
  17. {climdata-0.0.6 → climdata-0.0.7}/.github/workflows/macos.yml +0 -0
  18. {climdata-0.0.6 → climdata-0.0.7}/.github/workflows/pypi.yml +0 -0
  19. {climdata-0.0.6 → climdata-0.0.7}/.github/workflows/ubuntu.yml +0 -0
  20. {climdata-0.0.6 → climdata-0.0.7}/.github/workflows/windows.yml +0 -0
  21. {climdata-0.0.6 → climdata-0.0.7}/.gitignore +0 -0
  22. {climdata-0.0.6 → climdata-0.0.7}/LICENSE +0 -0
  23. {climdata-0.0.6 → climdata-0.0.7}/MANIFEST.in +0 -0
  24. {climdata-0.0.6 → climdata-0.0.7}/README.md +0 -0
  25. {climdata-0.0.6 → climdata-0.0.7}/climdata/__main__.py +0 -0
  26. {climdata-0.0.6 → climdata-0.0.7}/climdata/conf/mappings/parameters.yaml +0 -0
  27. {climdata-0.0.6 → climdata-0.0.7}/climdata/conf/mappings/variables.yaml +0 -0
  28. {climdata-0.0.6 → climdata-0.0.7}/climdata/datasets/DWD.py +0 -0
  29. {climdata-0.0.6 → climdata-0.0.7}/climdata/datasets/ERA5.py +0 -0
  30. {climdata-0.0.6 → climdata-0.0.7}/climdata/datasets/MSWX.py +0 -0
  31. {climdata-0.0.6 → climdata-0.0.7}/climdata/main.py +0 -0
  32. {climdata-0.0.6 → climdata-0.0.7}/climdata/utils/__init__.py +0 -0
  33. {climdata-0.0.6 → climdata-0.0.7}/climdata/utils/config.py +0 -0
  34. {climdata-0.0.6 → climdata-0.0.7}/climdata/utils/utils_download.py +0 -0
  35. {climdata-0.0.6 → climdata-0.0.7}/climdata.egg-info/dependency_links.txt +0 -0
  36. {climdata-0.0.6 → climdata-0.0.7}/climdata.egg-info/entry_points.txt +0 -0
  37. {climdata-0.0.6 → climdata-0.0.7}/climdata.egg-info/requires.txt +0 -0
  38. {climdata-0.0.6 → climdata-0.0.7}/climdata.egg-info/top_level.txt +0 -0
  39. {climdata-0.0.6 → climdata-0.0.7}/docs/changelog.md +0 -0
  40. {climdata-0.0.6 → climdata-0.0.7}/docs/climdata.md +0 -0
  41. {climdata-0.0.6 → climdata-0.0.7}/docs/common.md +0 -0
  42. {climdata-0.0.6 → climdata-0.0.7}/docs/contributing.md +0 -0
  43. {climdata-0.0.6 → climdata-0.0.7}/docs/faq.md +0 -0
  44. {climdata-0.0.6 → climdata-0.0.7}/docs/index.md +0 -0
  45. {climdata-0.0.6 → climdata-0.0.7}/docs/installation.md +0 -0
  46. {climdata-0.0.6 → climdata-0.0.7}/docs/overrides/main.html +0 -0
  47. {climdata-0.0.6 → climdata-0.0.7}/docs/usage.md +0 -0
  48. {climdata-0.0.6 → climdata-0.0.7}/dwd_tas_LAT52.507_LON14.1372_1989-01-01_2020-12-31.csv +0 -0
  49. {climdata-0.0.6 → climdata-0.0.7}/examples/zarr_tas_data/metadata.json +0 -0
  50. {climdata-0.0.6 → climdata-0.0.7}/mkdocs.yml +0 -0
  51. {climdata-0.0.6 → climdata-0.0.7}/requirements.txt +0 -0
  52. {climdata-0.0.6 → climdata-0.0.7}/requirements_dev.txt +0 -0
  53. {climdata-0.0.6 → climdata-0.0.7}/setup.cfg +0 -0
  54. {climdata-0.0.6 → climdata-0.0.7}/tests/__init__.py +0 -0
  55. {climdata-0.0.6 → climdata-0.0.7}/tests/test_climdata.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: climdata
3
- Version: 0.0.6
3
+ Version: 0.0.7
4
4
  Summary: This project automates the fetching and extraction of weather data from multiple sources — such as MSWX, DWD HYRAS, ERA5-Land, NASA-NEX-GDDP, and more — for a given location and time range.
5
5
  Author-email: Kaushik Muduchuru <kaushik.reddy.m@gmail.com>
6
6
  License: MIT License
@@ -2,11 +2,13 @@
2
2
 
3
3
  __author__ = """Kaushik Muduchuru"""
4
4
  __email__ = "kaushik.reddy.m@gmail.com"
5
- __version__ = "0.0.6"
5
+ __version__ = "0.0.7"
6
6
 
7
7
  from .utils.utils_download import * # etc.
8
8
  from .utils.config import load_config
9
9
  from .datasets.DWD import DWDmirror as DWD
10
10
  from .datasets.MSWX import MSWXmirror as MSWX
11
11
  from .datasets.ERA5 import ERA5Mirror as ERA5
12
+ from .datasets.CMIPlocal import CMIPmirror as CMIPlocal
13
+ from .datasets.CMIPCloud import CMIPCloud as CMIP
12
14
 
@@ -33,5 +33,6 @@ time_range:
33
33
 
34
34
  output:
35
35
  out_dir: "./climdata/data/"
36
- filename: "{provider}_{parameter}_LAT{lat}_LON{lon}_{start}_{end}.csv"
36
+ filename_csv: "{provider}_{parameter}_LAT_{lat}_LON_{lon}_{start}_{end}.csv"
37
+ filename_zarr: "{provider}_{parameter}_LAT{lat_range}_LON{lon_range}_{start}_{end}.zarr"
37
38
  fmt: 'standard' # 'standard', 'ICASA', 'simplace', 'monica'
@@ -0,0 +1,120 @@
1
+ import intake
2
+ import xarray as xr
3
+ import pandas as pd
4
+
5
class CMIPCloud:
    """Fetch, subset and export CMIP6 data listed in the Pangeo cloud catalog.

    Intended call order: ``fetch()`` -> ``load()`` -> ``extract(...)`` and/or
    ``subset_time(...)`` -> one of the ``save_*`` methods.  ``extract`` and
    ``subset_time`` replace ``self.ds`` with the subset they return, so later
    calls (including the ``save_*`` methods) operate on the reduced dataset.

    Args:
        experiment_id: Experiment to search for (e.g. ``"historical"``).
        source_id: Model identifier passed to the catalog search.
        table_id: Table identifier passed to the catalog search (e.g. ``"day"``).
        variables: Iterable of variable ids; one catalog search is run per id.
        region_bounds: Optional bounds object.  It is only stored on the
            instance; no method of this class reads it.
    """

    # Public intake-esm catalog describing the CMIP6 Zarr stores.
    CATALOG_URL = "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"

    def __init__(self, experiment_id, source_id, table_id, variables, region_bounds=None):
        self.experiment_id = experiment_id
        self.source_id = source_id
        self.table_id = table_id
        self.variables = variables
        self.region_bounds = region_bounds
        self.col_subsets = []  # catalog subsets found by fetch(), one per variable
        self.ds = None  # merged dataset, populated by load()

    @staticmethod
    def _to_https(zstore):
        """Return the public HTTPS URL for a ``gs://bucket/key`` store path.

        Paths that do not start with ``gs://`` are returned unchanged.
        """
        prefix = "gs://"
        if zstore.startswith(prefix):
            return "https://storage.googleapis.com/" + zstore[len(prefix):]
        return zstore

    def fetch(self):
        """Search the catalog once per variable and collect non-empty results.

        Resets ``self.col_subsets`` on every call.  Variables whose search
        returns no rows are skipped without an error, so the returned list may
        be shorter than ``self.variables``.

        Returns:
            The list of catalog subsets (also stored in ``self.col_subsets``).
        """
        col = intake.open_esm_datastore(self.CATALOG_URL)
        self.col_subsets = []
        for var in self.variables:
            query = dict(
                experiment_id=[self.experiment_id],
                source_id=self.source_id,
                table_id=self.table_id,
                variable_id=var,
            )
            col_subset = col.search(require_all_on=["source_id"], **query)
            if len(col_subset.df) > 0:
                self.col_subsets.append(col_subset)
        return self.col_subsets

    def load(self):
        """Open one Zarr store per collected subset and merge them.

        Only the first ``zstore`` entry of each subset is opened; any further
        rows in the subset are ignored.

        Returns:
            The merged dataset, or ``None`` when ``fetch()`` found nothing
            (``self.ds`` is set to the same value).
        """
        datasets = [
            xr.open_zarr(self._to_https(col_subset.df.zstore.values[0]))
            for col_subset in self.col_subsets
        ]
        self.ds = xr.merge(datasets) if datasets else None
        return self.ds

    def extract(self, *, point=None, box=None, shapefile=None, buffer_km=0.0):
        """Extract a spatial subset by point, bounding box, or shapefile.

        Exactly one selector is used; if several are given the priority is
        ``point``, then ``box``, then ``shapefile``.  The result replaces
        ``self.ds``.

        Args:
            point: ``(lon, lat)`` pair -- longitude first.  With
                ``buffer_km == 0`` the nearest grid cell is selected; with a
                positive buffer a square window around the point is sliced.
            box: Mapping with keys ``lon_min``, ``lon_max``, ``lat_min`` and
                ``lat_max``.
            shapefile: Path to a vector file, or an already loaded
                GeoDataFrame, whose geometries are used to clip the dataset.
            buffer_km: Optional buffer in kilometres applied to ``point`` or
                ``shapefile`` selections (ignored for ``box``).

        Returns:
            The extracted dataset.

        Raises:
            ValueError: If no dataset is loaded or no selector is given.
        """
        # Validate state before anything else so the error does not depend on
        # optional geospatial packages being installed.
        if self.ds is None:
            raise ValueError("No dataset loaded. Call `load()` first.")

        ds = self.ds

        if point is not None:
            lon, lat = point
            if buffer_km > 0:
                # Rough conversion (~111 km per degree).  The same value is
                # used for longitude, i.e. no latitude correction is applied.
                buffer_deg = buffer_km / 111
                ds_subset = ds.sel(
                    lon=slice(lon - buffer_deg, lon + buffer_deg),
                    lat=slice(lat - buffer_deg, lat + buffer_deg),
                )
            else:
                ds_subset = ds.sel(lon=lon, lat=lat, method="nearest")

        elif box is not None:
            ds_subset = ds.sel(
                lon=slice(box['lon_min'], box['lon_max']),
                lat=slice(box['lat_min'], box['lat_max']),
            )

        elif shapefile is not None:
            ds_subset = self._clip_to_shapes(ds, shapefile, buffer_km)

        else:
            raise ValueError("Must provide either point, box, or shapefile.")

        self.ds = ds_subset
        return ds_subset

    @staticmethod
    def _clip_to_shapes(ds, shapefile, buffer_km):
        """Clip ``ds`` to the geometries of ``shapefile`` (path or GeoDataFrame)."""
        # Imported lazily: only this code path needs the geospatial stack.
        import geopandas as gpd
        import rioxarray  # noqa: F401  (import registers the ``.rio`` accessor)
        from shapely.geometry import mapping

        gdf = gpd.read_file(shapefile) if isinstance(shapefile, str) else shapefile
        if buffer_km > 0:
            # Buffer in a metric CRS, then go back to geographic coordinates.
            gdf = gdf.to_crs(epsg=3857)
            gdf["geometry"] = gdf.buffer(buffer_km * 1000)
            gdf = gdf.to_crs(epsg=4326)
        geom = [mapping(g) for g in gdf.geometry]
        ds = ds.rio.write_crs("EPSG:4326", inplace=False)
        return ds.rio.clip(geom, gdf.crs, drop=True)

    def subset_time(self, start_date, end_date):
        """Restrict the dataset to ``[start_date, end_date]`` along ``time``.

        Args:
            start_date: Start of the range as a ``'YYYY-MM-DD'`` string.
            end_date: End of the range as a ``'YYYY-MM-DD'`` string.

        Returns:
            The time-sliced dataset (also stored in ``self.ds``), or ``None``
            when no dataset is loaded.
        """
        if self.ds is None:
            return None
        ds_time = self.ds.sel(time=slice(start_date, end_date))
        self.ds = ds_time
        return ds_time

    def _subset_time(self, start_date, end_date):
        """Backward-compatible alias of :meth:`subset_time`."""
        return self.subset_time(start_date, end_date)

    def save_netcdf(self, filename):
        """Write the current dataset to ``filename`` as NetCDF.

        Does nothing when no dataset is loaded.
        """
        if self.ds is not None:
            if "time" in self.ds.variables:
                # Drop the encoding carried over from the source store before
                # writing the time coordinate.
                self.ds["time"].encoding.clear()
            self.ds.to_netcdf(filename)
            print(f"Saved NetCDF to {filename}")

    def save_zarr(self, store_path):
        """Write the current dataset to a Zarr store, overwriting ``store_path``.

        Does nothing when no dataset is loaded.
        """
        if self.ds is not None:
            self.ds.to_zarr(store_path, mode="w")
            print(f"Saved Zarr to {store_path}")

    def save_csv(self, filename):
        """Flatten the current dataset to a table and write it as CSV.

        Does nothing when no dataset is loaded.
        """
        if self.ds is not None:
            df = self.ds.to_dataframe().reset_index()
            df.to_csv(filename, index=False)
            print(f"Saved CSV to {filename}")
@@ -14,7 +14,7 @@ from xclim.core import units
14
14
  warnings.filterwarnings("ignore", category=Warning)
15
15
 
16
16
 
17
- class CMIP:
17
+ class CMIPmirror:
18
18
  def __init__(self, var_cfg: DictConfig, experiments):
19
19
  self.var_cfg = var_cfg
20
20
  self.files = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: climdata
3
- Version: 0.0.6
3
+ Version: 0.0.7
4
4
  Summary: This project automates the fetching and extraction of weather data from multiple sources — such as MSWX, DWD HYRAS, ERA5-Land, NASA-NEX-GDDP, and more — for a given location and time range.
5
5
  Author-email: Kaushik Muduchuru <kaushik.reddy.m@gmail.com>
6
6
  License: MIT License
@@ -30,7 +30,8 @@ climdata.egg-info/top_level.txt
30
30
  climdata/conf/config.yaml
31
31
  climdata/conf/mappings/parameters.yaml
32
32
  climdata/conf/mappings/variables.yaml
33
- climdata/datasets/CMIP.py
33
+ climdata/datasets/CMIPCloud.py
34
+ climdata/datasets/CMIPlocal.py
34
35
  climdata/datasets/DWD.py
35
36
  climdata/datasets/ERA5.py
36
37
  climdata/datasets/MSWX.py
@@ -282,6 +282,245 @@
282
282
  "execution_count": null,
283
283
  "id": "cfe88f29",
284
284
  "metadata": {},
285
+ "outputs": [
286
+ {
287
+ "data": {
288
+ "text/html": [
289
+ "\n",
290
+ " <style>\n",
291
+ " .geemap-dark {\n",
292
+ " --jp-widgets-color: white;\n",
293
+ " --jp-widgets-label-color: white;\n",
294
+ " --jp-ui-font-color1: white;\n",
295
+ " --jp-layout-color2: #454545;\n",
296
+ " background-color: #383838;\n",
297
+ " }\n",
298
+ "\n",
299
+ " .geemap-dark .jupyter-button {\n",
300
+ " --jp-layout-color3: #383838;\n",
301
+ " }\n",
302
+ "\n",
303
+ " .geemap-colab {\n",
304
+ " background-color: var(--colab-primary-surface-color, white);\n",
305
+ " }\n",
306
+ "\n",
307
+ " .geemap-colab .jupyter-button {\n",
308
+ " --jp-layout-color3: var(--colab-primary-surface-color, white);\n",
309
+ " }\n",
310
+ " </style>\n",
311
+ " "
312
+ ],
313
+ "text/plain": [
314
+ "<IPython.core.display.HTML object>"
315
+ ]
316
+ },
317
+ "metadata": {},
318
+ "output_type": "display_data"
319
+ }
320
+ ],
321
+ "source": [
322
+ "import argparse\n",
323
+ "import climdata\n",
324
+ "from hydra import initialize, compose\n",
325
+ "import pandas as pd\n",
326
+ "from climdata import CMIP\n",
327
+ "\n",
328
+ "with initialize(config_path=\"../climdata/conf\", version_base=None):\n",
329
+ " cfg = compose(\n",
330
+ " config_name=\"config\",\n",
331
+ " overrides=[\n",
332
+ " f\"weather.parameter={variables[0]}\", # Just for bounds/region\n",
333
+ " f\"region={region}\",\n",
334
+ " ],\n",
335
+ " )\n",
336
+ "\n",
337
+ "cmip = climdata.CMIP(\n",
338
+ " experiment_id = \"historical\",\n",
339
+ " source_id = \"MIROC6\",\n",
340
+ " table_id = \"day\",\n",
341
+ " variables = [\"tasmax\", \"tasmin\", \"pr\"],\n",
342
+ " region_bounds = cfg.bounds[cfg.region]\n",
343
+ " )\n",
344
+ "cmip.fetch() # gets file lists\n",
345
+ "cmip.load() # loads and merges datasets\n",
346
+ "cmip.extract(point = (52,15))\n",
347
+ "ds = cmip._subset_time(cfg.time_range.start_date, cfg.time_range.end_date)\n",
348
+ "cmip.save_netcdf(\"cmip_loc.nc\")"
349
+ ]
350
+ },
351
+ {
352
+ "cell_type": "code",
353
+ "execution_count": null,
354
+ "id": "8a29e214",
355
+ "metadata": {},
356
+ "outputs": [
357
+ {
358
+ "data": {
359
+ "text/html": [
360
+ "\n",
361
+ " <style>\n",
362
+ " .geemap-dark {\n",
363
+ " --jp-widgets-color: white;\n",
364
+ " --jp-widgets-label-color: white;\n",
365
+ " --jp-ui-font-color1: white;\n",
366
+ " --jp-layout-color2: #454545;\n",
367
+ " background-color: #383838;\n",
368
+ " }\n",
369
+ "\n",
370
+ " .geemap-dark .jupyter-button {\n",
371
+ " --jp-layout-color3: #383838;\n",
372
+ " }\n",
373
+ "\n",
374
+ " .geemap-colab {\n",
375
+ " background-color: var(--colab-primary-surface-color, white);\n",
376
+ " }\n",
377
+ "\n",
378
+ " .geemap-colab .jupyter-button {\n",
379
+ " --jp-layout-color3: var(--colab-primary-surface-color, white);\n",
380
+ " }\n",
381
+ " </style>\n",
382
+ " "
383
+ ],
384
+ "text/plain": [
385
+ "<IPython.core.display.HTML object>"
386
+ ]
387
+ },
388
+ "metadata": {},
389
+ "output_type": "display_data"
390
+ },
391
+ {
392
+ "name": "stdout",
393
+ "output_type": "stream",
394
+ "text": [
395
+ "Saved NetCDF to cmip_box.nc\n"
396
+ ]
397
+ }
398
+ ],
399
+ "source": [
400
+ "import argparse\n",
401
+ "import climdata\n",
402
+ "from hydra import initialize, compose\n",
403
+ "import pandas as pd\n",
404
+ "from climdata import CMIP\n",
405
+ "\n",
406
+ "region=\"europe\"\n",
407
+ "\n",
408
+ "with initialize(config_path=\"../climdata/conf\", version_base=None):\n",
409
+ " cfg = compose(\n",
410
+ " config_name=\"config\",\n",
411
+ " overrides=[\n",
412
+ " f\"weather.parameter={variables[0]}\", # Just for bounds/region\n",
413
+ " f\"region={region}\",\n",
414
+ " ],\n",
415
+ " )\n",
416
+ "\n",
417
+ "cmip = climdata.CMIP(\n",
418
+ " experiment_id = \"historical\",\n",
419
+ " source_id = \"MIROC6\",\n",
420
+ " table_id = \"day\",\n",
421
+ " variables = [\"tasmax\", \"tasmin\", \"pr\"],\n",
422
+ " region_bounds = cfg.bounds[cfg.region]\n",
423
+ " )\n",
424
+ "cmip.fetch() # gets file lists\n",
425
+ "cmip.load() # loads and merges datasets\n",
426
+ "\n",
427
+ "cmip.extract(box = cmip.region_bounds)\n",
428
+ "ds = cmip._subset_time(cfg.time_range.start_date, cfg.time_range.end_date)\n",
429
+ "cmip.save_netcdf(\"cmip_box.nc\")"
430
+ ]
431
+ },
432
+ {
433
+ "cell_type": "code",
434
+ "execution_count": 20,
435
+ "id": "dcf86cc2",
436
+ "metadata": {},
437
+ "outputs": [
438
+ {
439
+ "data": {
440
+ "text/html": [
441
+ "\n",
442
+ " <style>\n",
443
+ " .geemap-dark {\n",
444
+ " --jp-widgets-color: white;\n",
445
+ " --jp-widgets-label-color: white;\n",
446
+ " --jp-ui-font-color1: white;\n",
447
+ " --jp-layout-color2: #454545;\n",
448
+ " background-color: #383838;\n",
449
+ " }\n",
450
+ "\n",
451
+ " .geemap-dark .jupyter-button {\n",
452
+ " --jp-layout-color3: #383838;\n",
453
+ " }\n",
454
+ "\n",
455
+ " .geemap-colab {\n",
456
+ " background-color: var(--colab-primary-surface-color, white);\n",
457
+ " }\n",
458
+ "\n",
459
+ " .geemap-colab .jupyter-button {\n",
460
+ " --jp-layout-color3: var(--colab-primary-surface-color, white);\n",
461
+ " }\n",
462
+ " </style>\n",
463
+ " "
464
+ ],
465
+ "text/plain": [
466
+ "<IPython.core.display.HTML object>"
467
+ ]
468
+ },
469
+ "metadata": {},
470
+ "output_type": "display_data"
471
+ },
472
+ {
473
+ "data": {
474
+ "text/plain": [
475
+ "'CMIP/MIROC/MIROC6/historical/r4i1p1f1/day/europe/'"
476
+ ]
477
+ },
478
+ "execution_count": 20,
479
+ "metadata": {},
480
+ "output_type": "execute_result"
481
+ }
482
+ ],
483
+ "source": [
484
+ "\"/\".join(cmip.col_subsets[0].df.zstore[0].split('/')[4:-4]+[region])+'/'"
485
+ ]
486
+ },
487
+ {
488
+ "cell_type": "markdown",
489
+ "id": "d0b5a84f",
490
+ "metadata": {},
491
+ "source": [
492
+ "\n",
493
+ "\n",
494
+ "**Usage Example:**\n"
495
+ ]
496
+ },
497
+ {
498
+ "cell_type": "code",
499
+ "execution_count": null,
500
+ "id": "fc2f2a85",
501
+ "metadata": {},
502
+ "outputs": [],
503
+ "source": [
504
+ "ds = cmip.load()\n",
505
+ "ds_time = cmip.subset_time(\"2000-01-01\", \"2005-12-31\")"
506
+ ]
507
+ },
508
+ {
509
+ "cell_type": "markdown",
510
+ "id": "d254a1ee",
511
+ "metadata": {},
512
+ "source": [
513
+ "\n",
514
+ "\n",
515
+ "You can combine this with `extract()` for both spatial and temporal subsetting. \n",
516
+ "Let me know if you want a combined function!"
517
+ ]
518
+ },
519
+ {
520
+ "cell_type": "code",
521
+ "execution_count": null,
522
+ "id": "da13620c",
523
+ "metadata": {},
285
524
  "outputs": [],
286
525
  "source": []
287
526
  }
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "climdata"
3
- version = "0.0.6"
3
+ version = "0.0.7"
4
4
  dynamic = [
5
5
  "dependencies",
6
6
  ]
@@ -52,7 +52,7 @@ universal = true
52
52
 
53
53
 
54
54
  [tool.bumpversion]
55
- current_version = "0.0.6"
55
+ current_version = "0.0.7"
56
56
  commit = true
57
57
  tag = true
58
58
 
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes