h2mare 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- h2mare-0.1.0/LICENSE.txt +21 -0
- h2mare-0.1.0/PKG-INFO +174 -0
- h2mare-0.1.0/README.md +128 -0
- h2mare-0.1.0/h2mare/__init__.py +29 -0
- h2mare-0.1.0/h2mare/cli/__init__.py +24 -0
- h2mare-0.1.0/h2mare/cli/catalog.py +121 -0
- h2mare-0.1.0/h2mare/cli/compile.py +99 -0
- h2mare-0.1.0/h2mare/cli/main.py +154 -0
- h2mare-0.1.0/h2mare/cli/nc2zarr.py +88 -0
- h2mare-0.1.0/h2mare/config.py +205 -0
- h2mare-0.1.0/h2mare/downloader/__init__.py +19 -0
- h2mare-0.1.0/h2mare/downloader/aviso_downloader.py +453 -0
- h2mare-0.1.0/h2mare/downloader/base.py +104 -0
- h2mare-0.1.0/h2mare/downloader/cds_downloader.py +224 -0
- h2mare-0.1.0/h2mare/downloader/cmems_downloader.py +534 -0
- h2mare-0.1.0/h2mare/downloader/cmems_utils.py +159 -0
- h2mare-0.1.0/h2mare/downloader/commons.py +70 -0
- h2mare-0.1.0/h2mare/format_converters/__init__.py +9 -0
- h2mare-0.1.0/h2mare/format_converters/netcdf2zarr.py +441 -0
- h2mare-0.1.0/h2mare/format_converters/parquet2csv.py +155 -0
- h2mare-0.1.0/h2mare/format_converters/zarr2parquet.py +105 -0
- h2mare-0.1.0/h2mare/models.py +56 -0
- h2mare-0.1.0/h2mare/pipeline_manager.py +104 -0
- h2mare-0.1.0/h2mare/processing/__init__.py +27 -0
- h2mare-0.1.0/h2mare/processing/compiler.py +367 -0
- h2mare-0.1.0/h2mare/processing/core/__init__.py +0 -0
- h2mare-0.1.0/h2mare/processing/core/aviso.py +519 -0
- h2mare-0.1.0/h2mare/processing/core/cds.py +613 -0
- h2mare-0.1.0/h2mare/processing/core/cmems.py +76 -0
- h2mare-0.1.0/h2mare/processing/core/fronts.py +285 -0
- h2mare-0.1.0/h2mare/processing/extractor.py +989 -0
- h2mare-0.1.0/h2mare/processing/registry.py +28 -0
- h2mare-0.1.0/h2mare/storage/__init__.py +36 -0
- h2mare-0.1.0/h2mare/storage/coverage.py +69 -0
- h2mare-0.1.0/h2mare/storage/parquet_helpers.py +201 -0
- h2mare-0.1.0/h2mare/storage/parquet_indexer.py +712 -0
- h2mare-0.1.0/h2mare/storage/parquet_plotter.py +191 -0
- h2mare-0.1.0/h2mare/storage/storage.py +170 -0
- h2mare-0.1.0/h2mare/storage/xarray_helpers.py +156 -0
- h2mare-0.1.0/h2mare/storage/zarr_catalog.py +1177 -0
- h2mare-0.1.0/h2mare/types.py +342 -0
- h2mare-0.1.0/h2mare/utils/__init__.py +26 -0
- h2mare-0.1.0/h2mare/utils/datetime_utils.py +54 -0
- h2mare-0.1.0/h2mare/utils/files_io.py +162 -0
- h2mare-0.1.0/h2mare/utils/labels.py +90 -0
- h2mare-0.1.0/h2mare/utils/logging_utils.py +28 -0
- h2mare-0.1.0/h2mare/utils/paths.py +78 -0
- h2mare-0.1.0/h2mare/utils/plot.py +322 -0
- h2mare-0.1.0/h2mare/utils/spatial.py +112 -0
- h2mare-0.1.0/h2mare/validators.py +88 -0
- h2mare-0.1.0/h2mare.egg-info/PKG-INFO +174 -0
- h2mare-0.1.0/h2mare.egg-info/SOURCES.txt +74 -0
- h2mare-0.1.0/h2mare.egg-info/dependency_links.txt +1 -0
- h2mare-0.1.0/h2mare.egg-info/entry_points.txt +2 -0
- h2mare-0.1.0/h2mare.egg-info/requires.txt +24 -0
- h2mare-0.1.0/h2mare.egg-info/top_level.txt +1 -0
- h2mare-0.1.0/pyproject.toml +94 -0
- h2mare-0.1.0/setup.cfg +4 -0
- h2mare-0.1.0/tests/test_aviso_downloader.py +212 -0
- h2mare-0.1.0/tests/test_coverage_utils.py +76 -0
- h2mare-0.1.0/tests/test_datetime_utils.py +90 -0
- h2mare-0.1.0/tests/test_extractor.py +264 -0
- h2mare-0.1.0/tests/test_labels.py +66 -0
- h2mare-0.1.0/tests/test_logging_utils.py +43 -0
- h2mare-0.1.0/tests/test_models.py +157 -0
- h2mare-0.1.0/tests/test_parquet_helpers.py +196 -0
- h2mare-0.1.0/tests/test_parquet_indexer.py +197 -0
- h2mare-0.1.0/tests/test_pipeline_manager.py +233 -0
- h2mare-0.1.0/tests/test_plot.py +161 -0
- h2mare-0.1.0/tests/test_spatial.py +79 -0
- h2mare-0.1.0/tests/test_storage.py +138 -0
- h2mare-0.1.0/tests/test_types.py +310 -0
- h2mare-0.1.0/tests/test_utils_paths.py +65 -0
- h2mare-0.1.0/tests/test_validators.py +68 -0
- h2mare-0.1.0/tests/test_xarray_helpers.py +131 -0
- h2mare-0.1.0/tests/test_zarr_catalog.py +265 -0
h2mare-0.1.0/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 h2ugoparra
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
h2mare-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: h2mare
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Downloader and data management tools for climate and ocean datasets.
|
|
5
|
+
Author-email: Hugo Parra <h2ugo.parra@pm.me>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/h2ugoparra/h2mare
|
|
8
|
+
Keywords: ocean,climate,geospatial,zarr,netcdf,cmems,copernicus,era5
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Atmospheric Science
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: GIS
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE.txt
|
|
21
|
+
Requires-Dist: cartopy>=0.23.0
|
|
22
|
+
Requires-Dist: cdsapi>=0.7.0
|
|
23
|
+
Requires-Dist: cfgrib>=0.9.10
|
|
24
|
+
Requires-Dist: copernicusmarine>=2.0.0
|
|
25
|
+
Requires-Dist: dask>=2024.1.0
|
|
26
|
+
Requires-Dist: duckdb>=1.0.0
|
|
27
|
+
Requires-Dist: eccodes>=1.7.0
|
|
28
|
+
Requires-Dist: ephem>=4.1.0
|
|
29
|
+
Requires-Dist: geopandas>=1.0.0
|
|
30
|
+
Requires-Dist: global_land_mask>=1.0.0
|
|
31
|
+
Requires-Dist: ipython>=8.0.0
|
|
32
|
+
Requires-Dist: loguru>=0.7.0
|
|
33
|
+
Requires-Dist: matplotlib>=3.8.0
|
|
34
|
+
Requires-Dist: msgspec>=0.18.0
|
|
35
|
+
Requires-Dist: netCDF4>=1.6.0
|
|
36
|
+
Requires-Dist: plotly>=5.18.0
|
|
37
|
+
Requires-Dist: polars>=1.0.0
|
|
38
|
+
Requires-Dist: PyYAML>=6.0.0
|
|
39
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
40
|
+
Requires-Dist: rioxarray>=0.17.0
|
|
41
|
+
Requires-Dist: scipy>=1.13.0
|
|
42
|
+
Requires-Dist: tqdm>=4.66.0
|
|
43
|
+
Requires-Dist: typer>=0.12.0
|
|
44
|
+
Requires-Dist: xarray>=2024.1.0
|
|
45
|
+
Dynamic: license-file
|
|
46
|
+
|
|
47
|
+
# H2MARE - Geospatial Processing for Climate and Ocean Data
|
|
48
|
+
|
|
49
|
+

|
|
50
|
+
|
|
51
|
+
A Python pipeline for downloading and preprocessing multi-source oceanographic and atmospheric data into analysis-ready formats. H2MARE streamlines the acquisition and harmonization of data from major climate and ocean observation services, optimized for large-scale spatiotemporal analysis.
|
|
52
|
+
|
|
53
|
+
## Features
|
|
54
|
+
|
|
55
|
+
- **Multi-source data integration**: Download and process data from CMEMS, AVISO, and ERA5.
|
|
56
|
+
- **Variable grouping**: Organize related variables using configurable keys.
|
|
57
|
+
- **Format conversion**: Automated conversion from NetCDF/GRIB to optimized Zarr and Parquet format
|
|
58
|
+
- **Data compilation**: Regrid and interpolate multi-resolution datasets to a common grid
|
|
59
|
+
- **Point and geometry extraction**: Extract time series for specific locations or spatial features
|
|
60
|
+
|
|
61
|
+
## Data Sources
|
|
62
|
+
|
|
63
|
+
H2MARE supports the following data providers. API keys and authentication are required for each:
|
|
64
|
+
|
|
65
|
+
- **[CMEMS](https://marine.copernicus.eu/)** - Copernicus Marine Service: Satellite and in-situ ocean observations
|
|
66
|
+
- **[AVISO](https://www.aviso.altimetry.fr/en/home.html)** - Archiving, Validation and Interpretation of Satellite Oceanographic data
|
|
67
|
+
- **[CDS-ERA5](https://cds.climate.copernicus.eu/)** - ERA5 hourly atmospheric reanalysis (1940-present)
|
|
68
|
+
*Hersbach, H., et al. (2023). DOI: 10.24381/cds.adbb2d47*
|
|
69
|
+
|
|
70
|
+
**Note**: Refer to each provider's documentation for authentication setup before use.
|
|
71
|
+
|
|
72
|
+
## Installation
|
|
73
|
+
|
|
74
|
+
### Prerequisites
|
|
75
|
+
|
|
76
|
+
- Python >= 3.11
|
|
77
|
+
- [uv](https://docs.astral.sh/uv/) — fast Python package and project manager
|
|
78
|
+
- Sufficient disk space for downloaded datasets (varies by region and time range)
|
|
79
|
+
|
|
80
|
+
### Install from source
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
git clone https://github.com/h2ugoparra/h2mare.git
|
|
84
|
+
cd h2mare
|
|
85
|
+
uv sync # installs all dependencies into .venv
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
For development (includes pytest, black, isort):
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
uv sync --extra dev
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Configuration
|
|
95
|
+
|
|
96
|
+
Create .env file with external storage path:
|
|
97
|
+
|
|
98
|
+
```env
|
|
99
|
+
STORE_DIR=/path/to/your/storage
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Key variables groups
|
|
103
|
+
|
|
104
|
+
Edit `config.yaml` to define variable groups and processing parameters.
|
|
105
|
+
|
|
106
|
+
### Data Flow
|
|
107
|
+
|
|
108
|
+
- **Download** - Raw NetCDF/GRIB files are fetched from the configured sources and saved at the specified time resolution (monthly or yearly) as native-resolution Zarr files.
|
|
109
|
+
- **Compilation** (`h2mare/processing/compiler.py`) - Preprocessed data is regridded to a defined spatial/temporal resolution and geographic extent (configured via 'h2ds' key in `config.yaml`)
|
|
110
|
+
- **Extraction** (`h2mare/processing/extractor.py`) - Point (CSV files) or geometry (SHP files) data extraction from xarray datasets.
|
|
111
|
+
|
|
112
|
+
## Quick Start
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
# Download and process a single variable for a specific date range
|
|
116
|
+
uv run h2mare run sst --start-date 2021-01-01 --end-date 2021-12-31
|
|
117
|
+
|
|
118
|
+
# Multiple variables at once (space-separated)
|
|
119
|
+
uv run h2mare run seapodym mld o2 chl
|
|
120
|
+
|
|
121
|
+
# Infer missing dates from the existing store and download what's new
|
|
122
|
+
uv run h2mare run sst
|
|
123
|
+
|
|
124
|
+
# Download only (skip Zarr conversion)
|
|
125
|
+
uv run h2mare run sst --no-process
|
|
126
|
+
|
|
127
|
+
# Validate configuration without downloading
|
|
128
|
+
uv run h2mare run sst --dry-run
|
|
129
|
+
|
|
130
|
+
# Process all configured variables
|
|
131
|
+
uv run h2mare run
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Development
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
# Run the full test suite
|
|
138
|
+
uv run pytest tests/
|
|
139
|
+
|
|
140
|
+
# Run a single test file
|
|
141
|
+
uv run pytest tests/test_zarr_catalog.py -v
|
|
142
|
+
|
|
143
|
+
# Format code
|
|
144
|
+
uv run black h2mare/
|
|
145
|
+
uv run isort h2mare/
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Built with
|
|
149
|
+
|
|
150
|
+
| Library | Role |
|
|
151
|
+
|---------|------|
|
|
152
|
+
| [xarray](https://xarray.dev/) | N-dimensional labelled arrays and NetCDF/Zarr I/O |
|
|
153
|
+
| [zarr](https://zarr.dev/) | Chunked, compressed array storage |
|
|
154
|
+
| [dask](https://www.dask.org/) | Parallel and out-of-core computation |
|
|
155
|
+
| [polars](https://pola.rs/) | Fast DataFrame engine for extracted time series |
|
|
156
|
+
| [geopandas](https://geopandas.org/) | Geometry-based spatial extraction |
|
|
157
|
+
| [copernicusmarine](https://pypi.org/project/copernicusmarine/) | CMEMS dataset access |
|
|
158
|
+
| [cdsapi](https://pypi.org/project/cdsapi/) | ERA5 / CDS dataset access |
|
|
159
|
+
|
|
160
|
+
## Contributing
|
|
161
|
+
|
|
162
|
+
Contributions are welcome! Please feel free to submit issues or pull requests on [GitHub](https://github.com/h2ugoparra/h2mare.git).
|
|
163
|
+
|
|
164
|
+
## License
|
|
165
|
+
|
|
166
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE.txt) file for details.
|
|
167
|
+
|
|
168
|
+
## AI Assistance
|
|
169
|
+
|
|
170
|
+
Parts of this codebase were developed with the help of [Claude](https://claude.ai) (Anthropic).
|
|
171
|
+
|
|
172
|
+
## Acknowledgments
|
|
173
|
+
|
|
174
|
+
This project was developed under the framework of [COSTA project](https://costaproject.org/en/). This project relies on data from Copernicus Marine Service, AVISO, Copernicus Climate Data Store, and NOAA NCEI. We gratefully acknowledge these organizations for providing open access to their datasets.
|
h2mare-0.1.0/README.md
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# H2MARE - Geospatial Processing for Climate and Ocean Data
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+
|
|
5
|
+
A Python pipeline for downloading and preprocessing multi-source oceanographic and atmospheric data into analysis-ready formats. H2MARE streamlines the acquisition and harmonization of data from major climate and ocean observation services, optimized for large-scale spatiotemporal analysis.
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **Multi-source data integration**: Download and process data from CMEMS, AVISO, and ERA5.
|
|
10
|
+
- **Variable grouping**: Organize related variables using configurable keys.
|
|
11
|
+
- **Format conversion**: Automated conversion from NetCDF/GRIB to optimized Zarr and Parquet format
|
|
12
|
+
- **Data compilation**: Regrid and interpolate multi-resolution datasets to a common grid
|
|
13
|
+
- **Point and geometry extraction**: Extract time series for specific locations or spatial features
|
|
14
|
+
|
|
15
|
+
## Data Sources
|
|
16
|
+
|
|
17
|
+
H2MARE supports the following data providers. API keys and authentication are required for each:
|
|
18
|
+
|
|
19
|
+
- **[CMEMS](https://marine.copernicus.eu/)** - Copernicus Marine Service: Satellite and in-situ ocean observations
|
|
20
|
+
- **[AVISO](https://www.aviso.altimetry.fr/en/home.html)** - Archiving, Validation and Interpretation of Satellite Oceanographic data
|
|
21
|
+
- **[CDS-ERA5](https://cds.climate.copernicus.eu/)** - ERA5 hourly atmospheric reanalysis (1940-present)
|
|
22
|
+
*Hersbach, H., et al. (2023). DOI: 10.24381/cds.adbb2d47*
|
|
23
|
+
|
|
24
|
+
**Note**: Refer to each provider's documentation for authentication setup before use.
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
### Prerequisites
|
|
29
|
+
|
|
30
|
+
- Python >= 3.11
|
|
31
|
+
- [uv](https://docs.astral.sh/uv/) — fast Python package and project manager
|
|
32
|
+
- Sufficient disk space for downloaded datasets (varies by region and time range)
|
|
33
|
+
|
|
34
|
+
### Install from source
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
git clone https://github.com/h2ugoparra/h2mare.git
|
|
38
|
+
cd h2mare
|
|
39
|
+
uv sync # installs all dependencies into .venv
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
For development (includes pytest, black, isort):
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
uv sync --extra dev
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Configuration
|
|
49
|
+
|
|
50
|
+
Create .env file with external storage path:
|
|
51
|
+
|
|
52
|
+
```env
|
|
53
|
+
STORE_DIR=/path/to/your/storage
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Key variables groups
|
|
57
|
+
|
|
58
|
+
Edit `config.yaml` to define variable groups and processing parameters.
|
|
59
|
+
|
|
60
|
+
### Data Flow
|
|
61
|
+
|
|
62
|
+
- **Download** - Raw NetCDF/GRIB files are fetched from the configured sources and saved at the specified time resolution (monthly or yearly) as native-resolution Zarr files.
|
|
63
|
+
- **Compilation** (`h2mare/processing/compiler.py`) - Preprocessed data is regridded to a defined spatial/temporal resolution and geographic extent (configured via 'h2ds' key in `config.yaml`)
|
|
64
|
+
- **Extraction** (`h2mare/processing/extractor.py`) - Point (CSV files) or geometry (SHP files) data extraction from xarray datasets.
|
|
65
|
+
|
|
66
|
+
## Quick Start
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
# Download and process a single variable for a specific date range
|
|
70
|
+
uv run h2mare run sst --start-date 2021-01-01 --end-date 2021-12-31
|
|
71
|
+
|
|
72
|
+
# Multiple variables at once (space-separated)
|
|
73
|
+
uv run h2mare run seapodym mld o2 chl
|
|
74
|
+
|
|
75
|
+
# Infer missing dates from the existing store and download what's new
|
|
76
|
+
uv run h2mare run sst
|
|
77
|
+
|
|
78
|
+
# Download only (skip Zarr conversion)
|
|
79
|
+
uv run h2mare run sst --no-process
|
|
80
|
+
|
|
81
|
+
# Validate configuration without downloading
|
|
82
|
+
uv run h2mare run sst --dry-run
|
|
83
|
+
|
|
84
|
+
# Process all configured variables
|
|
85
|
+
uv run h2mare run
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Development
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
# Run the full test suite
|
|
92
|
+
uv run pytest tests/
|
|
93
|
+
|
|
94
|
+
# Run a single test file
|
|
95
|
+
uv run pytest tests/test_zarr_catalog.py -v
|
|
96
|
+
|
|
97
|
+
# Format code
|
|
98
|
+
uv run black h2mare/
|
|
99
|
+
uv run isort h2mare/
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Built with
|
|
103
|
+
|
|
104
|
+
| Library | Role |
|
|
105
|
+
|---------|------|
|
|
106
|
+
| [xarray](https://xarray.dev/) | N-dimensional labelled arrays and NetCDF/Zarr I/O |
|
|
107
|
+
| [zarr](https://zarr.dev/) | Chunked, compressed array storage |
|
|
108
|
+
| [dask](https://www.dask.org/) | Parallel and out-of-core computation |
|
|
109
|
+
| [polars](https://pola.rs/) | Fast DataFrame engine for extracted time series |
|
|
110
|
+
| [geopandas](https://geopandas.org/) | Geometry-based spatial extraction |
|
|
111
|
+
| [copernicusmarine](https://pypi.org/project/copernicusmarine/) | CMEMS dataset access |
|
|
112
|
+
| [cdsapi](https://pypi.org/project/cdsapi/) | ERA5 / CDS dataset access |
|
|
113
|
+
|
|
114
|
+
## Contributing
|
|
115
|
+
|
|
116
|
+
Contributions are welcome! Please feel free to submit issues or pull requests on [GitHub](https://github.com/h2ugoparra/h2mare.git).
|
|
117
|
+
|
|
118
|
+
## License
|
|
119
|
+
|
|
120
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE.txt) file for details.
|
|
121
|
+
|
|
122
|
+
## AI Assistance
|
|
123
|
+
|
|
124
|
+
Parts of this codebase were developed with the help of [Claude](https://claude.ai) (Anthropic).
|
|
125
|
+
|
|
126
|
+
## Acknowledgments
|
|
127
|
+
|
|
128
|
+
This project was developed under the framework of [COSTA project](https://costaproject.org/en/). This project relies on data from Copernicus Marine Service, AVISO, Copernicus Climate Data Store, and NOAA NCEI. We gratefully acknowledge these organizations for providing open access to their datasets.
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""
|
|
2
|
+
h2mare - Geospatial Processing for Climate and Ocean Data
|
|
3
|
+
|
|
4
|
+
Main components:
|
|
5
|
+
- config: Project paths and settings
|
|
6
|
+
- models: Data models for configuration
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__version__ = "0.1.0"
|
|
10
|
+
|
|
11
|
+
from .config import settings
|
|
12
|
+
from .models import AppConfig, KeyVarConfigEntry, VariablesConfig
|
|
13
|
+
from .types import BBox, DateLike, DateRange, DownloadTask, TimeResolution
|
|
14
|
+
from .validators import validate_time_resolution, validate_var_key, validate_var_keys
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"settings",
|
|
18
|
+
"AppConfig",
|
|
19
|
+
"VariablesConfig",
|
|
20
|
+
"KeyVarConfigEntry",
|
|
21
|
+
"DateLike",
|
|
22
|
+
"DateRange",
|
|
23
|
+
"BBox",
|
|
24
|
+
"TimeResolution",
|
|
25
|
+
"DownloadTask",
|
|
26
|
+
"validate_var_key",
|
|
27
|
+
"validate_var_keys",
|
|
28
|
+
"validate_time_resolution",
|
|
29
|
+
]
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""h2mare command-line interface."""
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
|
|
5
|
+
from h2mare.cli.catalog import catalog
|
|
6
|
+
from h2mare.cli.compile import compile
|
|
7
|
+
from h2mare.cli.main import run
|
|
8
|
+
from h2mare.cli.nc2zarr import convert
|
|
9
|
+
|
|
10
|
+
app = typer.Typer(
|
|
11
|
+
name="h2mare",
|
|
12
|
+
help="Climate and ocean data pipeline — download, convert, and inspect.",
|
|
13
|
+
no_args_is_help=True,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
app.command("run", help="Download and convert data for one or more variable keys.")(run)
|
|
17
|
+
app.command(
|
|
18
|
+
"convert", help="Convert downloaded NetCDF/GRIB files to Zarr (no download)."
|
|
19
|
+
)(convert)
|
|
20
|
+
app.command("catalog", help="Inspect ZarrCatalog metadata for a variable.")(catalog)
|
|
21
|
+
app.command(
|
|
22
|
+
"compile",
|
|
23
|
+
help="Merge per-variable Zarr stores into the unified h2ds compiled dataset.",
|
|
24
|
+
)(compile)
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""
|
|
2
|
+
h2mare catalog — inspect ZarrCatalog metadata for a variable.
|
|
3
|
+
|
|
4
|
+
Shows coverage, file count, variables, and per-dataset breakdown from the
|
|
5
|
+
local Parquet index without opening any Zarr files.
|
|
6
|
+
|
|
7
|
+
Examples
|
|
8
|
+
--------
|
|
9
|
+
# Summary for SST
|
|
10
|
+
uv run h2mare catalog sst
|
|
11
|
+
|
|
12
|
+
# Summary for all configured variables
|
|
13
|
+
uv run h2mare catalog --all
|
|
14
|
+
|
|
15
|
+
# Show individual catalog rows
|
|
16
|
+
uv run h2mare catalog sst --rows
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
import pandas as pd
|
|
22
|
+
import typer
|
|
23
|
+
from loguru import logger
|
|
24
|
+
|
|
25
|
+
from h2mare.config import settings
|
|
26
|
+
|
|
27
|
+
app = typer.Typer()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _print_catalog(var_key: str, show_rows: bool) -> None:
    """Echo a human-readable summary of the ZarrCatalog for ``var_key``.

    Prints the file count, time coverage, variables, timesteps, store and
    catalog paths and last-scan time taken from ``ZarrCatalog.summary()``,
    followed by a per-dataset date range when the catalog frame has a
    ``dataset`` column. With ``show_rows`` the individual catalog rows are
    printed as well.

    If the catalog cannot be constructed, the error is reported on stderr
    and the function returns without raising.
    """
    from h2mare.storage.zarr_catalog import ZarrCatalog

    try:
        zarr_cat = ZarrCatalog(var_key)
    except Exception as exc:
        typer.echo(f" [{var_key}] Could not load catalog: {exc}", err=True)
        return

    frame = zarr_cat.df
    info = zarr_cat.summary()
    coverage = info.get("time_coverage")

    typer.echo(f"\nZarrCatalog — {var_key.upper()}")
    typer.echo(f" Files : {info['num_files']}")

    # Coverage is shown only when the summary holds a real range rather than
    # an empty value or the "No data" placeholder string.
    if not coverage or coverage == "No data":
        typer.echo(" Coverage : No data")
    else:
        typer.echo(f" Coverage : {coverage.start.date()} → {coverage.end.date()}")

    var_names = info.get("variables") or set()
    if var_names:
        var_text = ", ".join(sorted(var_names))
    else:
        var_text = "—"
    typer.echo(f" Variables : {var_text}")
    typer.echo(f" Timesteps : {info.get('total_timesteps', '—')}")
    typer.echo(f" Store : {info['store_root']}")
    typer.echo(f" Catalog : {info['catalog_path']}")

    scanned_at = info.get("last_scanned")
    if pd.notna(scanned_at):
        scanned_text = scanned_at.strftime("%Y-%m-%d %H:%M:%S")
    else:
        scanned_text = "—"
    typer.echo(f" Scanned : {scanned_text}")

    if not frame.empty and "dataset" in frame.columns:
        typer.echo("\n Dataset breakdown:")
        # Each group carries the same columns as the full frame, so the
        # column check is done once up front.
        has_timesteps = "num_timesteps" in frame.columns
        for dataset_name, rows in frame.groupby("dataset", sort=True):
            first_day = rows["start_date"].min()
            last_day = rows["end_date"].max()
            step_count = rows["num_timesteps"].sum() if has_timesteps else "—"
            typer.echo(f" {dataset_name}")
            typer.echo(
                f" {first_day.date()} → {last_day.date()} ({step_count} timesteps)"
            )

    if not show_rows or frame.empty:
        return

    # Restrict the row listing to the known columns that are actually present.
    wanted = ["filename", "dataset", "start_date", "end_date", "num_timesteps"]
    present = [name for name in wanted if name in frame.columns]
    typer.echo(f"\n Rows:\n{frame[present].to_string(index=False)}")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def catalog(
    var_key: Optional[str] = typer.Argument(
        None,
        help="Variable key to inspect (e.g. sst, ssh). Omit with --all to show every variable.",
    ),
    all_vars: bool = typer.Option(
        False,
        "--all",
        "-a",
        is_flag=True,
        help="Show catalog summary for all variables configured in config.yaml.",
    ),
    show_rows: bool = typer.Option(
        False,
        "--rows",
        "-r",
        is_flag=True,
        help="Print individual catalog rows (filename, dataset, dates, timesteps).",
    ),
) -> None:
    """Inspect ZarrCatalog metadata: coverage, file count, and per-dataset breakdown."""
    # NOTE: the docstring above is left untouched because Typer can use it as
    # the command's help text; extra documentation lives in comments instead.
    #
    # Parameters
    #   var_key   : single variable key to inspect; ignored when --all is set.
    #   all_vars  : inspect every key found in settings.app_config.variables.
    #   show_rows : forwarded to _print_catalog to also list individual rows.
    #
    # Exits with status 1 (message on stderr) when neither a key nor --all is
    # given, or when a requested key is not configured.

    if not var_key and not all_vars:
        typer.echo("Provide a variable key or use --all.", err=True)
        raise typer.Exit(code=1)

    configured = settings.app_config.variables
    keys = list(configured.keys()) if all_vars else [var_key]

    found_unknown = False
    for key in keys:
        if key not in configured:
            typer.echo(
                f"Unknown variable key '{key}'. Available: {', '.join(configured)}.",
                err=True,
            )
            found_unknown = True
            continue
        _print_catalog(key, show_rows)

    # An unknown key used to be reported on stderr while the command still
    # exited 0; signal the failure to the caller, as `compile` does for the
    # same mistake. Valid keys are still printed before exiting.
    if found_unknown:
        raise typer.Exit(code=1)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
app.command()(catalog)
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""
|
|
2
|
+
h2mare compile — merge per-variable Zarr stores into a unified h2ds dataset.
|
|
3
|
+
|
|
4
|
+
Reads the individual per-variable Zarr stores and interpolates them to a
|
|
5
|
+
common 0.25° daily grid, writing the result as the h2ds compiled dataset.
|
|
6
|
+
When no dates are given the step infers what is missing from the local store.
|
|
7
|
+
|
|
8
|
+
Examples
|
|
9
|
+
--------
|
|
10
|
+
# Compile all available variables (dates inferred from store)
|
|
11
|
+
uv run h2mare compile
|
|
12
|
+
|
|
13
|
+
# Compile specific variables over a date range
|
|
14
|
+
uv run h2mare compile -v sst -v ssh -v mld --start-date 2024-01-01 --end-date 2024-12-31
|
|
15
|
+
|
|
16
|
+
# Compile with a custom store path
|
|
17
|
+
uv run h2mare compile --store-path D:/GlobalData
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import List, Optional
|
|
22
|
+
|
|
23
|
+
import pandas as pd
|
|
24
|
+
import typer
|
|
25
|
+
from loguru import logger
|
|
26
|
+
|
|
27
|
+
from h2mare.config import settings
|
|
28
|
+
|
|
29
|
+
app = typer.Typer()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def compile(
    vars: Optional[List[str]] = typer.Option(
        None,
        "--vars",
        "-v",
        help=(
            "Variable key(s) to compile (repeat for multiple: -v sst -v ssh). "
            "Defaults to all available keys."
        ),
    ),
    start_date: Optional[str] = typer.Option(
        None,
        "--start-date",
        help="Start date (YYYY-MM-DD). Must be paired with --end-date.",
    ),
    end_date: Optional[str] = typer.Option(
        None,
        "--end-date",
        help="End date (YYYY-MM-DD). Must be paired with --start-date.",
    ),
    store_path: Optional[Path] = typer.Option(
        None,
        "--store-path",
        help="Override the Zarr store root (defaults to STORE_DIR from .env).",
    ),
) -> None:
    """Merge per-variable Zarr stores into the unified h2ds compiled dataset."""
    # NOTE: the docstring above is left untouched because Typer can use it as
    # the command's help text; extra documentation lives in comments instead.
    #
    # Parameters
    #   vars       : variable keys to compile; None/empty is passed on as None.
    #   start_date : start of the range as a string; must come with end_date.
    #   end_date   : end of the range as a string; must come with start_date.
    #   store_path : store root handed to Compiler; falls back to
    #                settings.STORE_DIR when not given.
    #
    # Exits with status 1 (message on stderr) when only one of the two dates
    # is given, when a date cannot be parsed, when the start is not strictly
    # before the end, or when an unknown variable key is requested.

    log_path = settings.LOGS_DIR / "h2mare.log"
    logger.add(log_path, level="INFO")

    # Exactly one of the two dates being set is an error (XOR on truthiness).
    if bool(start_date) ^ bool(end_date):
        typer.echo(
            "Error: --start-date and --end-date must be provided together.", err=True
        )
        raise typer.Exit(code=1)

    if start_date and end_date:
        # A malformed date used to escape as an uncaught ValueError traceback;
        # report it like every other validation failure in this command.
        try:
            start_ts = pd.Timestamp(start_date)
            end_ts = pd.Timestamp(end_date)
        except ValueError as err:
            typer.echo(
                f"Error: could not parse --start-date/--end-date ({err}). "
                "Expected YYYY-MM-DD.",
                err=True,
            )
            raise typer.Exit(code=1) from err

        # Strings such as "NaT" parse to NaT, for which every comparison is
        # False, so they would slip through the ordering check below.
        if pd.isna(start_ts) or pd.isna(end_ts):
            typer.echo(
                "Error: --start-date and --end-date must be valid dates (YYYY-MM-DD).",
                err=True,
            )
            raise typer.Exit(code=1)

        if start_ts >= end_ts:
            typer.echo(
                f"Error: --start-date ({start_date}) must be before --end-date ({end_date}).",
                err=True,
            )
            raise typer.Exit(code=1)

    if vars:
        available = set(settings.app_config.variables.keys())
        unknown = set(vars) - available
        if unknown:
            typer.echo(
                f"Error: unknown variable key(s): {', '.join(sorted(unknown))}. "
                f"Available: {', '.join(sorted(available))}.",
                err=True,
            )
            raise typer.Exit(code=1)

    # Imported only once all arguments have been validated.
    from h2mare.processing.compiler import Compiler

    # The original date strings (not the parsed Timestamps) are passed on.
    Compiler(remote_store_root=store_path or settings.STORE_DIR).run(
        start_date=start_date,
        end_date=end_date,
        var_keys=list(vars) if vars else None,
    )
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
app.command()(compile)
|