h2mare 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. h2mare-0.1.0/LICENSE.txt +21 -0
  2. h2mare-0.1.0/PKG-INFO +174 -0
  3. h2mare-0.1.0/README.md +128 -0
  4. h2mare-0.1.0/h2mare/__init__.py +29 -0
  5. h2mare-0.1.0/h2mare/cli/__init__.py +24 -0
  6. h2mare-0.1.0/h2mare/cli/catalog.py +121 -0
  7. h2mare-0.1.0/h2mare/cli/compile.py +99 -0
  8. h2mare-0.1.0/h2mare/cli/main.py +154 -0
  9. h2mare-0.1.0/h2mare/cli/nc2zarr.py +88 -0
  10. h2mare-0.1.0/h2mare/config.py +205 -0
  11. h2mare-0.1.0/h2mare/downloader/__init__.py +19 -0
  12. h2mare-0.1.0/h2mare/downloader/aviso_downloader.py +453 -0
  13. h2mare-0.1.0/h2mare/downloader/base.py +104 -0
  14. h2mare-0.1.0/h2mare/downloader/cds_downloader.py +224 -0
  15. h2mare-0.1.0/h2mare/downloader/cmems_downloader.py +534 -0
  16. h2mare-0.1.0/h2mare/downloader/cmems_utils.py +159 -0
  17. h2mare-0.1.0/h2mare/downloader/commons.py +70 -0
  18. h2mare-0.1.0/h2mare/format_converters/__init__.py +9 -0
  19. h2mare-0.1.0/h2mare/format_converters/netcdf2zarr.py +441 -0
  20. h2mare-0.1.0/h2mare/format_converters/parquet2csv.py +155 -0
  21. h2mare-0.1.0/h2mare/format_converters/zarr2parquet.py +105 -0
  22. h2mare-0.1.0/h2mare/models.py +56 -0
  23. h2mare-0.1.0/h2mare/pipeline_manager.py +104 -0
  24. h2mare-0.1.0/h2mare/processing/__init__.py +27 -0
  25. h2mare-0.1.0/h2mare/processing/compiler.py +367 -0
  26. h2mare-0.1.0/h2mare/processing/core/__init__.py +0 -0
  27. h2mare-0.1.0/h2mare/processing/core/aviso.py +519 -0
  28. h2mare-0.1.0/h2mare/processing/core/cds.py +613 -0
  29. h2mare-0.1.0/h2mare/processing/core/cmems.py +76 -0
  30. h2mare-0.1.0/h2mare/processing/core/fronts.py +285 -0
  31. h2mare-0.1.0/h2mare/processing/extractor.py +989 -0
  32. h2mare-0.1.0/h2mare/processing/registry.py +28 -0
  33. h2mare-0.1.0/h2mare/storage/__init__.py +36 -0
  34. h2mare-0.1.0/h2mare/storage/coverage.py +69 -0
  35. h2mare-0.1.0/h2mare/storage/parquet_helpers.py +201 -0
  36. h2mare-0.1.0/h2mare/storage/parquet_indexer.py +712 -0
  37. h2mare-0.1.0/h2mare/storage/parquet_plotter.py +191 -0
  38. h2mare-0.1.0/h2mare/storage/storage.py +170 -0
  39. h2mare-0.1.0/h2mare/storage/xarray_helpers.py +156 -0
  40. h2mare-0.1.0/h2mare/storage/zarr_catalog.py +1177 -0
  41. h2mare-0.1.0/h2mare/types.py +342 -0
  42. h2mare-0.1.0/h2mare/utils/__init__.py +26 -0
  43. h2mare-0.1.0/h2mare/utils/datetime_utils.py +54 -0
  44. h2mare-0.1.0/h2mare/utils/files_io.py +162 -0
  45. h2mare-0.1.0/h2mare/utils/labels.py +90 -0
  46. h2mare-0.1.0/h2mare/utils/logging_utils.py +28 -0
  47. h2mare-0.1.0/h2mare/utils/paths.py +78 -0
  48. h2mare-0.1.0/h2mare/utils/plot.py +322 -0
  49. h2mare-0.1.0/h2mare/utils/spatial.py +112 -0
  50. h2mare-0.1.0/h2mare/validators.py +88 -0
  51. h2mare-0.1.0/h2mare.egg-info/PKG-INFO +174 -0
  52. h2mare-0.1.0/h2mare.egg-info/SOURCES.txt +74 -0
  53. h2mare-0.1.0/h2mare.egg-info/dependency_links.txt +1 -0
  54. h2mare-0.1.0/h2mare.egg-info/entry_points.txt +2 -0
  55. h2mare-0.1.0/h2mare.egg-info/requires.txt +24 -0
  56. h2mare-0.1.0/h2mare.egg-info/top_level.txt +1 -0
  57. h2mare-0.1.0/pyproject.toml +94 -0
  58. h2mare-0.1.0/setup.cfg +4 -0
  59. h2mare-0.1.0/tests/test_aviso_downloader.py +212 -0
  60. h2mare-0.1.0/tests/test_coverage_utils.py +76 -0
  61. h2mare-0.1.0/tests/test_datetime_utils.py +90 -0
  62. h2mare-0.1.0/tests/test_extractor.py +264 -0
  63. h2mare-0.1.0/tests/test_labels.py +66 -0
  64. h2mare-0.1.0/tests/test_logging_utils.py +43 -0
  65. h2mare-0.1.0/tests/test_models.py +157 -0
  66. h2mare-0.1.0/tests/test_parquet_helpers.py +196 -0
  67. h2mare-0.1.0/tests/test_parquet_indexer.py +197 -0
  68. h2mare-0.1.0/tests/test_pipeline_manager.py +233 -0
  69. h2mare-0.1.0/tests/test_plot.py +161 -0
  70. h2mare-0.1.0/tests/test_spatial.py +79 -0
  71. h2mare-0.1.0/tests/test_storage.py +138 -0
  72. h2mare-0.1.0/tests/test_types.py +310 -0
  73. h2mare-0.1.0/tests/test_utils_paths.py +65 -0
  74. h2mare-0.1.0/tests/test_validators.py +68 -0
  75. h2mare-0.1.0/tests/test_xarray_helpers.py +131 -0
  76. h2mare-0.1.0/tests/test_zarr_catalog.py +265 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 h2ugoparra
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
h2mare-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,174 @@
1
+ Metadata-Version: 2.4
2
+ Name: h2mare
3
+ Version: 0.1.0
4
+ Summary: Downloader and data management tools for climate and ocean datasets.
5
+ Author-email: Hugo Parra <h2ugo.parra@pm.me>
6
+ License-Expression: MIT
7
+ Project-URL: Repository, https://github.com/h2ugoparra/h2mare
8
+ Keywords: ocean,climate,geospatial,zarr,netcdf,cmems,copernicus,era5
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Science/Research
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Topic :: Scientific/Engineering
16
+ Classifier: Topic :: Scientific/Engineering :: Atmospheric Science
17
+ Classifier: Topic :: Scientific/Engineering :: GIS
18
+ Requires-Python: >=3.11
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE.txt
21
+ Requires-Dist: cartopy>=0.23.0
22
+ Requires-Dist: cdsapi>=0.7.0
23
+ Requires-Dist: cfgrib>=0.9.10
24
+ Requires-Dist: copernicusmarine>=2.0.0
25
+ Requires-Dist: dask>=2024.1.0
26
+ Requires-Dist: duckdb>=1.0.0
27
+ Requires-Dist: eccodes>=1.7.0
28
+ Requires-Dist: ephem>=4.1.0
29
+ Requires-Dist: geopandas>=1.0.0
30
+ Requires-Dist: global_land_mask>=1.0.0
31
+ Requires-Dist: ipython>=8.0.0
32
+ Requires-Dist: loguru>=0.7.0
33
+ Requires-Dist: matplotlib>=3.8.0
34
+ Requires-Dist: msgspec>=0.18.0
35
+ Requires-Dist: netCDF4>=1.6.0
36
+ Requires-Dist: plotly>=5.18.0
37
+ Requires-Dist: polars>=1.0.0
38
+ Requires-Dist: PyYAML>=6.0.0
39
+ Requires-Dist: python-dotenv>=1.0.0
40
+ Requires-Dist: rioxarray>=0.17.0
41
+ Requires-Dist: scipy>=1.13.0
42
+ Requires-Dist: tqdm>=4.66.0
43
+ Requires-Dist: typer>=0.12.0
44
+ Requires-Dist: xarray>=2024.1.0
45
+ Dynamic: license-file
46
+
47
+ # H2MARE - Geospatial Processing for Climate and Ocean Data
48
+
49
+ ![Python](https://img.shields.io/badge/python-3.11%20%7C%203.12%20%7C%203.13-blue)
50
+
51
+ A Python pipeline for downloading and preprocessing multi-source oceanographic and atmospheric data into analysis-ready formats. H2MARE streamlines the acquisition and harmonization of data from major climate and ocean observation services, optimized for large-scale spatiotemporal analysis.
52
+
53
+ ## Features
54
+
55
+ - **Multi-source data integration**: Download and process data from CMEMS, AVISO, and ERA5.
56
+ - **Variable grouping**: Organize related variables using configurable keys.
57
+ - **Format conversion**: Automated conversion from NetCDF/GRIB to optimized Zarr and Parquet formats
58
+ - **Data compilation**: Regrid and interpolate multi-resolution datasets to a common grid
59
+ - **Point and geometry extraction**: Extract time series for specific locations or spatial features
60
+
61
+ ## Data Sources
62
+
63
+ H2MARE supports the following data providers. API keys and authentication are required for each:
64
+
65
+ - **[CMEMS](https://marine.copernicus.eu/)** - Copernicus Marine Service: Satellite and in-situ ocean observations
66
+ - **[AVISO](https://www.aviso.altimetry.fr/en/home.html)** - Archiving, Validation and Interpretation of Satellite Oceanographic data
67
+ - **[CDS-ERA5](https://cds.climate.copernicus.eu/)** - ERA5 hourly atmospheric reanalysis (1940-present)
68
+ *Hersbach, H., et al. (2023). DOI: 10.24381/cds.adbb2d47*
69
+
70
+ **Note**: Refer to each provider's documentation for authentication setup before use.
71
+
72
+ ## Installation
73
+
74
+ ### Prerequisites
75
+
76
+ - Python >= 3.11
77
+ - [uv](https://docs.astral.sh/uv/) — fast Python package and project manager
78
+ - Sufficient disk space for downloaded datasets (varies by region and time range)
79
+
80
+ ### Install from source
81
+
82
+ ```bash
83
+ git clone https://github.com/h2ugoparra/h2mare.git
84
+ cd h2mare
85
+ uv sync # installs all dependencies into .venv
86
+ ```
87
+
88
+ For development (includes pytest, black, isort):
89
+
90
+ ```bash
91
+ uv sync --extra dev
92
+ ```
93
+
94
+ ## Configuration
95
+
96
+ Create a `.env` file with the external storage path:
97
+
98
+ ```env
99
+ STORE_DIR=/path/to/your/storage
100
+ ```
101
+
102
+ ### Key variables groups
103
+
104
+ Edit `config.yaml` to define variable groups and processing parameters.
105
+
106
+ ### Data Flow
107
+
108
+ - **Download** - Raw NetCDF/GRIB files are fetched from the configured sources and saved at the specified time resolution (monthly or yearly) as native-resolution Zarr files.
109
+ - **Compilation** (`h2mare/processing/compiler.py`) - Preprocessed data is regridded to a defined spatial/temporal resolution and geographic extent (configured via 'h2ds' key in `config.yaml`)
110
+ - **Extraction** (`h2mare/processing/extractor.py`) - Point (CSV files) or geometry (SHP files) data extraction from xarray datasets.
111
+
112
+ ## Quick Start
113
+
114
+ ```bash
115
+ # Download and process a single variable for a specific date range
116
+ uv run h2mare run sst --start-date 2021-01-01 --end-date 2021-12-31
117
+
118
+ # Multiple variables at once (space-separated)
119
+ uv run h2mare run seapodym mld o2 chl
120
+
121
+ # Infer missing dates from the existing store and download what's new
122
+ uv run h2mare run sst
123
+
124
+ # Download only (skip Zarr conversion)
125
+ uv run h2mare run sst --no-process
126
+
127
+ # Validate configuration without downloading
128
+ uv run h2mare run sst --dry-run
129
+
130
+ # Process all configured variables
131
+ uv run h2mare run
132
+ ```
133
+
134
+ ## Development
135
+
136
+ ```bash
137
+ # Run the full test suite
138
+ uv run pytest tests/
139
+
140
+ # Run a single test file
141
+ uv run pytest tests/test_zarr_catalog.py -v
142
+
143
+ # Format code
144
+ uv run black h2mare/
145
+ uv run isort h2mare/
146
+ ```
147
+
148
+ ## Built with
149
+
150
+ | Library | Role |
151
+ |---------|------|
152
+ | [xarray](https://xarray.dev/) | N-dimensional labelled arrays and NetCDF/Zarr I/O |
153
+ | [zarr](https://zarr.dev/) | Chunked, compressed array storage |
154
+ | [dask](https://www.dask.org/) | Parallel and out-of-core computation |
155
+ | [polars](https://pola.rs/) | Fast DataFrame engine for extracted time series |
156
+ | [geopandas](https://geopandas.org/) | Geometry-based spatial extraction |
157
+ | [copernicusmarine](https://pypi.org/project/copernicusmarine/) | CMEMS dataset access |
158
+ | [cdsapi](https://pypi.org/project/cdsapi/) | ERA5 / CDS dataset access |
159
+
160
+ ## Contributing
161
+
162
+ Contributions are welcome! Please feel free to submit issues or pull requests on [GitHub](https://github.com/h2ugoparra/h2mare.git).
163
+
164
+ ## License
165
+
166
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE.txt) file for details.
167
+
168
+ ## AI Assistance
169
+
170
+ Parts of this codebase were developed with the help of [Claude](https://claude.ai) (Anthropic).
171
+
172
+ ## Acknowledgments
173
+
174
+ This project was developed under the framework of the [COSTA project](https://costaproject.org/en/). This project relies on data from Copernicus Marine Service, AVISO, Copernicus Climate Data Store, and NOAA NCEI. We gratefully acknowledge these organizations for providing open access to their datasets.
h2mare-0.1.0/README.md ADDED
@@ -0,0 +1,128 @@
1
+ # H2MARE - Geospatial Processing for Climate and Ocean Data
2
+
3
+ ![Python](https://img.shields.io/badge/python-3.11%20%7C%203.12%20%7C%203.13-blue)
4
+
5
+ A Python pipeline for downloading and preprocessing multi-source oceanographic and atmospheric data into analysis-ready formats. H2MARE streamlines the acquisition and harmonization of data from major climate and ocean observation services, optimized for large-scale spatiotemporal analysis.
6
+
7
+ ## Features
8
+
9
+ - **Multi-source data integration**: Download and process data from CMEMS, AVISO, and ERA5.
10
+ - **Variable grouping**: Organize related variables using configurable keys.
11
+ - **Format conversion**: Automated conversion from NetCDF/GRIB to optimized Zarr and Parquet formats
12
+ - **Data compilation**: Regrid and interpolate multi-resolution datasets to a common grid
13
+ - **Point and geometry extraction**: Extract time series for specific locations or spatial features
14
+
15
+ ## Data Sources
16
+
17
+ H2MARE supports the following data providers. API keys and authentication are required for each:
18
+
19
+ - **[CMEMS](https://marine.copernicus.eu/)** - Copernicus Marine Service: Satellite and in-situ ocean observations
20
+ - **[AVISO](https://www.aviso.altimetry.fr/en/home.html)** - Archiving, Validation and Interpretation of Satellite Oceanographic data
21
+ - **[CDS-ERA5](https://cds.climate.copernicus.eu/)** - ERA5 hourly atmospheric reanalysis (1940-present)
22
+ *Hersbach, H., et al. (2023). DOI: 10.24381/cds.adbb2d47*
23
+
24
+ **Note**: Refer to each provider's documentation for authentication setup before use.
25
+
26
+ ## Installation
27
+
28
+ ### Prerequisites
29
+
30
+ - Python >= 3.11
31
+ - [uv](https://docs.astral.sh/uv/) — fast Python package and project manager
32
+ - Sufficient disk space for downloaded datasets (varies by region and time range)
33
+
34
+ ### Install from source
35
+
36
+ ```bash
37
+ git clone https://github.com/h2ugoparra/h2mare.git
38
+ cd h2mare
39
+ uv sync # installs all dependencies into .venv
40
+ ```
41
+
42
+ For development (includes pytest, black, isort):
43
+
44
+ ```bash
45
+ uv sync --extra dev
46
+ ```
47
+
48
+ ## Configuration
49
+
50
+ Create a `.env` file with the external storage path:
51
+
52
+ ```env
53
+ STORE_DIR=/path/to/your/storage
54
+ ```
55
+
56
+ ### Key variables groups
57
+
58
+ Edit `config.yaml` to define variable groups and processing parameters.
59
+
60
+ ### Data Flow
61
+
62
+ - **Download** - Raw NetCDF/GRIB files are fetched from the configured sources and saved at the specified time resolution (monthly or yearly) as native-resolution Zarr files.
63
+ - **Compilation** (`h2mare/processing/compiler.py`) - Preprocessed data is regridded to a defined spatial/temporal resolution and geographic extent (configured via 'h2ds' key in `config.yaml`)
64
+ - **Extraction** (`h2mare/processing/extractor.py`) - Point (CSV files) or geometry (SHP files) data extraction from xarray datasets.
65
+
66
+ ## Quick Start
67
+
68
+ ```bash
69
+ # Download and process a single variable for a specific date range
70
+ uv run h2mare run sst --start-date 2021-01-01 --end-date 2021-12-31
71
+
72
+ # Multiple variables at once (space-separated)
73
+ uv run h2mare run seapodym mld o2 chl
74
+
75
+ # Infer missing dates from the existing store and download what's new
76
+ uv run h2mare run sst
77
+
78
+ # Download only (skip Zarr conversion)
79
+ uv run h2mare run sst --no-process
80
+
81
+ # Validate configuration without downloading
82
+ uv run h2mare run sst --dry-run
83
+
84
+ # Process all configured variables
85
+ uv run h2mare run
86
+ ```
87
+
88
+ ## Development
89
+
90
+ ```bash
91
+ # Run the full test suite
92
+ uv run pytest tests/
93
+
94
+ # Run a single test file
95
+ uv run pytest tests/test_zarr_catalog.py -v
96
+
97
+ # Format code
98
+ uv run black h2mare/
99
+ uv run isort h2mare/
100
+ ```
101
+
102
+ ## Built with
103
+
104
+ | Library | Role |
105
+ |---------|------|
106
+ | [xarray](https://xarray.dev/) | N-dimensional labelled arrays and NetCDF/Zarr I/O |
107
+ | [zarr](https://zarr.dev/) | Chunked, compressed array storage |
108
+ | [dask](https://www.dask.org/) | Parallel and out-of-core computation |
109
+ | [polars](https://pola.rs/) | Fast DataFrame engine for extracted time series |
110
+ | [geopandas](https://geopandas.org/) | Geometry-based spatial extraction |
111
+ | [copernicusmarine](https://pypi.org/project/copernicusmarine/) | CMEMS dataset access |
112
+ | [cdsapi](https://pypi.org/project/cdsapi/) | ERA5 / CDS dataset access |
113
+
114
+ ## Contributing
115
+
116
+ Contributions are welcome! Please feel free to submit issues or pull requests on [GitHub](https://github.com/h2ugoparra/h2mare.git).
117
+
118
+ ## License
119
+
120
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE.txt) file for details.
121
+
122
+ ## AI Assistance
123
+
124
+ Parts of this codebase were developed with the help of [Claude](https://claude.ai) (Anthropic).
125
+
126
+ ## Acknowledgments
127
+
128
+ This project was developed under the framework of the [COSTA project](https://costaproject.org/en/). This project relies on data from Copernicus Marine Service, AVISO, Copernicus Climate Data Store, and NOAA NCEI. We gratefully acknowledge these organizations for providing open access to their datasets.
@@ -0,0 +1,29 @@
1
+ """
2
+ h2mare - Geospatial Processing for Climate and Ocean Data
3
+
4
+ Main components:
5
+ - config: Project paths and settings
6
+ - models: Data models for configuration
7
+ """
8
+
9
+ __version__ = "0.1.0"
10
+
11
+ from .config import settings
12
+ from .models import AppConfig, KeyVarConfigEntry, VariablesConfig
13
+ from .types import BBox, DateLike, DateRange, DownloadTask, TimeResolution
14
+ from .validators import validate_time_resolution, validate_var_key, validate_var_keys
15
+
16
+ __all__ = [
17
+ "settings",
18
+ "AppConfig",
19
+ "VariablesConfig",
20
+ "KeyVarConfigEntry",
21
+ "DateLike",
22
+ "DateRange",
23
+ "BBox",
24
+ "TimeResolution",
25
+ "DownloadTask",
26
+ "validate_var_key",
27
+ "validate_var_keys",
28
+ "validate_time_resolution",
29
+ ]
@@ -0,0 +1,24 @@
1
"""h2mare command-line interface."""

import typer

from h2mare.cli.catalog import catalog
from h2mare.cli.compile import compile
from h2mare.cli.main import run
from h2mare.cli.nc2zarr import convert

# Root Typer application. With ``no_args_is_help=True`` a bare ``h2mare``
# invocation prints the help text.
app = typer.Typer(
    name="h2mare",
    help="Climate and ocean data pipeline — download, convert, and inspect.",
    no_args_is_help=True,
)

# Each sub-command is a plain function imported from its own module and
# registered here under an explicit name with its own help string.
app.command("run", help="Download and convert data for one or more variable keys.")(run)
app.command(
    "convert", help="Convert downloaded NetCDF/GRIB files to Zarr (no download)."
)(convert)
app.command("catalog", help="Inspect ZarrCatalog metadata for a variable.")(catalog)
app.command(
    "compile",
    help="Merge per-variable Zarr stores into the unified h2ds compiled dataset.",
)(compile)
@@ -0,0 +1,121 @@
1
+ """
2
+ h2mare catalog — inspect ZarrCatalog metadata for a variable.
3
+
4
+ Shows coverage, file count, variables, and per-dataset breakdown from the
5
+ local Parquet index without opening any Zarr files.
6
+
7
+ Examples
8
+ --------
9
+ # Summary for SST
10
+ uv run h2mare catalog sst
11
+
12
+ # Summary for all configured variables
13
+ uv run h2mare catalog --all
14
+
15
+ # Show individual catalog rows
16
+ uv run h2mare catalog sst --rows
17
+ """
18
+
19
+ from typing import Optional
20
+
21
+ import pandas as pd
22
+ import typer
23
+ from loguru import logger
24
+
25
+ from h2mare.config import settings
26
+
27
+ app = typer.Typer()
28
+
29
+
30
def _print_catalog(var_key: str, show_rows: bool) -> None:
    """Echo a summary of the ZarrCatalog for one variable key.

    Parameters
    ----------
    var_key
        Variable key passed to ``ZarrCatalog``.
    show_rows
        When true (and the catalog frame is not empty), also print the
        individual catalog rows.

    If the catalog cannot be constructed, the error is echoed to stderr and
    the function returns without printing a summary.
    """
    # Imported lazily inside the function, as in the original code.
    from h2mare.storage.zarr_catalog import ZarrCatalog

    try:
        zarr_cat = ZarrCatalog(var_key)
    except Exception as exc:
        typer.echo(f" [{var_key}] Could not load catalog: {exc}", err=True)
        return

    frame = zarr_cat.df
    info = zarr_cat.summary()
    coverage = info.get("time_coverage")

    typer.echo(f"\nZarrCatalog — {var_key.upper()}")
    typer.echo(f" Files : {info['num_files']}")

    # The summary may carry either a range-like object or the literal
    # string "No data"; both the missing and the literal case print "No data".
    if coverage and coverage != "No data":
        coverage_text = f"{coverage.start.date()} → {coverage.end.date()}"
    else:
        coverage_text = "No data"
    typer.echo(f" Coverage : {coverage_text}")

    var_names = info.get("variables") or set()
    var_text = ", ".join(sorted(var_names)) if var_names else "—"
    typer.echo(f" Variables : {var_text}")
    typer.echo(f" Timesteps : {info.get('total_timesteps', '—')}")
    typer.echo(f" Store : {info['store_root']}")
    typer.echo(f" Catalog : {info['catalog_path']}")

    scanned_at = info.get("last_scanned")
    if pd.notna(scanned_at):
        scanned_text = scanned_at.strftime("%Y-%m-%d %H:%M:%S")
    else:
        scanned_text = "—"
    typer.echo(f" Scanned : {scanned_text}")

    # Per-dataset date span and timestep total, only when the frame has a
    # "dataset" column to group on.
    if "dataset" in frame.columns and not frame.empty:
        typer.echo("\n Dataset breakdown:")
        for dataset_name, rows in frame.groupby("dataset", sort=True):
            first_day = rows["start_date"].min()
            last_day = rows["end_date"].max()
            if "num_timesteps" in rows.columns:
                step_count = rows["num_timesteps"].sum()
            else:
                step_count = "—"
            typer.echo(f" {dataset_name}")
            typer.echo(
                f" {first_day.date()} → {last_day.date()} ({step_count} timesteps)"
            )

    if show_rows and not frame.empty:
        # Keep the preferred column order, skipping columns the frame lacks.
        preferred = ("filename", "dataset", "start_date", "end_date", "num_timesteps")
        present = []
        for column in preferred:
            if column in frame.columns:
                present.append(column)
        typer.echo(f"\n Rows:\n{frame[present].to_string(index=False)}")
81
+
82
+
83
def catalog(
    var_key: Optional[str] = typer.Argument(
        None,
        help="Variable key to inspect (e.g. sst, ssh). Omit with --all to show every variable.",
    ),
    all_vars: bool = typer.Option(
        False,
        "--all",
        "-a",
        is_flag=True,
        help="Show catalog summary for all variables configured in config.yaml.",
    ),
    show_rows: bool = typer.Option(
        False,
        "--rows",
        "-r",
        is_flag=True,
        help="Print individual catalog rows (filename, dataset, dates, timesteps).",
    ),
) -> None:
    """Inspect ZarrCatalog metadata: coverage, file count, and per-dataset breakdown."""
    # NOTE: the docstring above is kept as a single line on purpose; this
    # module registers the function with ``app.command()`` and no explicit
    # ``help=``, so Typer shows the docstring as the command help.
    #
    # Exits with status 1 when neither a key nor --all is supplied, or when
    # the supplied key is not a configured variable.

    if not var_key and not all_vars:
        typer.echo("Provide a variable key or use --all.", err=True)
        raise typer.Exit(code=1)

    variables = settings.app_config.variables

    if all_vars:
        # --all takes precedence over an explicitly supplied key.
        keys = list(variables.keys())
    else:
        if var_key not in variables:
            typer.echo(
                f"Unknown variable key '{var_key}'. Available: {', '.join(variables)}.",
                err=True,
            )
            # Previously this path fell through and the command exited 0 even
            # though an error had been reported; fail like `compile` does.
            raise typer.Exit(code=1)
        keys = [var_key]

    for key in keys:
        _print_catalog(key, show_rows)
119
+
120
+
121
+ app.command()(catalog)
@@ -0,0 +1,99 @@
1
+ """
2
+ h2mare compile — merge per-variable Zarr stores into a unified h2ds dataset.
3
+
4
+ Reads the individual per-variable Zarr stores and interpolates them to a
5
+ common 0.25° daily grid, writing the result as the h2ds compiled dataset.
6
+ When no dates are given the step infers what is missing from the local store.
7
+
8
+ Examples
9
+ --------
10
+ # Compile all available variables (dates inferred from store)
11
+ uv run h2mare compile
12
+
13
+ # Compile specific variables over a date range
14
+ uv run h2mare compile -v sst -v ssh -v mld --start-date 2024-01-01 --end-date 2024-12-31
15
+
16
+ # Compile with a custom store path
17
+ uv run h2mare compile --store-path D:/GlobalData
18
+ """
19
+
20
+ from pathlib import Path
21
+ from typing import List, Optional
22
+
23
+ import pandas as pd
24
+ import typer
25
+ from loguru import logger
26
+
27
+ from h2mare.config import settings
28
+
29
+ app = typer.Typer()
30
+
31
+
32
def compile(
    vars: Optional[List[str]] = typer.Option(
        None,
        "--vars",
        "-v",
        help=(
            "Variable key(s) to compile (repeat for multiple: -v sst -v ssh). "
            "Defaults to all available keys."
        ),
    ),
    start_date: Optional[str] = typer.Option(
        None,
        "--start-date",
        help="Start date (YYYY-MM-DD). Must be paired with --end-date.",
    ),
    end_date: Optional[str] = typer.Option(
        None,
        "--end-date",
        help="End date (YYYY-MM-DD). Must be paired with --start-date.",
    ),
    store_path: Optional[Path] = typer.Option(
        None,
        "--store-path",
        help="Override the Zarr store root (defaults to STORE_DIR from .env).",
    ),
) -> None:
    """Merge per-variable Zarr stores into the unified h2ds compiled dataset."""
    # NOTE: the docstring above is kept as a single line on purpose; this
    # module registers the function with ``app.command()`` and no explicit
    # ``help=``, so Typer shows the docstring as the command help.
    #
    # All validation failures below echo a message to stderr and exit with
    # status 1 before the Compiler is imported or run.

    log_path = settings.LOGS_DIR / "h2mare.log"
    logger.add(log_path, level="INFO")

    # Exactly one of the two dates given -> reject.
    if bool(start_date) ^ bool(end_date):
        typer.echo(
            "Error: --start-date and --end-date must be provided together.", err=True
        )
        raise typer.Exit(code=1)

    if start_date and end_date:
        # A malformed date string makes pd.Timestamp raise ValueError; report
        # it as a normal CLI error instead of an unhandled traceback.
        try:
            start_ts = pd.Timestamp(start_date)
            end_ts = pd.Timestamp(end_date)
        except ValueError as exc:
            typer.echo(
                f"Error: could not parse --start-date/--end-date: {exc}", err=True
            )
            raise typer.Exit(code=1) from exc

        # Strings such as "NaT" parse to a missing timestamp, which would
        # silently pass the ordering comparison below.
        if pd.isna(start_ts) or pd.isna(end_ts):
            typer.echo(
                "Error: --start-date and --end-date must be valid dates (YYYY-MM-DD).",
                err=True,
            )
            raise typer.Exit(code=1)

        if start_ts >= end_ts:
            typer.echo(
                f"Error: --start-date ({start_date}) must be before --end-date ({end_date}).",
                err=True,
            )
            raise typer.Exit(code=1)

    if vars:
        available = set(settings.app_config.variables.keys())
        unknown = set(vars) - available
        if unknown:
            typer.echo(
                f"Error: unknown variable key(s): {', '.join(sorted(unknown))}. "
                f"Available: {', '.join(sorted(available))}.",
                err=True,
            )
            raise typer.Exit(code=1)

    # Imported only after validation has passed, as in the original code.
    from h2mare.processing.compiler import Compiler

    # The original date strings (not the parsed timestamps) are forwarded.
    Compiler(remote_store_root=store_path or settings.STORE_DIR).run(
        start_date=start_date,
        end_date=end_date,
        var_keys=list(vars) if vars else None,
    )
97
+
98
+
99
+ app.command()(compile)