h2mare 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. h2mare/__init__.py +29 -0
  2. h2mare/cli/__init__.py +24 -0
  3. h2mare/cli/catalog.py +121 -0
  4. h2mare/cli/compile.py +99 -0
  5. h2mare/cli/main.py +154 -0
  6. h2mare/cli/nc2zarr.py +88 -0
  7. h2mare/config.py +205 -0
  8. h2mare/downloader/__init__.py +19 -0
  9. h2mare/downloader/aviso_downloader.py +453 -0
  10. h2mare/downloader/base.py +104 -0
  11. h2mare/downloader/cds_downloader.py +224 -0
  12. h2mare/downloader/cmems_downloader.py +534 -0
  13. h2mare/downloader/cmems_utils.py +159 -0
  14. h2mare/downloader/commons.py +70 -0
  15. h2mare/format_converters/__init__.py +9 -0
  16. h2mare/format_converters/netcdf2zarr.py +441 -0
  17. h2mare/format_converters/parquet2csv.py +155 -0
  18. h2mare/format_converters/zarr2parquet.py +105 -0
  19. h2mare/models.py +56 -0
  20. h2mare/pipeline_manager.py +104 -0
  21. h2mare/processing/__init__.py +27 -0
  22. h2mare/processing/compiler.py +367 -0
  23. h2mare/processing/core/__init__.py +0 -0
  24. h2mare/processing/core/aviso.py +519 -0
  25. h2mare/processing/core/cds.py +613 -0
  26. h2mare/processing/core/cmems.py +76 -0
  27. h2mare/processing/core/fronts.py +285 -0
  28. h2mare/processing/extractor.py +989 -0
  29. h2mare/processing/registry.py +28 -0
  30. h2mare/storage/__init__.py +36 -0
  31. h2mare/storage/coverage.py +69 -0
  32. h2mare/storage/parquet_helpers.py +201 -0
  33. h2mare/storage/parquet_indexer.py +712 -0
  34. h2mare/storage/parquet_plotter.py +191 -0
  35. h2mare/storage/storage.py +170 -0
  36. h2mare/storage/xarray_helpers.py +156 -0
  37. h2mare/storage/zarr_catalog.py +1177 -0
  38. h2mare/types.py +342 -0
  39. h2mare/utils/__init__.py +26 -0
  40. h2mare/utils/datetime_utils.py +54 -0
  41. h2mare/utils/files_io.py +162 -0
  42. h2mare/utils/labels.py +90 -0
  43. h2mare/utils/logging_utils.py +28 -0
  44. h2mare/utils/paths.py +78 -0
  45. h2mare/utils/plot.py +322 -0
  46. h2mare/utils/spatial.py +112 -0
  47. h2mare/validators.py +88 -0
  48. h2mare-0.1.0.dist-info/METADATA +174 -0
  49. h2mare-0.1.0.dist-info/RECORD +53 -0
  50. h2mare-0.1.0.dist-info/WHEEL +5 -0
  51. h2mare-0.1.0.dist-info/entry_points.txt +2 -0
  52. h2mare-0.1.0.dist-info/licenses/LICENSE.txt +21 -0
  53. h2mare-0.1.0.dist-info/top_level.txt +1 -0
h2mare/__init__.py ADDED
@@ -0,0 +1,29 @@
1
+ """
2
+ h2mare - Geospatial Processing for Climate and Ocean Data
3
+
4
+ Main components:
5
+ - config: Project paths and settings
6
+ - models: Data models for configuration
7
+ """
8
+
9
+ __version__ = "0.1.0"
10
+
11
+ from .config import settings
12
+ from .models import AppConfig, KeyVarConfigEntry, VariablesConfig
13
+ from .types import BBox, DateLike, DateRange, DownloadTask, TimeResolution
14
+ from .validators import validate_time_resolution, validate_var_key, validate_var_keys
15
+
16
+ __all__ = [
17
+ "settings",
18
+ "AppConfig",
19
+ "VariablesConfig",
20
+ "KeyVarConfigEntry",
21
+ "DateLike",
22
+ "DateRange",
23
+ "BBox",
24
+ "TimeResolution",
25
+ "DownloadTask",
26
+ "validate_var_key",
27
+ "validate_var_keys",
28
+ "validate_time_resolution",
29
+ ]
h2mare/cli/__init__.py ADDED
@@ -0,0 +1,24 @@
1
+ """h2mare command-line interface."""
2
+
3
+ import typer
4
+
5
+ from h2mare.cli.catalog import catalog
6
+ from h2mare.cli.compile import compile
7
+ from h2mare.cli.main import run
8
+ from h2mare.cli.nc2zarr import convert
9
+
10
+ app = typer.Typer(
11
+ name="h2mare",
12
+ help="Climate and ocean data pipeline — download, convert, and inspect.",
13
+ no_args_is_help=True,
14
+ )
15
+
16
+ app.command("run", help="Download and convert data for one or more variable keys.")(run)
17
+ app.command(
18
+ "convert", help="Convert downloaded NetCDF/GRIB files to Zarr (no download)."
19
+ )(convert)
20
+ app.command("catalog", help="Inspect ZarrCatalog metadata for a variable.")(catalog)
21
+ app.command(
22
+ "compile",
23
+ help="Merge per-variable Zarr stores into the unified h2ds compiled dataset.",
24
+ )(compile)
h2mare/cli/catalog.py ADDED
@@ -0,0 +1,121 @@
1
+ """
2
+ h2mare catalog — inspect ZarrCatalog metadata for a variable.
3
+
4
+ Shows coverage, file count, variables, and per-dataset breakdown from the
5
+ local Parquet index without opening any Zarr files.
6
+
7
+ Examples
8
+ --------
9
+ # Summary for SST
10
+ uv run h2mare catalog sst
11
+
12
+ # Summary for all configured variables
13
+ uv run h2mare catalog --all
14
+
15
+ # Show individual catalog rows
16
+ uv run h2mare catalog sst --rows
17
+ """
18
+
19
+ from typing import Optional
20
+
21
+ import pandas as pd
22
+ import typer
23
+ from loguru import logger
24
+
25
+ from h2mare.config import settings
26
+
27
+ app = typer.Typer()
28
+
29
+
30
def _print_catalog(var_key: str, show_rows: bool) -> None:
    """Echo a human-readable summary of the ZarrCatalog for ``var_key``.

    Parameters
    ----------
    var_key
        Variable key handed to ``ZarrCatalog``.
    show_rows
        When true (and the catalog table is not empty), also print the
        individual catalog rows.

    Notes
    -----
    If the catalog cannot be constructed, the error is echoed to stderr and
    the function returns without raising.
    """
    # Imported here rather than at module level, as in the original code.
    from h2mare.storage.zarr_catalog import ZarrCatalog

    try:
        zarr_catalog = ZarrCatalog(var_key)
    except Exception as e:
        typer.echo(f" [{var_key}] Could not load catalog: {e}", err=True)
        return

    table = zarr_catalog.df
    info = zarr_catalog.summary()
    coverage = info.get("time_coverage")

    typer.echo(f"\nZarrCatalog — {var_key.upper()}")
    typer.echo(f" Files : {info['num_files']}")

    # The summary reports either a range object with start/end or a
    # "No data" marker (or nothing at all).
    if coverage and coverage != "No data":
        coverage_text = f"{coverage.start.date()} → {coverage.end.date()}"
    else:
        coverage_text = "No data"
    typer.echo(f" Coverage : {coverage_text}")

    names = info.get("variables") or set()
    if names:
        names_text = ", ".join(sorted(names))
    else:
        names_text = "—"
    typer.echo(f" Variables : {names_text}")
    typer.echo(f" Timesteps : {info.get('total_timesteps', '—')}")
    typer.echo(f" Store : {info['store_root']}")
    typer.echo(f" Catalog : {info['catalog_path']}")

    last_scan = info.get("last_scanned")
    if pd.notna(last_scan):
        scanned_text = last_scan.strftime("%Y-%m-%d %H:%M:%S")
    else:
        scanned_text = "—"
    typer.echo(f" Scanned : {scanned_text}")

    if not table.empty and "dataset" in table.columns:
        typer.echo("\n Dataset breakdown:")
        for dataset_name, rows in table.groupby("dataset", sort=True):
            first_day = rows["start_date"].min()
            last_day = rows["end_date"].max()
            if "num_timesteps" in rows.columns:
                timestep_total = rows["num_timesteps"].sum()
            else:
                timestep_total = "—"
            typer.echo(f" {dataset_name}")
            typer.echo(
                f" {first_day.date()} → {last_day.date()} ({timestep_total} timesteps)"
            )

    if show_rows and not table.empty:
        wanted = ("filename", "dataset", "start_date", "end_date", "num_timesteps")
        present = [name for name in wanted if name in table.columns]
        typer.echo(f"\n Rows:\n{table[present].to_string(index=False)}")
81
+
82
+
83
def catalog(
    var_key: Optional[str] = typer.Argument(
        None,
        help="Variable key to inspect (e.g. sst, ssh). Omit with --all to show every variable.",
    ),
    all_vars: bool = typer.Option(
        False,
        "--all",
        "-a",
        is_flag=True,
        help="Show catalog summary for all variables configured in config.yaml.",
    ),
    show_rows: bool = typer.Option(
        False,
        "--rows",
        "-r",
        is_flag=True,
        help="Print individual catalog rows (filename, dataset, dates, timesteps).",
    ),
) -> None:
    """Inspect ZarrCatalog metadata: coverage, file count, and per-dataset breakdown.

    Parameters
    ----------
    var_key
        Single variable key to inspect. Ignored when ``--all`` is given.
    all_vars
        Print the summary for every key in ``settings.app_config.variables``.
    show_rows
        Forwarded to ``_print_catalog`` to also print individual rows.

    Raises
    ------
    typer.Exit
        With code 1 when neither a key nor ``--all`` is supplied, or when
        the supplied key is not a configured variable.
    """

    if not var_key and not all_vars:
        typer.echo("Provide a variable key or use --all.", err=True)
        raise typer.Exit(code=1)

    configured = settings.app_config.variables

    if all_vars:
        # Keys come straight from the configuration, so no membership
        # check is needed on this path.
        for key in list(configured.keys()):
            _print_catalog(key, show_rows)
    else:
        if var_key not in configured:
            typer.echo(
                f"Unknown variable key '{var_key}'. Available: {', '.join(configured)}.",
                err=True,
            )
            # Exit non-zero so scripts can detect the failure, matching how
            # the run and compile commands treat unknown keys.
            raise typer.Exit(code=1)
        _print_catalog(var_key, show_rows)


app.command()(catalog)
h2mare/cli/compile.py ADDED
@@ -0,0 +1,99 @@
1
+ """
2
+ h2mare compile — merge per-variable Zarr stores into a unified h2ds dataset.
3
+
4
+ Reads the individual per-variable Zarr stores and interpolates them to a
5
+ common 0.25° daily grid, writing the result as the h2ds compiled dataset.
6
+ When no dates are given the step infers what is missing from the local store.
7
+
8
+ Examples
9
+ --------
10
+ # Compile all available variables (dates inferred from store)
11
+ uv run h2mare compile
12
+
13
+ # Compile specific variables over a date range
14
+ uv run h2mare compile -v sst -v ssh -v mld --start-date 2024-01-01 --end-date 2024-12-31
15
+
16
+ # Compile with a custom store path
17
+ uv run h2mare compile --store-path D:/GlobalData
18
+ """
19
+
20
+ from pathlib import Path
21
+ from typing import List, Optional
22
+
23
+ import pandas as pd
24
+ import typer
25
+ from loguru import logger
26
+
27
+ from h2mare.config import settings
28
+
29
+ app = typer.Typer()
30
+
31
+
32
def compile(
    vars: Optional[List[str]] = typer.Option(
        None,
        "--vars",
        "-v",
        help=(
            "Variable key(s) to compile (repeat for multiple: -v sst -v ssh). "
            "Defaults to all available keys."
        ),
    ),
    start_date: Optional[str] = typer.Option(
        None,
        "--start-date",
        help="Start date (YYYY-MM-DD). Must be paired with --end-date.",
    ),
    end_date: Optional[str] = typer.Option(
        None,
        "--end-date",
        help="End date (YYYY-MM-DD). Must be paired with --start-date.",
    ),
    store_path: Optional[Path] = typer.Option(
        None,
        "--store-path",
        help="Override the Zarr store root (defaults to STORE_DIR from .env).",
    ),
) -> None:
    """Merge per-variable Zarr stores into the unified h2ds compiled dataset.

    Parameters
    ----------
    vars
        Variable keys to compile; ``None`` is passed to the compiler when
        the option is omitted.
    start_date, end_date
        Optional date strings. They must be supplied together, must be
        parseable by ``pandas.Timestamp``, and the start must be strictly
        before the end. They are forwarded to the compiler as given.
    store_path
        Store root override; falls back to ``settings.STORE_DIR``.

    Raises
    ------
    typer.Exit
        With code 1 when the date options are unpaired, unparseable, or not
        in order, or when an unknown variable key is requested.
    """

    log_path = settings.LOGS_DIR / "h2mare.log"
    logger.add(log_path, level="INFO")

    if bool(start_date) ^ bool(end_date):
        typer.echo(
            "Error: --start-date and --end-date must be provided together.", err=True
        )
        raise typer.Exit(code=1)

    if start_date and end_date:
        # Report malformed dates as a CLI error instead of a traceback.
        try:
            start_ts = pd.Timestamp(start_date)
            end_ts = pd.Timestamp(end_date)
        except ValueError as exc:
            typer.echo(
                f"Error: could not parse --start-date/--end-date: {exc}", err=True
            )
            raise typer.Exit(code=1) from exc
        # "NaT" parses without error, and NaT comparisons are always False,
        # so it would slip through the ordering check below.
        if pd.isna(start_ts) or pd.isna(end_ts):
            typer.echo(
                "Error: --start-date and --end-date must be valid dates (YYYY-MM-DD).",
                err=True,
            )
            raise typer.Exit(code=1)
        if start_ts >= end_ts:
            typer.echo(
                f"Error: --start-date ({start_date}) must be before --end-date ({end_date}).",
                err=True,
            )
            raise typer.Exit(code=1)

    if vars:
        available = set(settings.app_config.variables.keys())
        unknown = set(vars) - available
        if unknown:
            typer.echo(
                f"Error: unknown variable key(s): {', '.join(sorted(unknown))}. "
                f"Available: {', '.join(sorted(available))}.",
                err=True,
            )
            raise typer.Exit(code=1)

    # Imported here rather than at module level, as in the original code.
    from h2mare.processing.compiler import Compiler

    # NOTE(review): unlike the `run` command, there is no explicit check that
    # a store root is available — confirm Compiler accepts a missing root.
    Compiler(remote_store_root=store_path or settings.STORE_DIR).run(
        start_date=start_date,
        end_date=end_date,
        var_keys=list(vars) if vars else None,
    )


app.command()(compile)
h2mare/cli/main.py ADDED
@@ -0,0 +1,154 @@
1
+ """
2
+ h2mare run — download and convert pipeline.
3
+
4
+ Downloads raw data from configured providers and converts it to Zarr.
5
+ When no dates are given the pipeline infers what is missing from the
6
+ local store and downloads only the gap.
7
+
8
+ Examples
9
+ --------
10
+ # First-time download — dates must be explicit
11
+ uv run h2mare run -v sst --start-date 2021-01-01 --end-date 2021-12-31
12
+
13
+ # Update existing store (dates inferred automatically)
14
+ uv run h2mare run -v sst
15
+
16
+ # Multiple variables at once
17
+ uv run h2mare run -v seapodym -v mld -v o2 -v chl
18
+
19
+ # Download only, skip Zarr conversion
20
+ uv run h2mare run -v sst --no-convert
21
+
22
+ # Skip the compile step after conversion
23
+ uv run h2mare run -v sst --no-compile
24
+
25
+ # Validate configuration without downloading
26
+ uv run h2mare run -v sst --dry-run
27
+
28
+ # Process all variables in config.yaml
29
+ uv run h2mare run
30
+ """
31
+
32
+ import logging
33
+ from pathlib import Path
34
+ from typing import List, Optional
35
+
36
+ import pandas as pd
37
+ import typer
38
+ from loguru import logger
39
+
40
+ from h2mare.config import settings
41
+ from h2mare.downloader.aviso_downloader import AVISODownloader
42
+ from h2mare.downloader.cds_downloader import CDSDownloader
43
+ from h2mare.downloader.cmems_downloader import CMEMSDownloader
44
+ from h2mare.pipeline_manager import PipelineManager
45
+
46
+ DOWNLOADER_REGISTRY = {
47
+ "cmems": CMEMSDownloader,
48
+ "aviso": AVISODownloader,
49
+ "cds": CDSDownloader,
50
+ }
51
+
52
+ app = typer.Typer()
53
+
54
+
55
def run(
    vars: Optional[List[str]] = typer.Option(
        None,
        "--vars",
        "-v",
        help=(
            "Variable key(s) to process (repeat for multiple: -v sst -v ssh). "
            "Defaults to all keys in config.yaml."
        ),
    ),
    start_date: Optional[str] = typer.Option(
        None,
        "--start-date",
        help="Start date (YYYY-MM-DD). Must be paired with --end-date.",
    ),
    end_date: Optional[str] = typer.Option(
        None,
        "--end-date",
        help="End date (YYYY-MM-DD). Must be paired with --start-date.",
    ),
    store_path: Optional[Path] = typer.Option(
        None,
        "--store-path",
        help="Override the Zarr store root (defaults to STORE_DIR from .env).",
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        is_flag=True,
        help="Plan the download and log tasks without fetching any data.",
    ),
    no_convert: bool = typer.Option(
        False,
        "--no-convert",
        is_flag=True,
        help="Download raw files but skip Zarr conversion and compile.",
    ),
    no_compile: bool = typer.Option(
        False,
        "--no-compile",
        is_flag=True,
        help="Skip the compile step (h2ds dataset merge) after Zarr conversion.",
    ),
) -> None:
    """Download and convert climate/ocean data for one or more variable keys.

    Parameters
    ----------
    vars
        Variable keys to process, in the order given. When omitted, every
        key in ``settings.app_config.variables`` is processed in
        configuration order.
    start_date, end_date
        Optional date strings. They must be supplied together, must be
        parseable by ``pandas.Timestamp``, and the start must be strictly
        before the end.
    store_path
        Store root override; falls back to ``settings.STORE_DIR``.
    dry_run, no_convert, no_compile
        Forwarded unchanged to ``PipelineManager``.

    Raises
    ------
    typer.Exit
        With code 1 when the date options are unpaired, unparseable, or not
        in order, when an unknown variable key is requested, or when no
        store root is configured.
    """

    log_path = settings.LOGS_DIR / "h2mare.log"
    logger.add(log_path, level="INFO")
    logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)

    # Validate date arguments
    if bool(start_date) ^ bool(end_date):
        typer.echo(
            "Error: --start-date and --end-date must be provided together.", err=True
        )
        raise typer.Exit(code=1)

    # Parsed once here and reused when building the pipeline below.
    start_ts = None
    end_ts = None
    if start_date and end_date:
        # Report malformed dates as a CLI error instead of a traceback.
        try:
            start_ts = pd.Timestamp(start_date)
            end_ts = pd.Timestamp(end_date)
        except ValueError as exc:
            typer.echo(
                f"Error: could not parse --start-date/--end-date: {exc}", err=True
            )
            raise typer.Exit(code=1) from exc
        # "NaT" parses without error, and NaT comparisons are always False,
        # so it would slip through the ordering check below.
        if pd.isna(start_ts) or pd.isna(end_ts):
            typer.echo(
                "Error: --start-date and --end-date must be valid dates (YYYY-MM-DD).",
                err=True,
            )
            raise typer.Exit(code=1)
        if start_ts >= end_ts:
            typer.echo(
                f"Error: --start-date ({start_date}) must be before --end-date ({end_date}).",
                err=True,
            )
            raise typer.Exit(code=1)

    # Validate variable keys
    configured_keys = list(settings.app_config.variables.keys())
    available = set(configured_keys)
    # Default to configuration order; a list built from the set would make
    # the processing order vary between runs.
    selected = list(vars) if vars else configured_keys
    unknown = set(selected) - available
    if unknown:
        typer.echo(
            f"Error: unknown variable key(s): {', '.join(sorted(unknown))}. "
            f"Available: {', '.join(sorted(available))}.",
            err=True,
        )
        raise typer.Exit(code=1)

    store_root = store_path or settings.STORE_DIR
    if store_root is None:
        typer.echo(
            "Error: STORE_DIR is not set. Define it in .env or pass --store-path.",
            err=True,
        )
        raise typer.Exit(code=1)

    PipelineManager(
        app_config=settings.app_config,
        registry=DOWNLOADER_REGISTRY,
        store_root=store_root,
        dry_run=dry_run,
        start_date=start_ts,
        end_date=end_ts,
        no_convert=no_convert,
        no_compile=no_compile,
    ).run(variables=selected)


app.command()(run)
h2mare/cli/nc2zarr.py ADDED
@@ -0,0 +1,88 @@
1
+ """
2
+ nc2zarr — standalone NetCDF/GRIB → Zarr converter.
3
+
4
+ Converts downloaded raw files for one or more variable keys into processed
5
+ Zarr stores without running the full download pipeline. Use this when you
6
+ want to re-process existing downloads, convert files placed manually in the
7
+ downloads directory, or recover from a failed conversion step.
8
+
9
+ Examples
10
+ --------
11
+ Convert SST downloads from the default downloads directory (DOWNLOADS_DIR from .env):
12
+
13
+ uv run h2mare convert -v sst
14
+
15
+ Convert multiple variables in one call:
16
+
17
+ uv run h2mare convert -v sst -v ssh -v mld
18
+
19
+ Convert files from a custom input directory:
20
+
21
+ uv run h2mare convert -v sst --in-dir /data/raw/CMEMS_SST
22
+
23
+ Notes
24
+ -----
25
+ - Raw files must match the regex ``pattern`` defined for the variable in
26
+ ``config.yaml``.
27
+ - Zarr stores are written to ``STORE_DIR/<local_folder>/`` (from ``.env``).
28
+ - Provenance sidecars (``*_prov.json``) are written alongside each Zarr
29
+ only when a ``h2mare_manifest.json`` exists in the input directory (created
30
+ automatically by CMEMSDownloader after a download run).
31
+ """
32
+
33
+ from pathlib import Path
34
+ from typing import List, Optional
35
+
36
+ import typer
37
+ from loguru import logger
38
+
39
+ from h2mare.config import settings
40
+ from h2mare.format_converters.netcdf2zarr import Netcdf2Zarr
41
+
42
+ app = typer.Typer(
43
+ help="Convert downloaded NetCDF/GRIB files to Zarr without re-downloading."
44
+ )
45
+
46
+
47
@app.command()
def convert(
    var_keys: List[str] = typer.Option(
        ...,
        "--vars",
        "-v",
        help=(
            "Variable key to convert (repeat for multiple: -v sst -v ssh). "
            "Must match a key defined in config.yaml — "
            "e.g. sst, ssh, mld, chl, fsle."
        ),
    ),
    input_root: Optional[Path] = typer.Option(
        None,
        "--in-dir",
        help=(
            "Root directory that contains the downloaded raw files. "
            "The variable's local_folder is appended automatically. "
            "Defaults to DOWNLOADS_DIR from .env."
        ),
    ),
) -> None:
    """Convert downloaded raw NetCDF/GRIB files to Zarr for one or more variables.

    Parameters
    ----------
    var_keys
        Variable keys to convert, processed in the order given. A key that
        is missing from ``settings.app_config.variables`` is logged as an
        error and skipped.
    input_root
        Root directory holding the raw downloads; ``settings.DOWNLOADS_DIR``
        is used when it is not supplied. Each variable's ``local_folder``
        is appended to this root.
    """

    if input_root is None:
        raw_root = settings.DOWNLOADS_DIR
    else:
        raw_root = input_root

    # The log file is named after this module.
    sink_path = settings.LOGS_DIR / f"{Path(__file__).stem}.log"
    logger.add(sink_path, level="INFO")

    for key in var_keys:
        entry = settings.app_config.variables.get(key)
        if entry is not None:
            source_dir = raw_root / entry.local_folder
            logger.info(f"Converting '{key}' from {source_dir}")
            Netcdf2Zarr(key, download_root=source_dir).run()
        else:
            logger.error(f"Unknown variable key '{key}' — skipping. Check config.yaml.")
85
+
86
+
87
+ if __name__ == "__main__":
88
+ app()