h2mare 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- h2mare/__init__.py +29 -0
- h2mare/cli/__init__.py +24 -0
- h2mare/cli/catalog.py +121 -0
- h2mare/cli/compile.py +99 -0
- h2mare/cli/main.py +154 -0
- h2mare/cli/nc2zarr.py +88 -0
- h2mare/config.py +205 -0
- h2mare/downloader/__init__.py +19 -0
- h2mare/downloader/aviso_downloader.py +453 -0
- h2mare/downloader/base.py +104 -0
- h2mare/downloader/cds_downloader.py +224 -0
- h2mare/downloader/cmems_downloader.py +534 -0
- h2mare/downloader/cmems_utils.py +159 -0
- h2mare/downloader/commons.py +70 -0
- h2mare/format_converters/__init__.py +9 -0
- h2mare/format_converters/netcdf2zarr.py +441 -0
- h2mare/format_converters/parquet2csv.py +155 -0
- h2mare/format_converters/zarr2parquet.py +105 -0
- h2mare/models.py +56 -0
- h2mare/pipeline_manager.py +104 -0
- h2mare/processing/__init__.py +27 -0
- h2mare/processing/compiler.py +367 -0
- h2mare/processing/core/__init__.py +0 -0
- h2mare/processing/core/aviso.py +519 -0
- h2mare/processing/core/cds.py +613 -0
- h2mare/processing/core/cmems.py +76 -0
- h2mare/processing/core/fronts.py +285 -0
- h2mare/processing/extractor.py +989 -0
- h2mare/processing/registry.py +28 -0
- h2mare/storage/__init__.py +36 -0
- h2mare/storage/coverage.py +69 -0
- h2mare/storage/parquet_helpers.py +201 -0
- h2mare/storage/parquet_indexer.py +712 -0
- h2mare/storage/parquet_plotter.py +191 -0
- h2mare/storage/storage.py +170 -0
- h2mare/storage/xarray_helpers.py +156 -0
- h2mare/storage/zarr_catalog.py +1177 -0
- h2mare/types.py +342 -0
- h2mare/utils/__init__.py +26 -0
- h2mare/utils/datetime_utils.py +54 -0
- h2mare/utils/files_io.py +162 -0
- h2mare/utils/labels.py +90 -0
- h2mare/utils/logging_utils.py +28 -0
- h2mare/utils/paths.py +78 -0
- h2mare/utils/plot.py +322 -0
- h2mare/utils/spatial.py +112 -0
- h2mare/validators.py +88 -0
- h2mare-0.1.0.dist-info/METADATA +174 -0
- h2mare-0.1.0.dist-info/RECORD +53 -0
- h2mare-0.1.0.dist-info/WHEEL +5 -0
- h2mare-0.1.0.dist-info/entry_points.txt +2 -0
- h2mare-0.1.0.dist-info/licenses/LICENSE.txt +21 -0
- h2mare-0.1.0.dist-info/top_level.txt +1 -0
h2mare/__init__.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""
|
|
2
|
+
h2mare - Geospatial Processing for Climate and Ocean Data
|
|
3
|
+
|
|
4
|
+
Main components:
|
|
5
|
+
- config: Project paths and settings
|
|
6
|
+
- models: Data models for configuration
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
__version__ = "0.1.0"
|
|
10
|
+
|
|
11
|
+
from .config import settings
|
|
12
|
+
from .models import AppConfig, KeyVarConfigEntry, VariablesConfig
|
|
13
|
+
from .types import BBox, DateLike, DateRange, DownloadTask, TimeResolution
|
|
14
|
+
from .validators import validate_time_resolution, validate_var_key, validate_var_keys
|
|
15
|
+
|
|
16
|
+
# Public names re-exported at package level (``from h2mare import ...``).
# Entries are grouped by the submodule they are imported from above.
__all__ = [
    # .config
    "settings",
    # .models
    "AppConfig",
    "VariablesConfig",
    "KeyVarConfigEntry",
    # .types
    "DateLike",
    "DateRange",
    "BBox",
    "TimeResolution",
    "DownloadTask",
    # .validators
    "validate_var_key",
    "validate_var_keys",
    "validate_time_resolution",
]
|
h2mare/cli/__init__.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""h2mare command-line interface."""
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
|
|
5
|
+
from h2mare.cli.catalog import catalog
|
|
6
|
+
from h2mare.cli.compile import compile
|
|
7
|
+
from h2mare.cli.main import run
|
|
8
|
+
from h2mare.cli.nc2zarr import convert
|
|
9
|
+
|
|
10
|
+
# Root Typer application for the ``h2mare`` command.
# ``no_args_is_help=True`` makes a bare invocation print the help text.
app = typer.Typer(
    name="h2mare",
    help="Climate and ocean data pipeline — download, convert, and inspect.",
    no_args_is_help=True,
)

# Register each imported callable as a named sub-command. ``app.command(...)``
# returns a decorator, which is applied here by calling it on the function.
app.command("run", help="Download and convert data for one or more variable keys.")(run)
app.command(
    "convert", help="Convert downloaded NetCDF/GRIB files to Zarr (no download)."
)(convert)
app.command("catalog", help="Inspect ZarrCatalog metadata for a variable.")(catalog)
app.command(
    "compile",
    help="Merge per-variable Zarr stores into the unified h2ds compiled dataset.",
)(compile)
|
h2mare/cli/catalog.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""
|
|
2
|
+
h2mare catalog — inspect ZarrCatalog metadata for a variable.
|
|
3
|
+
|
|
4
|
+
Shows coverage, file count, variables, and per-dataset breakdown from the
|
|
5
|
+
local Parquet index without opening any Zarr files.
|
|
6
|
+
|
|
7
|
+
Examples
|
|
8
|
+
--------
|
|
9
|
+
# Summary for SST
|
|
10
|
+
uv run h2mare catalog sst
|
|
11
|
+
|
|
12
|
+
# Summary for all configured variables
|
|
13
|
+
uv run h2mare catalog --all
|
|
14
|
+
|
|
15
|
+
# Show individual catalog rows
|
|
16
|
+
uv run h2mare catalog sst --rows
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from typing import Optional
|
|
20
|
+
|
|
21
|
+
import pandas as pd
|
|
22
|
+
import typer
|
|
23
|
+
from loguru import logger
|
|
24
|
+
|
|
25
|
+
from h2mare.config import settings
|
|
26
|
+
|
|
27
|
+
app = typer.Typer()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _print_catalog(var_key: str, show_rows: bool) -> None:
    """Echo a summary of the ZarrCatalog for a single variable key.

    Prints the file count, time coverage, variables, timestep total, store
    and catalog locations and the last scan time, followed by a per-dataset
    breakdown when the catalog frame has a ``dataset`` column.

    Args:
        var_key: Variable key used to construct the ``ZarrCatalog``.
        show_rows: When true (and the catalog is not empty), also print the
            individual catalog rows.

    Returns:
        None. If the catalog cannot be constructed, the error is echoed to
        stderr and the function returns without raising.
    """
    # Imported inside the function rather than at module import time.
    from h2mare.storage.zarr_catalog import ZarrCatalog

    try:
        zarr_cat = ZarrCatalog(var_key)
    except Exception as exc:
        typer.echo(f" [{var_key}] Could not load catalog: {exc}", err=True)
        return

    frame = zarr_cat.df
    info = zarr_cat.summary()
    coverage = info.get("time_coverage")

    typer.echo(f"\nZarrCatalog — {var_key.upper()}")
    typer.echo(f" Files : {info['num_files']}")

    # The summary may hold the literal "No data" instead of a date range.
    coverage_line = (
        f" Coverage : {coverage.start.date()} → {coverage.end.date()}"
        if coverage and coverage != "No data"
        else " Coverage : No data"
    )
    typer.echo(coverage_line)

    var_names = info.get("variables") or set()
    if var_names:
        joined_names = ", ".join(sorted(var_names))
    else:
        joined_names = "—"
    typer.echo(f" Variables : {joined_names}")
    typer.echo(f" Timesteps : {info.get('total_timesteps', '—')}")
    typer.echo(f" Store : {info['store_root']}")
    typer.echo(f" Catalog : {info['catalog_path']}")

    scanned_at = info.get("last_scanned")
    if pd.notna(scanned_at):
        scanned_text = scanned_at.strftime("%Y-%m-%d %H:%M:%S")
    else:
        scanned_text = "—"
    typer.echo(f" Scanned : {scanned_text}")

    if not frame.empty and "dataset" in frame.columns:
        typer.echo("\n Dataset breakdown:")
        for dataset_name, rows in frame.groupby("dataset", sort=True):
            first_day = rows["start_date"].min()
            last_day = rows["end_date"].max()
            if "num_timesteps" in rows.columns:
                timestep_total = rows["num_timesteps"].sum()
            else:
                timestep_total = "—"
            typer.echo(f" {dataset_name}")
            typer.echo(
                f" {first_day.date()} → {last_day.date()} ({timestep_total} timesteps)"
            )

    if show_rows and not frame.empty:
        # Only print the columns that actually exist in this catalog.
        wanted = ("filename", "dataset", "start_date", "end_date", "num_timesteps")
        present = [name for name in wanted if name in frame.columns]
        typer.echo(f"\n Rows:\n{frame[present].to_string(index=False)}")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def catalog(
    var_key: Optional[str] = typer.Argument(
        None,
        help="Variable key to inspect (e.g. sst, ssh). Omit with --all to show every variable.",
    ),
    all_vars: bool = typer.Option(
        False,
        "--all",
        "-a",
        is_flag=True,
        help="Show catalog summary for all variables configured in config.yaml.",
    ),
    show_rows: bool = typer.Option(
        False,
        "--rows",
        "-r",
        is_flag=True,
        help="Print individual catalog rows (filename, dataset, dates, timesteps).",
    ),
) -> None:
    """Inspect ZarrCatalog metadata: coverage, file count, and per-dataset breakdown."""
    # NOTE: the docstring above is kept to one line on purpose; Typer uses a
    # command function's docstring as its help text.
    #
    # Parameters:
    #   var_key   - single variable key to inspect; ignored when --all is set.
    #   all_vars  - inspect every key in ``settings.app_config.variables``.
    #   show_rows - forwarded to ``_print_catalog`` to print individual rows.
    # Raises ``typer.Exit(code=1)`` when neither a key nor --all is given, or
    # when the given key is not configured.

    if not var_key and not all_vars:
        typer.echo("Provide a variable key or use --all.", err=True)
        raise typer.Exit(code=1)

    configured = settings.app_config.variables

    if all_vars:
        keys = list(configured.keys())
    else:
        # Fail with a non-zero status for an unknown key, matching how the
        # ``run`` and ``compile`` commands treat unknown variable keys.
        if var_key not in configured:
            typer.echo(
                f"Unknown variable key '{var_key}'. Available: {', '.join(configured)}.",
                err=True,
            )
            raise typer.Exit(code=1)
        keys = [var_key]

    for key in keys:
        _print_catalog(key, show_rows)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
app.command()(catalog)
|
h2mare/cli/compile.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""
|
|
2
|
+
h2mare compile — merge per-variable Zarr stores into a unified h2ds dataset.
|
|
3
|
+
|
|
4
|
+
Reads the individual per-variable Zarr stores and interpolates them to a
|
|
5
|
+
common 0.25° daily grid, writing the result as the h2ds compiled dataset.
|
|
6
|
+
When no dates are given the step infers what is missing from the local store.
|
|
7
|
+
|
|
8
|
+
Examples
|
|
9
|
+
--------
|
|
10
|
+
# Compile all available variables (dates inferred from store)
|
|
11
|
+
uv run h2mare compile
|
|
12
|
+
|
|
13
|
+
# Compile specific variables over a date range
|
|
14
|
+
uv run h2mare compile -v sst -v ssh -v mld --start-date 2024-01-01 --end-date 2024-12-31
|
|
15
|
+
|
|
16
|
+
# Compile with a custom store path
|
|
17
|
+
uv run h2mare compile --store-path D:/GlobalData
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import List, Optional
|
|
22
|
+
|
|
23
|
+
import pandas as pd
|
|
24
|
+
import typer
|
|
25
|
+
from loguru import logger
|
|
26
|
+
|
|
27
|
+
from h2mare.config import settings
|
|
28
|
+
|
|
29
|
+
app = typer.Typer()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def compile(
    vars: Optional[List[str]] = typer.Option(
        None,
        "--vars",
        "-v",
        help=(
            "Variable key(s) to compile (repeat for multiple: -v sst -v ssh). "
            "Defaults to all available keys."
        ),
    ),
    start_date: Optional[str] = typer.Option(
        None,
        "--start-date",
        help="Start date (YYYY-MM-DD). Must be paired with --end-date.",
    ),
    end_date: Optional[str] = typer.Option(
        None,
        "--end-date",
        help="End date (YYYY-MM-DD). Must be paired with --start-date.",
    ),
    store_path: Optional[Path] = typer.Option(
        None,
        "--store-path",
        help="Override the Zarr store root (defaults to STORE_DIR from .env).",
    ),
) -> None:
    """Merge per-variable Zarr stores into the unified h2ds compiled dataset."""
    # NOTE: the docstring above is kept to one line on purpose; Typer uses a
    # command function's docstring as its help text.
    #
    # Parameters:
    #   vars       - variable keys to compile; ``None`` is forwarded to the
    #                Compiler as "no explicit selection".
    #   start_date - start of the range, forwarded as the original string.
    #   end_date   - end of the range, forwarded as the original string.
    #   store_path - overrides ``settings.STORE_DIR`` as the store root.
    # Raises ``typer.Exit(code=1)`` on unpaired, unparseable or mis-ordered
    # dates and on unknown variable keys.

    log_path = settings.LOGS_DIR / "h2mare.log"
    logger.add(log_path, level="INFO")

    # Exactly one of the two dates being set is an error.
    if bool(start_date) ^ bool(end_date):
        typer.echo(
            "Error: --start-date and --end-date must be provided together.", err=True
        )
        raise typer.Exit(code=1)

    if start_date and end_date:
        # Report a malformed date as a CLI error instead of letting the
        # ValueError from pandas surface as a traceback.
        try:
            start_ts = pd.Timestamp(start_date)
            end_ts = pd.Timestamp(end_date)
            if pd.isna(start_ts) or pd.isna(end_ts):
                raise ValueError("date resolves to NaT")
        except ValueError as exc:
            typer.echo(
                f"Error: could not parse --start-date/--end-date ({exc}). "
                "Expected YYYY-MM-DD.",
                err=True,
            )
            raise typer.Exit(code=1) from exc

        if start_ts >= end_ts:
            typer.echo(
                f"Error: --start-date ({start_date}) must be before --end-date ({end_date}).",
                err=True,
            )
            raise typer.Exit(code=1)

    if vars:
        available = set(settings.app_config.variables.keys())
        unknown = set(vars) - available
        if unknown:
            typer.echo(
                f"Error: unknown variable key(s): {', '.join(sorted(unknown))}. "
                f"Available: {', '.join(sorted(available))}.",
                err=True,
            )
            raise typer.Exit(code=1)

    # Imported only after validation has passed, inside the function body.
    from h2mare.processing.compiler import Compiler

    Compiler(remote_store_root=store_path or settings.STORE_DIR).run(
        start_date=start_date,
        end_date=end_date,
        var_keys=list(vars) if vars else None,
    )
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
app.command()(compile)
|
h2mare/cli/main.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""
|
|
2
|
+
h2mare run — download and convert pipeline.
|
|
3
|
+
|
|
4
|
+
Downloads raw data from configured providers and converts it to Zarr.
|
|
5
|
+
When no dates are given the pipeline infers what is missing from the
|
|
6
|
+
local store and downloads only the gap.
|
|
7
|
+
|
|
8
|
+
Examples
|
|
9
|
+
--------
|
|
10
|
+
# First-time download — dates must be explicit
|
|
11
|
+
uv run h2mare run -v sst --start-date 2021-01-01 --end-date 2021-12-31
|
|
12
|
+
|
|
13
|
+
# Update existing store (dates inferred automatically)
|
|
14
|
+
uv run h2mare run -v sst
|
|
15
|
+
|
|
16
|
+
# Multiple variables at once
|
|
17
|
+
uv run h2mare run -v seapodym -v mld -v o2 -v chl
|
|
18
|
+
|
|
19
|
+
# Download only, skip Zarr conversion
|
|
20
|
+
uv run h2mare run -v sst --no-convert
|
|
21
|
+
|
|
22
|
+
# Skip the compile step after conversion
|
|
23
|
+
uv run h2mare run -v sst --no-compile
|
|
24
|
+
|
|
25
|
+
# Validate configuration without downloading
|
|
26
|
+
uv run h2mare run -v sst --dry-run
|
|
27
|
+
|
|
28
|
+
# Process all variables in config.yaml
|
|
29
|
+
uv run h2mare run
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
import logging
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
from typing import List, Optional
|
|
35
|
+
|
|
36
|
+
import pandas as pd
|
|
37
|
+
import typer
|
|
38
|
+
from loguru import logger
|
|
39
|
+
|
|
40
|
+
from h2mare.config import settings
|
|
41
|
+
from h2mare.downloader.aviso_downloader import AVISODownloader
|
|
42
|
+
from h2mare.downloader.cds_downloader import CDSDownloader
|
|
43
|
+
from h2mare.downloader.cmems_downloader import CMEMSDownloader
|
|
44
|
+
from h2mare.pipeline_manager import PipelineManager
|
|
45
|
+
|
|
46
|
+
# Lookup table from a short provider key to its downloader class.
# It is handed to PipelineManager as ``registry`` by ``run`` below; how the
# keys are matched against configured variables is decided there
# (not visible in this module).
DOWNLOADER_REGISTRY = {
    "cmems": CMEMSDownloader,
    "aviso": AVISODownloader,
    "cds": CDSDownloader,
}
|
|
51
|
+
|
|
52
|
+
app = typer.Typer()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def run(
    vars: Optional[List[str]] = typer.Option(
        None,
        "--vars",
        "-v",
        help=(
            "Variable key(s) to process (repeat for multiple: -v sst -v ssh). "
            "Defaults to all keys in config.yaml."
        ),
    ),
    start_date: Optional[str] = typer.Option(
        None,
        "--start-date",
        help="Start date (YYYY-MM-DD). Must be paired with --end-date.",
    ),
    end_date: Optional[str] = typer.Option(
        None,
        "--end-date",
        help="End date (YYYY-MM-DD). Must be paired with --start-date.",
    ),
    store_path: Optional[Path] = typer.Option(
        None,
        "--store-path",
        help="Override the Zarr store root (defaults to STORE_DIR from .env).",
    ),
    dry_run: bool = typer.Option(
        False,
        "--dry-run",
        is_flag=True,
        help="Plan the download and log tasks without fetching any data.",
    ),
    no_convert: bool = typer.Option(
        False,
        "--no-convert",
        is_flag=True,
        help="Download raw files but skip Zarr conversion and compile.",
    ),
    no_compile: bool = typer.Option(
        False,
        "--no-compile",
        is_flag=True,
        help="Skip the compile step (h2ds dataset merge) after Zarr conversion.",
    ),
) -> None:
    """Download and convert climate/ocean data for one or more variable keys."""
    # NOTE: the docstring above is kept to one line on purpose; Typer uses a
    # command function's docstring as its help text.
    #
    # Parameters:
    #   vars       - variable keys to process; defaults to every configured key.
    #   start_date - start of the range; parsed to a pandas Timestamp.
    #   end_date   - end of the range; parsed to a pandas Timestamp.
    #   store_path - overrides ``settings.STORE_DIR`` as the store root.
    #   dry_run, no_convert, no_compile - forwarded unchanged to PipelineManager.
    # Raises ``typer.Exit(code=1)`` on unpaired, unparseable or mis-ordered
    # dates, unknown variable keys, or a missing store root.

    log_path = settings.LOGS_DIR / "h2mare.log"
    logger.add(log_path, level="INFO")
    logging.getLogger("urllib3.connectionpool").setLevel(logging.ERROR)

    # Validate date arguments: exactly one of the two being set is an error.
    if bool(start_date) ^ bool(end_date):
        typer.echo(
            "Error: --start-date and --end-date must be provided together.", err=True
        )
        raise typer.Exit(code=1)

    start_ts: Optional[pd.Timestamp] = None
    end_ts: Optional[pd.Timestamp] = None
    if start_date and end_date:
        # Report a malformed date as a CLI error instead of letting the
        # ValueError from pandas surface as a traceback.
        try:
            start_ts = pd.Timestamp(start_date)
            end_ts = pd.Timestamp(end_date)
            if pd.isna(start_ts) or pd.isna(end_ts):
                raise ValueError("date resolves to NaT")
        except ValueError as exc:
            typer.echo(
                f"Error: could not parse --start-date/--end-date ({exc}). "
                "Expected YYYY-MM-DD.",
                err=True,
            )
            raise typer.Exit(code=1) from exc

        if start_ts >= end_ts:
            typer.echo(
                f"Error: --start-date ({start_date}) must be before --end-date ({end_date}).",
                err=True,
            )
            raise typer.Exit(code=1)

    # Validate variable keys. The default selection follows the order of the
    # configuration rather than set iteration order, so repeated runs process
    # variables in the same sequence.
    configured = list(settings.app_config.variables.keys())
    available = set(configured)
    selected = list(vars) if vars else configured
    unknown = set(selected) - available
    if unknown:
        typer.echo(
            f"Error: unknown variable key(s): {', '.join(sorted(unknown))}. "
            f"Available: {', '.join(sorted(available))}.",
            err=True,
        )
        raise typer.Exit(code=1)

    store_root = store_path or settings.STORE_DIR
    if store_root is None:
        typer.echo(
            "Error: STORE_DIR is not set. Define it in .env or pass --store-path.",
            err=True,
        )
        raise typer.Exit(code=1)

    # Reuse the timestamps parsed during validation (None when no dates given).
    PipelineManager(
        app_config=settings.app_config,
        registry=DOWNLOADER_REGISTRY,
        store_root=store_root,
        dry_run=dry_run,
        start_date=start_ts,
        end_date=end_ts,
        no_convert=no_convert,
        no_compile=no_compile,
    ).run(variables=selected)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
app.command()(run)
|
h2mare/cli/nc2zarr.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""
|
|
2
|
+
nc2zarr — standalone NetCDF/GRIB → Zarr converter.
|
|
3
|
+
|
|
4
|
+
Converts downloaded raw files for one or more variable keys into processed
|
|
5
|
+
Zarr stores without running the full download pipeline. Use this when you
|
|
6
|
+
want to re-process existing downloads, convert files placed manually in the
|
|
7
|
+
downloads directory, or recover from a failed conversion step.
|
|
8
|
+
|
|
9
|
+
Examples
|
|
10
|
+
--------
|
|
11
|
+
Convert SST downloads from the default downloads directory (DOWNLOADS_DIR from .env):
|
|
12
|
+
|
|
13
|
+
uv run h2mare convert -v sst
|
|
14
|
+
|
|
15
|
+
Convert multiple variables in one call:
|
|
16
|
+
|
|
17
|
+
uv run h2mare convert -v sst -v ssh -v mld
|
|
18
|
+
|
|
19
|
+
Convert files from a custom input directory:
|
|
20
|
+
|
|
21
|
+
uv run h2mare convert -v sst --in-dir /data/raw/CMEMS_SST
|
|
22
|
+
|
|
23
|
+
Notes
|
|
24
|
+
-----
|
|
25
|
+
- Raw files must match the regex ``pattern`` defined for the variable in
|
|
26
|
+
``config.yaml``.
|
|
27
|
+
- Zarr stores are written to ``STORE_DIR/<local_folder>/`` (from ``.env``).
|
|
28
|
+
- Provenance sidecars (``*_prov.json``) are written alongside each Zarr
|
|
29
|
+
only when a ``h2mare_manifest.json`` exists in the input directory (created
|
|
30
|
+
automatically by CMEMSDownloader after a download run).
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
from typing import List, Optional
|
|
35
|
+
|
|
36
|
+
import typer
|
|
37
|
+
from loguru import logger
|
|
38
|
+
|
|
39
|
+
from h2mare.config import settings
|
|
40
|
+
from h2mare.format_converters.netcdf2zarr import Netcdf2Zarr
|
|
41
|
+
|
|
42
|
+
app = typer.Typer(
|
|
43
|
+
help="Convert downloaded NetCDF/GRIB files to Zarr without re-downloading."
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@app.command()
def convert(
    var_keys: List[str] = typer.Option(
        ...,
        "--vars",
        "-v",
        help=(
            "Variable key to convert (repeat for multiple: -v sst -v ssh). "
            "Must match a key defined in config.yaml — "
            "e.g. sst, ssh, mld, chl, fsle."
        ),
    ),
    input_root: Optional[Path] = typer.Option(
        None,
        "--in-dir",
        help=(
            "Root directory that contains the downloaded raw files. "
            "The variable's local_folder is appended automatically. "
            "Defaults to DOWNLOADS_DIR from .env."
        ),
    ),
) -> None:
    """Convert downloaded raw NetCDF/GRIB files to Zarr for one or more variables."""
    # NOTE: the docstring above is kept to one line on purpose; Typer uses a
    # command function's docstring as its help text.
    #
    # Parameters:
    #   var_keys   - variable keys to convert; unknown keys are logged and skipped.
    #   input_root - root of the raw downloads; falls back to
    #                ``settings.DOWNLOADS_DIR`` when not given.

    if input_root is not None:
        downloads_root = input_root
    else:
        downloads_root = settings.DOWNLOADS_DIR

    # The log file is named after this module's file stem.
    logger.add(settings.LOGS_DIR / f"{Path(__file__).stem}.log", level="INFO")

    for key in var_keys:
        entry = settings.app_config.variables.get(key)
        if entry is not None:
            source_dir = downloads_root / entry.local_folder
            logger.info(f"Converting '{key}' from {source_dir}")
            Netcdf2Zarr(key, download_root=source_dir).run()
        else:
            logger.error(f"Unknown variable key '{key}' — skipping. Check config.yaml.")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
if __name__ == "__main__":
|
|
88
|
+
app()
|