climate-ref 0.8.0__py3-none-any.whl → 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- climate_ref/cli/__init__.py +5 -2
- climate_ref/cli/_git_utils.py +112 -0
- climate_ref/cli/_utils.py +24 -0
- climate_ref/cli/datasets.py +1 -0
- climate_ref/cli/providers.py +103 -4
- climate_ref/cli/test_cases.py +729 -0
- climate_ref/config.py +1 -1
- climate_ref/database.py +23 -0
- climate_ref/datasets/__init__.py +15 -11
- climate_ref/datasets/base.py +11 -17
- climate_ref/datasets/cmip6.py +1 -1
- climate_ref/solver.py +1 -1
- climate_ref/testing.py +115 -13
- {climate_ref-0.8.0.dist-info → climate_ref-0.9.0.dist-info}/METADATA +2 -1
- {climate_ref-0.8.0.dist-info → climate_ref-0.9.0.dist-info}/RECORD +19 -17
- {climate_ref-0.8.0.dist-info → climate_ref-0.9.0.dist-info}/WHEEL +0 -0
- {climate_ref-0.8.0.dist-info → climate_ref-0.9.0.dist-info}/entry_points.txt +0 -0
- {climate_ref-0.8.0.dist-info → climate_ref-0.9.0.dist-info}/licenses/LICENCE +0 -0
- {climate_ref-0.8.0.dist-info → climate_ref-0.9.0.dist-info}/licenses/NOTICE +0 -0
@@ -0,0 +1,729 @@
+"""
+Test data management commands for diagnostic development.
+
+These commands are intended for developers working on diagnostics and require
+a source checkout of the project with test data directories available.
+"""
+
+import shutil
+from pathlib import Path
+from typing import Annotated
+
+import pandas as pd
+import typer
+from loguru import logger
+from rich.console import Console
+from rich.table import Table
+
+from climate_ref.cli._git_utils import collect_regression_file_info, get_repo_for_path
+from climate_ref.cli._utils import format_size
+from climate_ref.config import Config
+from climate_ref.datasets import (
+    CMIP6DatasetAdapter,
+    DatasetAdapter,
+    Obs4MIPsDatasetAdapter,
+    PMPClimatologyDatasetAdapter,
+)
+from climate_ref.provider_registry import ProviderRegistry
+from climate_ref.solver import solve_executions
+from climate_ref.testing import TestCaseRunner
+from climate_ref_core.datasets import ExecutionDatasetCollection, SourceDatasetType
+from climate_ref_core.diagnostics import Diagnostic
+from climate_ref_core.esgf import ESGFFetcher
+from climate_ref_core.exceptions import (
+    DatasetResolutionError,
+    NoTestDataSpecError,
+    TestCaseNotFoundError,
+)
+from climate_ref_core.testing import (
+    TestCase,
+    TestCasePaths,
+    catalog_changed_since_regression,
+    get_catalog_hash,
+    load_datasets_from_yaml,
+    save_datasets_to_yaml,
+)
+
+app = typer.Typer(help=__doc__)
+
+
+def _build_catalog(dataset_adapter: DatasetAdapter, file_paths: list[Path]) -> pd.DataFrame:
+    """
+    Parse a list of dataset files into a catalog using a dataset adapter.
+
+    Parameters
+    ----------
+    dataset_adapter
+        Adapter used to parse the datasets and extract their metadata
+    file_paths
+        List of files to build a catalog from
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame catalog of datasets with metadata and paths
+    """
+    # Collect unique parent directories since the adapter scans directories
+    parent_dirs = list({fp.parent for fp in file_paths})
+
+    catalog_dfs = []
+    for parent_dir in parent_dirs:
+        try:
+            df = dataset_adapter.find_local_datasets(parent_dir)
+
+            # Filter to only include the files we fetched
+            fetched_files = {str(fp) for fp in file_paths}
+            df = df[df["path"].isin(fetched_files)]
+            if df.empty:
+                logger.warning(f"No matching files found in catalog for {parent_dir}")
+            catalog_dfs.append(df)
+        except Exception as e:
+            logger.warning(f"Failed to parse {parent_dir}: {e}")
+
+    return pd.concat(catalog_dfs, ignore_index=True)
+
+
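`find_local_datasets` scans directories rather than individual files, hence the parent-directory deduplication above followed by the filter back down to the fetched files. A minimal sketch of exercising the helper in isolation; the cache paths are hypothetical, not a real ESGF layout:

    from pathlib import Path

    from climate_ref.cli.test_cases import _build_catalog
    from climate_ref.datasets import CMIP6DatasetAdapter

    # Hypothetical paths standing in for files in the intake-esgf cache
    fetched = [
        Path("/tmp/esgf-cache/tas_Amon_model-a_historical_r1i1p1f1_gn.nc"),
        Path("/tmp/esgf-cache/tas_Amon_model-b_historical_r1i1p1f1_gn.nc"),
    ]

    catalog = _build_catalog(CMIP6DatasetAdapter(), fetched)
    # Facet metadata columns plus a "path" column, restricted to the fetched files
    print(catalog["path"].tolist())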
+def _solve_test_case(
+    diagnostic: Diagnostic,
+    data_catalog: dict[SourceDatasetType, pd.DataFrame],
+) -> ExecutionDatasetCollection:
+    """
+    Solve for test case datasets by applying the diagnostic's data requirements.
+
+    Runs the solver to determine which datasets from the catalog
+    satisfy the diagnostic's requirements. If more than one execution
+    satisfies them, the first solved execution is used.
+    """
+    executions = list(solve_executions(data_catalog, diagnostic, diagnostic.provider))
+
+    if not executions:
+        raise ValueError(f"No valid executions found for diagnostic {diagnostic.slug}")
+
+    return executions[0].datasets
+
+
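The `data_catalog` mapping consumed by the solver is simply source types to pandas catalogs. A toy sketch, with illustrative columns only (real catalogs carry the adapter's full facet metadata, and the path is hypothetical):

    import pandas as pd

    from climate_ref_core.datasets import SourceDatasetType

    data_catalog = {
        SourceDatasetType.CMIP6: pd.DataFrame(
            {
                "variable_id": ["tas"],
                "path": ["/tmp/esgf-cache/tas_Amon_model-a_historical.nc"],
            }
        ),
    }
    # _solve_test_case(diagnostic, data_catalog) returns the datasets of the
    # first execution that satisfies the diagnostic's data requirements.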
+def _fetch_and_build_catalog(
+    diag: Diagnostic,
+    tc: TestCase,
+    *,
+    force: bool = False,
+) -> tuple[ExecutionDatasetCollection, bool]:
+    """
+    Fetch test data and build catalog.
+
+    This function:
+    1. Fetches ESGF data using ESGFFetcher (files stored in intake-esgf cache)
+    2. Uses the dataset adapter matching each source type (CMIP6, obs4MIPs,
+       or PMP climatology) to create a data catalog
+    3. Solves for datasets using the diagnostic's data requirements
+    4. Writes catalog YAML to .catalogs/{provider}/{diagnostic}/{test_case}.yaml
+    5. Returns the solved datasets and whether the catalog was written
+
+    By default, the catalog is only written if the content has changed.
+    Use `force=True` to always write.
+
+    Parameters
+    ----------
+    diag
+        The diagnostic to fetch data for
+    tc
+        The test case to fetch data for
+    force
+        If True, always write the catalog even if unchanged
+
+    Returns
+    -------
+    :
+        Tuple of (datasets, catalog_was_written)
+    """
+    fetcher = ESGFFetcher()
+
+    # Fetch all requests - returns DataFrame with metadata + paths
+    combined = fetcher.fetch_for_test_case(tc.requests)
+
+    if combined.empty:
+        raise DatasetResolutionError(
+            f"No datasets found for {diag.provider.slug}/{diag.slug} test case '{tc.name}'"
+        )
+
+    # Group paths by source type and use adapters to build proper catalog
+    data_catalog: dict[SourceDatasetType, pd.DataFrame] = {}
+
+    for source_type, group_df in combined.groupby("source_type"):
+        file_paths = [Path(p) for p in group_df["path"].unique().tolist()]
+
+        if source_type == "CMIP6":
+            data_catalog[SourceDatasetType.CMIP6] = _build_catalog(CMIP6DatasetAdapter(), file_paths)
+
+        elif source_type == "obs4MIPs":
+            data_catalog[SourceDatasetType.obs4MIPs] = _build_catalog(Obs4MIPsDatasetAdapter(), file_paths)
+
+        elif source_type == "PMPClimatology":
+            data_catalog[SourceDatasetType.PMPClimatology] = _build_catalog(
+                PMPClimatologyDatasetAdapter(), file_paths
+            )
+
+    # Unrecognised source types are skipped; an entirely empty catalog is an error
+    if not data_catalog:
+        raise DatasetResolutionError(
+            f"No datasets found for {diag.provider.slug}/{diag.slug} test case '{tc.name}'"
+        )
+
+    # Solve for datasets
+    datasets = _solve_test_case(diag, data_catalog)
+
+    # Write catalog YAML to package-local test case directory
+    catalog_written = False
+    paths = TestCasePaths.from_diagnostic(diag, tc.name)
+    if paths:
+        paths.create()
+        catalog_written = save_datasets_to_yaml(datasets, paths.catalog, force=force)
+
+    return datasets, catalog_written
+
+
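The YAML round-trip in step 4 is what lets the `run` command work later without ESGF access. A sketch of the contract, assuming `datasets` and `paths` as in the function above:

    # Persist the solved collection; returns whether a write actually happened
    # (False when the existing catalog content is unchanged and force is False)
    written = save_datasets_to_yaml(datasets, paths.catalog, force=False)

    # Later, offline, the same collection is reconstructed from the YAML
    datasets_again = load_datasets_from_yaml(paths.catalog)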
+@app.command(name="fetch")
+def fetch_test_data(  # noqa: PLR0912
+    ctx: typer.Context,
+    provider: Annotated[
+        str | None,
+        typer.Option(help="Specific provider to fetch data for (e.g., 'esmvaltool', 'ilamb')"),
+    ] = None,
+    diagnostic: Annotated[
+        str | None,
+        typer.Option(help="Specific diagnostic slug to fetch data for"),
+    ] = None,
+    test_case: Annotated[
+        str | None,
+        typer.Option(help="Specific test case name to fetch data for"),
+    ] = None,
+    dry_run: Annotated[
+        bool,
+        typer.Option(help="Show what would be fetched without downloading"),
+    ] = False,
+    only_missing: Annotated[
+        bool,
+        typer.Option(help="Only fetch data for test cases without existing catalogs"),
+    ] = False,
+    force: Annotated[
+        bool,
+        typer.Option(help="Force overwrite catalog even if unchanged"),
+    ] = False,
+) -> None:
+    """
+    Fetch test data from ESGF for running diagnostic tests.
+
+    Downloads full-resolution ESGF data based on diagnostic test_data_spec.
+    Use --provider or --diagnostic to limit scope.
+
+    Examples
+    --------
+    ref test-cases fetch                    # Fetch all test data
+    ref test-cases fetch --provider ilamb   # Fetch ILAMB test data only
+    ref test-cases fetch --diagnostic ecs   # Fetch ECS diagnostic data
+    ref test-cases fetch --only-missing     # Skip test cases with existing catalogs
+    """
+    config = ctx.obj.config
+    db = ctx.obj.database
+
+    # Build provider registry to access diagnostics
+    registry = ProviderRegistry.build_from_config(config, db)
+
+    # Collect diagnostics to process
+    diagnostics_to_process: list[Diagnostic] = []
+
+    for provider_instance in registry.providers:
+        if provider and provider_instance.slug != provider:
+            continue
+
+        for diag in provider_instance.diagnostics():
+            if diagnostic and diag.slug != diagnostic:
+                continue
+            if diag.test_data_spec is None:
+                continue
+            diagnostics_to_process.append(diag)
+
+    if not diagnostics_to_process:
+        logger.warning("No diagnostics with test_data_spec found")
+        raise typer.Exit(code=0)
+
+    logger.info(f"Found {len(diagnostics_to_process)} diagnostics with test data specifications")
+
+    if dry_run:  # pragma: no cover
+        for diag in diagnostics_to_process:
+            logger.info(f"Would fetch data for: {diag.provider.slug}/{diag.slug}")
+            if diag.test_data_spec:
+                for tc in diag.test_data_spec.test_cases:
+                    if test_case and tc.name != test_case:
+                        continue
+                    # Check if catalog exists when using --only-missing
+                    if only_missing:
+                        paths = TestCasePaths.from_diagnostic(diag, tc.name)
+                        if paths and paths.catalog.exists():
+                            logger.info(f"  Test case: {tc.name} - [SKIP: catalog exists]")
+                            continue
+                    logger.info(f"  Test case: {tc.name} - {tc.description}")
+                    if tc.requests:
+                        for req in tc.requests:
+                            logger.info(f"    Request: {req.slug} ({req.source_type})")
+        return
+
+    # Process each diagnostic test case
+    for diag in diagnostics_to_process:  # pragma: no cover
+        logger.info(f"Fetching data for: {diag.provider.slug}/{diag.slug}")
+        if diag.test_data_spec:
+            for tc in diag.test_data_spec.test_cases:
+                if test_case and tc.name != test_case:
+                    continue
+                # Skip if catalog exists when using --only-missing
+                if only_missing:
+                    paths = TestCasePaths.from_diagnostic(diag, tc.name)
+                    if paths and paths.catalog.exists():
+                        logger.info(f"  Skipping test case: {tc.name} (catalog exists)")
+                        continue
+                if tc.requests:
+                    logger.info(f"  Processing test case: {tc.name}")
+                    try:
+                        _, catalog_written = _fetch_and_build_catalog(diag, tc, force=force)
+                        if not catalog_written:
+                            logger.info(f"  Catalog unchanged for {tc.name}")
+                    except DatasetResolutionError as e:
+                        logger.warning(f"  Could not build catalog for {tc.name}: {e}")
+
+
+@app.command(name="list")
+def list_cases(
+    ctx: typer.Context,
+    provider: Annotated[
+        str | None,
+        typer.Option(help="Filter by provider"),
+    ] = None,
+) -> None:
+    """
+    List test cases for all diagnostics.
+
+    Shows which test cases are defined for each diagnostic and their descriptions.
+    Also shows whether catalog and regression data exist for each test case.
+    """
+    config = ctx.obj.config
+    db = ctx.obj.database
+    console = ctx.obj.console
+
+    # Build provider registry to access diagnostics
+    registry = ProviderRegistry.build_from_config(config, db)
+
+    table = Table(title="Test Data Specifications")
+    table.add_column("Provider", style="cyan")
+    table.add_column("Diagnostic", style="green")
+    table.add_column("Test Case", style="yellow")
+    table.add_column("Description")
+    table.add_column("Requests", justify="right")
+    table.add_column("Catalog", justify="center")
+    table.add_column("Regression", justify="center")
+
+    for provider_instance in registry.providers:
+        if provider and provider_instance.slug != provider:
+            continue
+
+        for diag in provider_instance.diagnostics():
+            if diag.test_data_spec is None:
+                table.add_row(
+                    provider_instance.slug,
+                    diag.slug,
+                    "-",
+                    "(no test_data_spec)",
+                    "0",
+                    "-",
+                    "-",
+                )
+                continue
+
+            for tc in diag.test_data_spec.test_cases:
+                num_requests = len(tc.requests) if tc.requests else 0
+
+                # Check if catalog and regression data exist
+                paths = TestCasePaths.from_diagnostic(diag, tc.name)
+                if paths:
+                    catalog_status = "[green]yes[/green]" if paths.catalog.exists() else "[red]no[/red]"
+                    regression_status = "[green]yes[/green]" if paths.regression.exists() else "[red]no[/red]"
+                else:
+                    catalog_status = "[dim]-[/dim]"
+                    regression_status = "[dim]-[/dim]"
+
+                table.add_row(
+                    provider_instance.slug,
+                    diag.slug,
+                    tc.name,
+                    tc.description,
+                    str(num_requests),
+                    catalog_status,
+                    regression_status,
+                )
+
+    console.print(table)
+
+
+def _find_diagnostic(
+    registry: ProviderRegistry, provider_slug: str, diagnostic_slug: str
+) -> Diagnostic | None:
+    """Find a diagnostic by provider and diagnostic slugs."""
+    for provider_instance in registry.providers:
+        if provider_instance.slug == provider_slug:
+            for d in provider_instance.diagnostics():
+                if d.slug == diagnostic_slug:
+                    return d
+    return None
+
+
+def _print_regression_summary(  # pragma: no cover
+    console: Console,
+    regression_dir: Path,
+    size_threshold_mb: float = 1.0,
+) -> None:
+    """
+    Print a summary of the regression directory with file sizes and git status.
+
+    Parameters
+    ----------
+    console
+        Rich console for output
+    regression_dir
+        Path to the regression data directory
+    size_threshold_mb
+        Files larger than this (in MB) will be flagged
+    """
+    repo = get_repo_for_path(regression_dir)
+    repo_root = Path(repo.working_dir) if repo else regression_dir
+
+    threshold_bytes = int(size_threshold_mb * 1024 * 1024)
+    file_info = collect_regression_file_info(regression_dir, repo, threshold_bytes)
+
+    if not file_info:
+        console.print("[yellow]No files in regression directory[/yellow]")
+        return
+
+    total_size = sum(f["size"] for f in file_info)
+    large_files = sum(1 for f in file_info if f["is_large"])
+
+    table = Table(title=f"Regression Data: {regression_dir.relative_to(repo_root)}")
+    table.add_column("File", style="cyan", no_wrap=False)
+    table.add_column("Size", justify="right")
+    table.add_column("Git Status", justify="center")
+
+    # Colour-coding for each known git status value
+    status_colors = {
+        "new": "[green]new[/green]",
+        "staged": "[green]staged[/green]",
+        "modified": "[yellow]modified[/yellow]",
+        "tracked": "[dim]tracked[/dim]",
+        "untracked": "[red]untracked[/red]",
+        "deleted": "[red]deleted[/red]",
+        "unknown": "[dim]?[/dim]",
+    }
+
+    for info in file_info:
+        # Format size with color
+        size_str = format_size(info["size"])
+        if info["is_large"]:
+            size_str = f"[bold red]{size_str}[/bold red]"
+
+        # Get git status with color
+        git_status = info["git_status"]
+        git_status_str = status_colors.get(git_status, f"[dim]{git_status}[/dim]")
+
+        table.add_row(info["rel_path"], size_str, git_status_str)
+
+    console.print(table)
+
+    # Summary line
+    summary = f"\n[bold]Total:[/bold] {len(file_info)} files, {format_size(total_size)}"
+    if large_files > 0:
+        summary += f" ([red]{large_files} files > {size_threshold_mb} MB[/red])"
+    console.print(summary)
+
+
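`format_size` is imported from the new `climate_ref/cli/_utils.py` (+24 lines, listed above), whose body is not shown in this hunk. A conventional human-readable-size helper would look roughly like the following sketch (an assumption, not the packaged code):

    def format_size(num_bytes: int) -> str:
        # Hypothetical sketch of a human-readable size formatter
        size = float(num_bytes)
        for unit in ("B", "KiB", "MiB", "GiB"):
            if size < 1024:
                return f"{size:.1f} {unit}"
            size /= 1024
        return f"{size:.1f} TiB"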
+def _run_single_test_case(  # noqa: PLR0911, PLR0912, PLR0915
+    config: Config,
+    console: Console,
+    diag: Diagnostic,
+    tc: TestCase,
+    output_directory: Path | None,
+    force_regen: bool,
+    fetch: bool,
+    size_threshold: float,
+    clean: bool,
+) -> bool:
+    """
+    Run a single test case for a diagnostic.
+
+    Returns True if successful, False otherwise.
+    """
+    provider_slug = diag.provider.slug
+    diagnostic_slug = diag.slug
+    test_case_name = tc.name
+
+    # Resolve datasets: either fetch from ESGF or load from pre-built catalog
+    if fetch:
+        logger.info(f"Fetching test data for {provider_slug}/{diagnostic_slug}/{test_case_name}")
+        try:
+            datasets, _ = _fetch_and_build_catalog(diag, tc)
+        except DatasetResolutionError as e:
+            logger.error(f"Failed to fetch data for {provider_slug}/{diagnostic_slug}/{test_case_name}: {e}")
+            return False
+    else:
+        paths = TestCasePaths.from_diagnostic(diag, test_case_name)
+        if paths is None:
+            logger.error(f"Could not determine test data directory for {provider_slug}/{diagnostic_slug}")
+            return False
+
+        if not paths.catalog.exists():
+            logger.error(f"No catalog file found for {provider_slug}/{diagnostic_slug}/{test_case_name}")
+            logger.error("Run 'ref test-cases fetch' first or use --fetch flag")
+            return False
+
+        logger.info(f"Loading catalog from {paths.catalog}")
+        datasets = load_datasets_from_yaml(paths.catalog)
+
+    # Create runner and execute
+    runner = TestCaseRunner(config=config, datasets=datasets)
+
+    logger.info(f"Running test case {test_case_name!r} for {provider_slug}/{diagnostic_slug}")
+
+    try:
+        result = runner.run(diag, test_case_name, output_directory, clean=clean)
+    except NoTestDataSpecError:
+        logger.error(f"Diagnostic {provider_slug}/{diagnostic_slug} has no test_data_spec")
+        return False
+    except TestCaseNotFoundError:
+        logger.error(f"Test case {test_case_name!r} not found for {provider_slug}/{diagnostic_slug}")
+        if diag.test_data_spec:
+            logger.error(f"Available test cases: {diag.test_data_spec.case_names}")
+        return False
+    except DatasetResolutionError as e:
+        logger.error(str(e))
+        logger.error("Have you run 'ref test-cases fetch' first?")
+        return False
+    except Exception as e:
+        case_id = f"{provider_slug}/{diagnostic_slug}/{test_case_name}"
+        logger.error(f"Diagnostic execution failed for {case_id}: {e!s}")
+        return False
+
+    if not result.successful:
+        logger.error(f"Execution failed: {provider_slug}/{diagnostic_slug}/{test_case_name}")
+        return False
+
+    logger.info(f"Execution completed: {provider_slug}/{diagnostic_slug}/{test_case_name}")
+    if result.metric_bundle_filename:
+        logger.info(f"Metric bundle: {result.to_output_path(result.metric_bundle_filename)}")
+    if result.output_bundle_filename:
+        logger.info(f"Output bundle: {result.to_output_path(result.output_bundle_filename)}")
+
+    # Handle regression baseline comparison/regeneration
+    paths = TestCasePaths.from_diagnostic(diag, test_case_name)
+
+    if paths is None:
+        logger.warning("Could not determine test case directory for provider package")
+        return True
+
+    if force_regen:
+        paths.create()
+
+    if force_regen or not paths.regression.exists():
+        # Save full output directory as regression data
+        if paths.regression.exists():
+            shutil.rmtree(paths.regression)
+        paths.regression.mkdir(parents=True, exist_ok=True)
+        shutil.copytree(result.definition.output_directory, paths.regression, dirs_exist_ok=True)
+
+        # Replace absolute paths with placeholders for portability
+        # We don't touch binary files, only text-based ones
+        # TODO: Symlink regression datasets instead of any paths on users' systems
+        for glob_pattern in ("*.json", "*.txt", "*.yaml", "*.yml"):
+            for file in paths.regression.rglob(glob_pattern):
+                content = file.read_text()
+                content = content.replace(str(result.definition.output_directory), "<OUTPUT_DIR>")
+                content = content.replace(str(paths.test_data_dir), "<TEST_DATA_DIR>")
+                file.write_text(content)
+
+        # Store catalog hash for change detection
+        catalog_hash = get_catalog_hash(paths.catalog)
+        if catalog_hash:
+            paths.regression_catalog_hash.write_text(catalog_hash)
+
+        logger.info(f"Updated regression data: {paths.regression}")
+        _print_regression_summary(console, paths.regression, size_threshold)
+    elif paths.regression.exists():
+        logger.info(f"Regression data exists at: {paths.regression}")
+        logger.info("Use --force-regen to update the baseline")
+
+    return True
+
+
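The placeholder substitution above is what keeps committed baselines portable across machines: absolute paths baked into text outputs are rewritten before the baseline lands in the repository. An illustration with hypothetical values:

    # Hypothetical output-bundle fragment containing a machine-specific path
    output_directory = "/home/dev/ref-output/run-1"
    content = '{"plot": "/home/dev/ref-output/run-1/plot.png"}'

    portable = content.replace(output_directory, "<OUTPUT_DIR>")
    assert portable == '{"plot": "<OUTPUT_DIR>/plot.png"}'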
+@app.command(name="run")
+def run_test_case(  # noqa: PLR0912, PLR0915
+    ctx: typer.Context,
+    provider: Annotated[
+        str,
+        typer.Option(help="Provider slug (required, e.g., 'example', 'ilamb')"),
+    ],
+    diagnostic: Annotated[
+        str | None,
+        typer.Option(help="Specific diagnostic slug to run (e.g., 'global-mean-timeseries')"),
+    ] = None,
+    test_case: Annotated[
+        str | None,
+        typer.Option(help="Specific test case name to run (e.g., 'default')"),
+    ] = None,
+    output_directory: Annotated[
+        Path | None,
+        typer.Option(help="Output directory for execution results"),
+    ] = None,
+    force_regen: Annotated[
+        bool,
+        typer.Option(help="Force regeneration of regression baselines"),
+    ] = False,
+    fetch: Annotated[
+        bool,
+        typer.Option(help="Fetch test data from ESGF before running"),
+    ] = False,
+    size_threshold: Annotated[
+        float,
+        typer.Option(help="Flag files larger than this size in MB (default: 1.0)"),
+    ] = 1.0,
+    dry_run: Annotated[
+        bool,
+        typer.Option(help="Show what would be run without executing"),
+    ] = False,
+    only_missing: Annotated[
+        bool,
+        typer.Option(help="Only run test cases without existing regression data"),
+    ] = False,
+    if_changed: Annotated[
+        bool,
+        typer.Option(help="Only run if catalog has changed since regression data was generated"),
+    ] = False,
+    clean: Annotated[
+        bool,
+        typer.Option(help="Delete existing output directory before running"),
+    ] = False,
+) -> None:
+    """
+    Run test cases for diagnostics.
+
+    Executes diagnostics using pre-defined datasets from the test_data_spec
+    and optionally compares against regression baselines.
+
+    Use --provider to select which provider's diagnostics to run (required).
+    Use --diagnostic and --test-case to further narrow the scope.
+
+    Examples
+    --------
+    ref test-cases run --provider ilamb                 # Run all ILAMB test cases
+    ref test-cases run --provider example --diagnostic global-mean-timeseries
+    ref test-cases run --provider ilamb --test-case default --fetch
+    ref test-cases run --provider pmp --only-missing    # Skip test cases with regression data
+    ref test-cases run --provider pmp --if-changed      # Only run if catalog changed
+    """
+    config: Config = ctx.obj.config
+    db = ctx.obj.database
+    console: Console = ctx.obj.console
+
+    # Build provider registry
+    registry = ProviderRegistry.build_from_config(config, db)
+
+    # Find the provider
+    provider_instance = None
+    for p in registry.providers:
+        if p.slug == provider:
+            provider_instance = p
+            break
+
+    if provider_instance is None:
+        logger.error(f"Provider '{provider}' not found")
+        available = [p.slug for p in registry.providers]
+        logger.error(f"Available providers: {available}")
+        raise typer.Exit(code=1)
+
+    # Collect test cases to run
+    test_cases_to_run: list[tuple[Diagnostic, TestCase]] = []
+    skipped_cases: list[tuple[Diagnostic, TestCase]] = []
+
+    for diag in provider_instance.diagnostics():
+        if diagnostic and diag.slug != diagnostic:
+            continue
+        if diag.test_data_spec is None:
+            continue
+
+        for tc in diag.test_data_spec.test_cases:
+            if test_case and tc.name != test_case:
+                continue
+            # Skip if regression exists when using --only-missing
+            paths = TestCasePaths.from_diagnostic(diag, tc.name)
+            if only_missing:
+                if paths and paths.regression.exists():
+                    skipped_cases.append((diag, tc))
+                    continue
+            # Skip if catalog hasn't changed when using --if-changed
+            if if_changed:
+                if paths and not catalog_changed_since_regression(paths):
+                    skipped_cases.append((diag, tc))
+                    continue
+            test_cases_to_run.append((diag, tc))
+
+    if not test_cases_to_run:
+        logger.warning(f"No test cases found for provider '{provider}'")
+        if diagnostic:
+            logger.warning(f"  with diagnostic filter: {diagnostic}")
+        if test_case:
+            logger.warning(f"  with test case filter: {test_case}")
+        if only_missing and skipped_cases:
+            logger.info(f"  ({len(skipped_cases)} test case(s) skipped due to --only-missing)")
+        raise typer.Exit(code=0)
+
+    logger.info(f"Found {len(test_cases_to_run)} test case(s) to run")
+    if skipped_cases:
+        logger.info(f"Skipping {len(skipped_cases)} test case(s) with existing regression data")
+
+    if dry_run:  # pragma: no cover
+        table = Table(title="Test Cases to Run")
+        table.add_column("Provider", style="cyan")
+        table.add_column("Diagnostic", style="green")
+        table.add_column("Test Case", style="yellow")
+        table.add_column("Description")
+        table.add_column("Status", justify="center")
+
+        for diag, tc in test_cases_to_run:
+            table.add_row(provider, diag.slug, tc.name, tc.description, "[green]will run[/green]")
+
+        for diag, tc in skipped_cases:
+            table.add_row(provider, diag.slug, tc.name, tc.description, "[dim]skip (regression exists)[/dim]")
+
+        console.print(table)
+        return
+
+    # Run each test case
+    successes = 0
+    failures = 0
+    failed_cases: list[str] = []
+
+    for diag, tc in test_cases_to_run:
+        success = _run_single_test_case(
+            config=config,
+            console=console,
+            diag=diag,
+            tc=tc,
+            output_directory=output_directory,
+            force_regen=force_regen,
+            fetch=fetch,
+            size_threshold=size_threshold,
+            clean=clean,
+        )
+        if success:
+            successes += 1
+        else:
+            failures += 1
+            failed_cases.append(f"{provider}/{diag.slug}/{tc.name}")
+
+    # Print summary
+    console.print()
+    if failures == 0:
+        console.print(f"[green]All {successes} test case(s) passed[/green]")
+    else:
+        console.print(f"[yellow]Results: {successes} passed, {failures} failed[/yellow]")
+        console.print("[red]Failed test cases:[/red]")
+        for case in failed_cases:
+            console.print(f"  - {case}")
+        raise typer.Exit(code=1)
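--if-changed builds on the hash written alongside each baseline in `_run_single_test_case`: `get_catalog_hash` fingerprints the catalog YAML and the value is stored at `paths.regression_catalog_hash`. `catalog_changed_since_regression` itself lives in `climate_ref_core.testing` and is not shown in this diff; a plausible sketch of the check (an assumption, not the packaged code):

    import hashlib
    from pathlib import Path

    def catalog_changed(catalog: Path, stored_hash: Path) -> bool:
        # Hypothetical sketch: treat a missing recorded hash as "changed"
        if not stored_hash.exists():
            return True
        current = hashlib.sha256(catalog.read_bytes()).hexdigest()
        return current != stored_hash.read_text().strip()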