climate-ref 0.6.6__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- climate_ref/cli/__init__.py +12 -3
- climate_ref/cli/_utils.py +56 -2
- climate_ref/cli/datasets.py +49 -12
- climate_ref/cli/executions.py +333 -24
- climate_ref/cli/providers.py +1 -2
- climate_ref/config.py +67 -4
- climate_ref/database.py +62 -4
- climate_ref/dataset_registry/obs4ref_reference.txt +0 -9
- climate_ref/dataset_registry/sample_data.txt +10 -19
- climate_ref/datasets/__init__.py +3 -3
- climate_ref/datasets/base.py +121 -20
- climate_ref/datasets/cmip6.py +2 -0
- climate_ref/datasets/obs4mips.py +26 -15
- climate_ref/executor/hpc.py +149 -53
- climate_ref/executor/local.py +1 -2
- climate_ref/executor/result_handling.py +17 -7
- climate_ref/migrations/env.py +12 -10
- climate_ref/migrations/versions/2025-09-10T1358_2f6e36738e06_use_version_as_version_facet_for_.py +35 -0
- climate_ref/migrations/versions/2025-09-22T2359_20cd136a5b04_add_pmp_version.py +35 -0
- climate_ref/models/__init__.py +1 -6
- climate_ref/models/base.py +4 -20
- climate_ref/models/dataset.py +2 -0
- climate_ref/models/diagnostic.py +2 -1
- climate_ref/models/execution.py +219 -7
- climate_ref/models/metric_value.py +25 -110
- climate_ref/models/mixins.py +144 -0
- climate_ref/models/provider.py +2 -1
- climate_ref/provider_registry.py +4 -4
- climate_ref/slurm.py +2 -2
- climate_ref/solver.py +17 -6
- climate_ref/testing.py +1 -1
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/METADATA +1 -1
- climate_ref-0.8.0.dist-info/RECORD +58 -0
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/WHEEL +1 -1
- climate_ref-0.6.6.dist-info/RECORD +0 -55
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/entry_points.txt +0 -0
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/licenses/LICENCE +0 -0
- {climate_ref-0.6.6.dist-info → climate_ref-0.8.0.dist-info}/licenses/NOTICE +0 -0
climate_ref/cli/__init__.py
CHANGED

@@ -3,11 +3,12 @@
 import importlib
 from enum import Enum
 from pathlib import Path
-from typing import Annotated
+from typing import Annotated

 import typer
 from attrs import define
 from loguru import logger
+from rich.console import Console

 from climate_ref import __version__
 from climate_ref.cli import config, datasets, executions, providers, solve
@@ -37,6 +38,7 @@ class CLIContext:

     config: Config
     database: Database
+    console: Console


 def _version_callback(value: bool) -> None:
@@ -46,6 +48,13 @@ def _version_callback(value: bool) -> None:
         raise typer.Exit()


+def _create_console() -> Console:
+    # Hook for testing to disable color output
+
+    # Rich respects the NO_COLOR environment variable
+    return Console()
+
+
 def _load_config(configuration_directory: Path | None = None) -> Config:
     """
     Load the configuration from the specified directory
@@ -129,7 +138,7 @@ def main(  # noqa: PLR0913
         typer.Option(case_sensitive=False, help="Set the level of logging information to display"),
     ] = LogLevel.Info,
     version: Annotated[
-
+        bool | None,
         typer.Option(
             "--version", callback=_version_callback, is_eager=True, help="Print the version and exit"
         ),
@@ -155,7 +164,7 @@ def main(  # noqa: PLR0913

     logger.debug(f"Configuration loaded from: {config._config_file!s}")

-    ctx.obj = CLIContext(config=config, database=Database.from_config(config))
+    ctx.obj = CLIContext(config=config, database=Database.from_config(config), console=_create_console())


 if __name__ == "__main__":
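
The CLI now builds a single rich Console at startup and threads it through CLIContext, so subcommands stop creating their own consoles. A minimal sketch (not from the package) of the behaviour the _create_console comment relies on; the no_color check assumes Rich's documented handling of the NO_COLOR environment variable:

    # Hedged sketch: Rich reads NO_COLOR when the Console is constructed,
    # so a Console built via a single hook can be disabled globally in tests.
    import os

    from rich.console import Console

    os.environ["NO_COLOR"] = "1"  # any non-empty value disables colour in Rich
    console = Console()
    print(console.no_color)  # True: the setting was picked up at construction time
    console.print("[red]rendered without colour codes[/red]")
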
climate_ref/cli/_utils.py
CHANGED

@@ -5,6 +5,59 @@ from rich.console import Console
 from rich.table import Table


+def parse_facet_filters(filters: list[str] | None) -> dict[str, str]:
+    """
+    Parse facet filters from key=value format into a dictionary.
+
+    Parameters
+    ----------
+    filters
+        List of filter strings in 'key=value' format
+
+    Returns
+    -------
+    dict[str, str]
+        Dictionary mapping facet keys to values
+
+    Raises
+    ------
+    ValueError
+        If a filter string is not in valid 'key=value' format
+
+    Examples
+    --------
+    >>> parse_facet_filters(["source_id=GFDL-ESM4", "variable_id=tas"])
+    {'source_id': 'GFDL-ESM4', 'variable_id': 'tas'}
+    """
+    if not filters:
+        return {}
+
+    parsed: dict[str, str] = {}
+    for filter_str in filters:
+        if "=" not in filter_str:
+            raise ValueError(
+                f"Invalid filter format: '{filter_str}'. "
+                f"Expected format: 'key=value' or 'dataset_type.key=value' "
+                f"(e.g., 'source_id=GFDL-ESM4' or 'cmip6.source_id=GFDL-ESM4')"
+            )
+
+        key, value = filter_str.split("=", 1)
+        key = key.strip()
+        value = value.strip()
+
+        if not key:
+            raise ValueError(f"Empty key in filter: '{filter_str}'")
+        if not value:
+            raise ValueError(f"Empty value in filter: '{filter_str}'")
+
+        if key in parsed:
+            logger.warning(f"Filter key '{key}' specified multiple times. Using last value: '{value}'")
+
+        parsed[key] = value
+
+    return parsed
+
+
 def df_to_table(df: pd.DataFrame, max_col_count: int = -1) -> Table:
     """
     Convert a DataFrame to a rich Table instance
@@ -32,7 +85,7 @@ def df_to_table(df: pd.DataFrame, max_col_count: int = -1) -> Table:

     table = Table(*[str(column) for column in df.columns])

-    for
+    for value_list in df.values.tolist():
         row = [str(x) for x in value_list]
         table.add_row(*row)

@@ -59,7 +112,8 @@ def pretty_print_df(df: pd.DataFrame, console: Console | None = None) -> None:
     # Drop duplicates as they are not informative to CLI users.
     df = df.drop_duplicates()

-    if console is None:
+    if console is None:  # pragma: no branch
+        logger.debug("Creating new console for pretty printing")
        console = Console()

     max_col_count = console.width // 10
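
The new parse_facet_filters helper is small enough to exercise directly. A short sketch based only on the docstring and error messages above (the dotted cmip6.source_id form is simply accepted as an ordinary key):

    from climate_ref.cli._utils import parse_facet_filters

    print(parse_facet_filters(["source_id=GFDL-ESM4", "variable_id=tas"]))
    # {'source_id': 'GFDL-ESM4', 'variable_id': 'tas'}

    print(parse_facet_filters(None))  # no filters supplied -> empty dict
    # {}

    try:
        parse_facet_filters(["source_id"])  # missing '=' is rejected
    except ValueError as err:
        print(err)
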
climate_ref/cli/datasets.py
CHANGED

@@ -6,8 +6,6 @@ which executions are required for a given diagnostic without having to re-parse

 """

-import errno
-import os
 import shutil
 from collections.abc import Iterable
 from pathlib import Path
@@ -15,9 +13,9 @@ from typing import Annotated

 import typer
 from loguru import logger
-from rich.console import Console

 from climate_ref.cli._utils import pretty_print_df
+from climate_ref.database import ModelState
 from climate_ref.datasets import get_dataset_adapter
 from climate_ref.models import Dataset
 from climate_ref.provider_registry import ProviderRegistry
@@ -27,7 +25,6 @@ from climate_ref_core.dataset_registry import dataset_registry_manager, fetch_al
 from climate_ref_core.datasets import SourceDatasetType

 app = typer.Typer(help=__doc__)
-console = Console()


 @app.command(name="list")
@@ -70,7 +67,7 @@ def list_(
         raise typer.Exit(code=1)
     data_catalog = data_catalog[column].sort_values(by=column)

-    pretty_print_df(data_catalog, console=console)
+    pretty_print_df(data_catalog, console=ctx.obj.console)


 @app.command()
@@ -97,7 +94,7 @@ def list_columns(


 @app.command()
-def ingest(  # noqa
+def ingest(  # noqa
     ctx: typer.Context,
     file_or_directory: list[Path],
     source_type: Annotated[SourceDatasetType, typer.Option(help="Type of source dataset")],
@@ -106,7 +103,7 @@ def ingest(  # noqa: PLR0913
     n_jobs: Annotated[int | None, typer.Option(help="Number of jobs to run in parallel")] = None,
     skip_invalid: Annotated[
         bool, typer.Option(help="Ignore (but log) any datasets that don't pass validation")
-    ] =
+    ] = True,
 ) -> None:
     """
     Ingest a directory of datasets into the database
@@ -118,6 +115,7 @@ def ingest(  # noqa: PLR0913
     """
     config = ctx.obj.config
     db = ctx.obj.database
+    console = ctx.obj.console

     kwargs = {}
@@ -133,16 +131,35 @@ def ingest(  # noqa: PLR0913

         if not _dir.exists():
             logger.error(f"File or directory {_dir} does not exist")
-
+            continue
+
+        # TODO: This assumes that all datasets are nc files.
+        # This is true for CMIP6 and obs4MIPs but may not be true for other dataset types in the future.
+        if not _dir.rglob("*.nc"):
+            logger.error(f"No .nc files found in {_dir}")
+            continue

-
-
+        try:
+            data_catalog = adapter.find_local_datasets(_dir)
+            data_catalog = adapter.validate_data_catalog(data_catalog, skip_invalid=skip_invalid)
+        except Exception as e:
+            logger.error(f"Error ingesting datasets from {_dir}: {e}")
+            continue

         logger.info(
             f"Found {len(data_catalog)} files for {len(data_catalog[adapter.slug_column].unique())} datasets"
         )
         pretty_print_df(adapter.pretty_subset(data_catalog), console=console)

+        # track stats for a given directory
+        num_created_datasets = 0
+        num_updated_datasets = 0
+        num_unchanged_datasets = 0
+        num_created_files = 0
+        num_updated_files = 0
+        num_removed_files = 0
+        num_unchanged_files = 0
+
         for instance_id, data_catalog_dataset in data_catalog.groupby(adapter.slug_column):
             logger.debug(f"Processing dataset {instance_id}")
             with db.session.begin():
@@ -154,9 +171,29 @@ def ingest(  # noqa: PLR0913
                 )
                 if not dataset:
                     logger.info(f"Would save dataset {instance_id} to the database")
-                    continue
                 else:
-                    adapter.register_dataset(config, db, data_catalog_dataset)
+                    results = adapter.register_dataset(config, db, data_catalog_dataset)
+
+                    if results.dataset_state == ModelState.CREATED:
+                        num_created_datasets += 1
+                    elif results.dataset_state == ModelState.UPDATED:
+                        num_updated_datasets += 1
+                    else:
+                        num_unchanged_datasets += 1
+                    num_created_files += len(results.files_added)
+                    num_updated_files += len(results.files_updated)
+                    num_removed_files += len(results.files_removed)
+                    num_unchanged_files += len(results.files_unchanged)
+
+        if not dry_run:
+            ingestion_msg = (
+                f"Datasets: {num_created_datasets}/{num_updated_datasets}/{num_unchanged_datasets}"
+                " (created/updated/unchanged), "
+                f"Files: "
+                f"{num_created_files}/{num_updated_files}/{num_removed_files}/{num_unchanged_files}"
+                " (created/updated/removed/unchanged)"
+            )
+            logger.info(ingestion_msg)

     if solve:
         solve_required_executions(
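
The ingest summary above relies on the richer result object now returned by adapter.register_dataset. A hypothetical sketch of the counting logic, with RegistrationResult standing in for whatever the adapter actually returns; only dataset_state and the four file lists are taken from the diff, and the ModelState.UNCHANGED member is assumed:

    from dataclasses import dataclass, field
    from enum import Enum, auto


    class ModelState(Enum):  # mirrors climate_ref.database.ModelState (members assumed)
        CREATED = auto()
        UPDATED = auto()
        UNCHANGED = auto()


    @dataclass
    class RegistrationResult:  # hypothetical stand-in for the adapter's return type
        dataset_state: ModelState
        files_added: list = field(default_factory=list)
        files_updated: list = field(default_factory=list)
        files_removed: list = field(default_factory=list)
        files_unchanged: list = field(default_factory=list)


    results = [
        RegistrationResult(ModelState.CREATED, files_added=["tas_2000.nc", "tas_2001.nc"]),
        RegistrationResult(ModelState.UNCHANGED, files_unchanged=["pr_2000.nc"]),
    ]
    # Booleans sum to ints, so each comprehension counts matching datasets
    created = sum(r.dataset_state == ModelState.CREATED for r in results)
    updated = sum(r.dataset_state == ModelState.UPDATED for r in results)
    unchanged = len(results) - created - updated
    print(f"Datasets: {created}/{updated}/{unchanged} (created/updated/unchanged)")
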
climate_ref/cli/executions.py
CHANGED

@@ -2,38 +2,90 @@
 View execution groups and their results
 """

+import json
 import pathlib
+import shutil
+from dataclasses import dataclass
 from typing import Annotated
 from urllib.parse import quote

 import pandas as pd
 import typer
 from loguru import logger
-from rich.console import
+from rich.console import Group
 from rich.filesize import decimal
 from rich.markup import escape
 from rich.panel import Panel
 from rich.text import Text
 from rich.tree import Tree

-from climate_ref.cli._utils import df_to_table, pretty_print_df
+from climate_ref.cli._utils import df_to_table, parse_facet_filters, pretty_print_df
 from climate_ref.config import Config
 from climate_ref.models import Execution, ExecutionGroup
-from climate_ref.models.execution import
+from climate_ref.models.execution import execution_datasets, get_execution_group_and_latest_filtered
 from climate_ref_core.logging import EXECUTION_LOG_FILENAME

 app = typer.Typer(help=__doc__)
-
+
+
+@dataclass
+class ListGroupsFilterOptions:
+    """Options to filter execution groups in list-groups command"""
+
+    diagnostic: list[str] | None = None
+    """Filter by diagnostic slug (substring, case-insensitive)"""
+
+    provider: list[str] | None = None
+    """Filter by provider slug (substring, case-insensitive)"""
+
+    facets: dict[str, str] | None = None
+    """Filter by facet key-value pairs (exact match)"""


 @app.command()
-def list_groups(
+def list_groups(  # noqa: PLR0913
     ctx: typer.Context,
     column: Annotated[
         list[str] | None,
         typer.Option(help="Only include specified columns in the output"),
     ] = None,
     limit: int = typer.Option(100, help="Limit the number of rows to display"),
+    diagnostic: Annotated[
+        list[str] | None,
+        typer.Option(
+            help="Filter by diagnostic slug (substring match, case-insensitive)."
+            "Multiple values can be provided."
+        ),
+    ] = None,
+    provider: Annotated[
+        list[str] | None,
+        typer.Option(
+            help="Filter by provider slug (substring match, case-insensitive)."
+            "Multiple values can be provided."
+        ),
+    ] = None,
+    filter: Annotated[  # noqa: A002
+        list[str] | None,
+        typer.Option(
+            "--filter",
+            help="Filter by facet key=value pairs (exact match). Multiple filters can be provided.",
+        ),
+    ] = None,
+    successful: Annotated[
+        bool | None,
+        typer.Option(
+            "--successful/--not-successful",
+            help="Filter by successful or unsuccessful executions.",
+        ),
+    ] = None,
+    dirty: Annotated[
+        bool | None,
+        typer.Option(
+            "--dirty/--not-dirty",
+            help="Filter to include only dirty or clean execution groups."
+            "These execution groups will be re-computed on the next run.",
+        ),
+    ] = None,
 ) -> None:
     """
     List the diagnostic execution groups that have been identified
@@ -41,42 +93,273 @@ def list_groups(
     The data catalog is sorted by the date that the execution group was created (first = newest).
     If the `--column` option is provided, only the specified columns will be displayed.

+    Filters can be combined using AND logic across filter types and OR logic within a filter type.
+
     The output will be in a tabular format.
     """
     session = ctx.obj.database.session
+    console = ctx.obj.console

-
-
+    # Parse facet filters
+    try:
+        facet_filters = parse_facet_filters(filter)
+    except ValueError as e:
+        logger.error(str(e))
+        raise typer.Exit(code=1)

-
-
-
-
-
-                "provider": execution_groups.diagnostic.provider.slug,
-                "diagnostic": execution_groups.diagnostic.slug,
-                "dirty": execution_groups.dirty,
-                "successful": result.successful if result else None,
-                "created_at": execution_groups.created_at,
-                "updated_at": execution_groups.updated_at,
-            }
-            for execution_groups, result in execution_groups_results
-        ]
+    # Build filter options
+    filters = ListGroupsFilterOptions(
+        diagnostic=diagnostic,
+        provider=provider,
+        facets=facet_filters if facet_filters else None,
     )
+    logger.debug(f"Applying filters: {filters}")
+
+    # Get total count before filtering for warning messages
+    total_count = session.query(ExecutionGroup).count()
+
+    # Apply filters to query
+    try:
+        all_filtered_results = get_execution_group_and_latest_filtered(
+            session,
+            diagnostic_filters=filters.diagnostic,
+            provider_filters=filters.provider,
+            facet_filters=filters.facets,
+            successful=successful,
+            dirty=dirty,
+        )
+        execution_groups_results = all_filtered_results[:limit]
+    except Exception as e:  # pragma: no cover
+        logger.error(f"Error applying filters: {e}")
+        raise typer.Exit(code=1)
+
+    # Check if any results found
+    if not execution_groups_results:
+        emit_no_results_warning(filters, total_count)
+        results_df = pd.DataFrame(
+            columns=[
+                "id",
+                "key",
+                "provider",
+                "diagnostic",
+                "dirty",
+                "successful",
+                "created_at",
+                "updated_at",
+                "selectors",
+            ]
+        )
+    else:
+        results_df = pd.DataFrame(
+            [
+                {
+                    "id": eg.id,
+                    "key": eg.key,
+                    "provider": eg.diagnostic.provider.slug,
+                    "diagnostic": eg.diagnostic.slug,
+                    "dirty": eg.dirty,
+                    "successful": result.successful if result else None,
+                    "created_at": eg.created_at,
+                    "updated_at": eg.updated_at,
+                    "selectors": json.dumps(eg.selectors),
+                }
+                for eg, result in execution_groups_results
+            ]
+        )

-
+    # Apply column filtering
+    if column and not results_df.empty:  # Only apply if df is not empty
         if not all(col in results_df.columns for col in column):
             logger.error(f"Column not found in data catalog: {column}")
             raise typer.Exit(code=1)
         results_df = results_df[column]

+    # Display results
     pretty_print_df(results_df, console=console)
-
+
+    # Show limit warning if applicable
+    filtered_count = len(all_filtered_results)
+    if filtered_count > limit:
         logger.warning(
-            f"Displaying {limit} of {
+            f"Displaying {limit} of {filtered_count} filtered results. "
+            f"Use the `--limit` option to display more."
         )


+@app.command()
+def delete_groups(  # noqa: PLR0912, PLR0913
+    ctx: typer.Context,
+    diagnostic: Annotated[
+        list[str] | None,
+        typer.Option(
+            help="Filter by diagnostic slug (substring match, case-insensitive)."
+            "Multiple values can be provided."
+        ),
+    ] = None,
+    provider: Annotated[
+        list[str] | None,
+        typer.Option(
+            help="Filter by provider slug (substring match, case-insensitive)."
+            "Multiple values can be provided."
+        ),
+    ] = None,
+    filter: Annotated[  # noqa: A002
+        list[str] | None,
+        typer.Option(
+            "--filter",
+            help="Filter by facet key=value pairs (exact match). Multiple filters can be provided.",
+        ),
+    ] = None,
+    successful: Annotated[
+        bool | None,
+        typer.Option(
+            "--successful/--not-successful",
+            help="Filter by successful or unsuccessful executions.",
+        ),
+    ] = None,
+    dirty: Annotated[
+        bool | None,
+        typer.Option(
+            "--dirty/--not-dirty",
+            help="Filter to include only dirty or clean execution groups."
+            "These execution groups will be re-computed on the next run.",
+        ),
+    ] = None,
+    remove_outputs: bool = typer.Option(
+        False, "--remove-outputs", help="Also remove output directories from the filesystem"
+    ),
+    force: bool = typer.Option(False, help="Skip confirmation prompt"),
+) -> None:
+    """
+    Delete execution groups matching the specified filters.
+
+    This command will delete execution groups and their associated executions.
+    Use filters to specify which groups to delete. At least one filter must be provided
+    to prevent accidental deletion of all groups.
+
+    Filters can be combined using AND logic across filter types and OR logic within a filter type.
+    """
+    session = ctx.obj.database.session
+    console = ctx.obj.console
+
+    # Parse facet filters
+    try:
+        facet_filters = parse_facet_filters(filter)
+    except ValueError as e:
+        logger.error(str(e))
+        raise typer.Exit(code=1)
+
+    if not any([diagnostic, provider, facet_filters, successful is not None, dirty is not None]):
+        logger.warning("THIS WILL DELETE ALL EXECUTION GROUPS IN THE DATABASE")
+        raise typer.Exit(code=1)
+
+    # Build filter options
+    filters = ListGroupsFilterOptions(
+        diagnostic=diagnostic,
+        provider=provider,
+        facets=facet_filters if facet_filters else None,
+    )
+    logger.debug(f"Applying filters: {filters}")
+
+    # Apply filters to query
+    try:
+        all_filtered_results = get_execution_group_and_latest_filtered(
+            session,
+            diagnostic_filters=filters.diagnostic,
+            provider_filters=filters.provider,
+            facet_filters=filters.facets,
+            successful=successful,
+            dirty=dirty,
+        )
+    except Exception as e:  # pragma: no cover
+        logger.error(f"Error applying filters: {e}")
+        raise typer.Exit(code=1)
+
+    # Check if any results found
+    if not all_filtered_results:
+        emit_no_results_warning(filters, session.query(ExecutionGroup).count())
+        return
+
+    # Convert to DataFrame for preview
+    results_df = pd.DataFrame(
+        [
+            {
+                "id": eg.id,
+                "key": eg.key,
+                "provider": eg.diagnostic.provider.slug,
+                "diagnostic": eg.diagnostic.slug,
+                "dirty": eg.dirty,
+                "successful": result.successful if result else None,
+                "created_at": eg.created_at,
+                "updated_at": eg.updated_at,
+                "selectors": json.dumps(eg.selectors),
+            }
+            for eg, result in all_filtered_results
+        ]
+    )
+
+    # Display preview
+    console.print("Execution groups to be deleted:")
+    pretty_print_df(results_df, console=console)
+
+    count = len(all_filtered_results)
+    console.print(f"\nWill delete {count} execution group(s).")
+
+    # Confirm unless force is set
+    if not force:
+        if not typer.confirm("Do you want to proceed with deletion?"):
+            console.print("Deletion cancelled.")
+            return
+
+    # Remove output directories if requested
+    if remove_outputs:
+        config = ctx.obj.config
+        for eg, _ in all_filtered_results:
+            for execution in eg.executions:
+                output_dir = config.paths.results / execution.output_fragment
+
+                # Safety check
+                if not output_dir.is_relative_to(config.paths.results):  # pragma: no cover
+                    logger.error(f"Skipping unsafe path: {output_dir}")
+                    continue
+
+                if output_dir.exists():
+                    try:
+                        logger.warning(f"Removing output directory: {output_dir}")
+                        shutil.rmtree(output_dir)
+                    except Exception as e:
+                        logger.error(f"Failed to remove {output_dir}: {e}")
+
+    # Delete execution groups and all related records
+    # TODO: Add cascade delete to FK relationships and simplify this code
+    with session.begin_nested() if session.in_transaction() else session.begin():
+        for eg, _ in all_filtered_results:
+            for execution in eg.executions:
+                # Delete MetricValues first
+                for metric_value in execution.values:
+                    session.delete(metric_value)
+
+                # Delete ExecutionOutputs
+                for output in execution.outputs:
+                    session.delete(output)
+
+                # Delete many-to-many associations with datasets
+                session.execute(
+                    execution_datasets.delete().where(execution_datasets.c.execution_id == execution.id)
+                )
+
+                # Now delete the execution
+                session.delete(execution)
+
+            # Finally delete the execution group
+            session.delete(eg)
+
+    if remove_outputs:
+        console.print(f"[green]Successfully deleted {count} execution group(s) and their output directories.")
+    else:
+        console.print(f"[green]Successfully deleted {count} execution group(s).")
+
+
 def walk_directory(directory: pathlib.Path, tree: Tree) -> None:
     """Recursively build a Tree with directory contents."""
     # Sort dirs first then by filename
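
Both list-groups and delete-groups document their filter semantics as AND across filter types and OR within one. A hypothetical in-memory sketch of that rule; the real filtering happens inside get_execution_group_and_latest_filtered as a database query, and matching facets against the group's selectors is an assumption made for this illustration:

    def group_matches(
        group,
        diagnostic_filters: list[str] | None = None,
        provider_filters: list[str] | None = None,
        facet_filters: dict[str, str] | None = None,
    ) -> bool:
        def any_substring(value: str, patterns: list[str]) -> bool:
            # OR within one filter type: any pattern may match (case-insensitive substring)
            return any(p.lower() in value.lower() for p in patterns)

        # AND across filter types: every filter type that was supplied must match
        if diagnostic_filters and not any_substring(group.diagnostic.slug, diagnostic_filters):
            return False
        if provider_filters and not any_substring(group.diagnostic.provider.slug, provider_filters):
            return False
        if facet_filters:
            # exact match, ANDed across the supplied key=value pairs
            if not all(group.selectors.get(k) == v for k, v in facet_filters.items()):
                return False
        return True
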
@@ -182,6 +465,29 @@ def _log_panel(result_directory: pathlib.Path) -> Panel | None:
     )


+def emit_no_results_warning(
+    filters: ListGroupsFilterOptions,
+    total_count: int,
+) -> None:
+    """
+    Emit informative warning when filters produce no results.
+    """
+    filter_parts = []
+    if filters.diagnostic:
+        filter_parts.append(f"diagnostic filters: {filters.diagnostic}")
+    if filters.provider:
+        filter_parts.append(f"provider filters: {filters.provider}")
+    if filters.facets:
+        facet_strs = [f"{k}={v}" for k, v in filters.facets.items()]
+        filter_parts.append(f"facet filters: {facet_strs}")
+
+    logger.warning(
+        f"No execution groups match the specified filters. "
+        f"Total execution groups in database: {total_count}. "
+        f"Applied filters: {', '.join(filter_parts)}"
+    )
+
+
 @app.command()
 def inspect(ctx: typer.Context, execution_id: int) -> None:
     """
@@ -191,6 +497,8 @@ def inspect(ctx: typer.Context, execution_id: int) -> None:
     """
     config: Config = ctx.obj.config
     session = ctx.obj.database.session
+    console = ctx.obj.console
+
     execution_group = session.get(ExecutionGroup, execution_id)

     if not execution_group:
@@ -217,6 +525,7 @@ def flag_dirty(ctx: typer.Context, execution_id: int) -> None:
     Flag an execution group for recomputation
     """
     session = ctx.obj.database.session
+    console = ctx.obj.console
     with session.begin():
         execution_group = session.get(ExecutionGroup, execution_id)

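
A hedged end-to-end sketch of driving the new filters from code using Typer's test runner. The climate_ref.cli app import path, the executions subcommand name, and the pmp provider value are assumptions; the flag names come straight from the diff above:

    from typer.testing import CliRunner

    from climate_ref.cli import app  # assumes the Typer app is exported here

    runner = CliRunner()
    result = runner.invoke(
        app,
        [
            "executions",
            "list-groups",
            "--provider", "pmp",                  # OR within a type if repeated
            "--filter", "source_id=GFDL-ESM4",    # exact facet match
            "--dirty",                            # only groups flagged for recomputation
            "--limit", "10",
        ],
    )
    print(result.output)
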