climate-ref 0.6.6__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- climate_ref/cli/__init__.py +12 -3
- climate_ref/cli/_utils.py +56 -2
- climate_ref/cli/datasets.py +48 -9
- climate_ref/cli/executions.py +333 -24
- climate_ref/cli/providers.py +1 -2
- climate_ref/config.py +4 -4
- climate_ref/database.py +62 -4
- climate_ref/dataset_registry/obs4ref_reference.txt +0 -9
- climate_ref/dataset_registry/sample_data.txt +10 -19
- climate_ref/datasets/__init__.py +3 -3
- climate_ref/datasets/base.py +121 -20
- climate_ref/datasets/cmip6.py +2 -0
- climate_ref/datasets/obs4mips.py +26 -15
- climate_ref/executor/result_handling.py +4 -1
- climate_ref/migrations/env.py +12 -10
- climate_ref/migrations/versions/2025-09-10T1358_2f6e36738e06_use_version_as_version_facet_for_.py +35 -0
- climate_ref/migrations/versions/2025-09-22T2359_20cd136a5b04_add_pmp_version.py +35 -0
- climate_ref/models/__init__.py +1 -6
- climate_ref/models/base.py +4 -20
- climate_ref/models/dataset.py +2 -0
- climate_ref/models/diagnostic.py +2 -1
- climate_ref/models/execution.py +219 -7
- climate_ref/models/metric_value.py +25 -110
- climate_ref/models/mixins.py +144 -0
- climate_ref/models/provider.py +2 -1
- climate_ref/provider_registry.py +4 -4
- climate_ref/slurm.py +2 -2
- climate_ref/testing.py +1 -1
- {climate_ref-0.6.6.dist-info → climate_ref-0.7.0.dist-info}/METADATA +1 -1
- climate_ref-0.7.0.dist-info/RECORD +58 -0
- climate_ref-0.6.6.dist-info/RECORD +0 -55
- {climate_ref-0.6.6.dist-info → climate_ref-0.7.0.dist-info}/WHEEL +0 -0
- {climate_ref-0.6.6.dist-info → climate_ref-0.7.0.dist-info}/entry_points.txt +0 -0
- {climate_ref-0.6.6.dist-info → climate_ref-0.7.0.dist-info}/licenses/LICENCE +0 -0
- {climate_ref-0.6.6.dist-info → climate_ref-0.7.0.dist-info}/licenses/NOTICE +0 -0
climate_ref/cli/__init__.py
CHANGED
|
@@ -3,11 +3,12 @@
|
|
|
3
3
|
import importlib
|
|
4
4
|
from enum import Enum
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import Annotated
|
|
6
|
+
from typing import Annotated
|
|
7
7
|
|
|
8
8
|
import typer
|
|
9
9
|
from attrs import define
|
|
10
10
|
from loguru import logger
|
|
11
|
+
from rich.console import Console
|
|
11
12
|
|
|
12
13
|
from climate_ref import __version__
|
|
13
14
|
from climate_ref.cli import config, datasets, executions, providers, solve
|
|
@@ -37,6 +38,7 @@ class CLIContext:
|
|
|
37
38
|
|
|
38
39
|
config: Config
|
|
39
40
|
database: Database
|
|
41
|
+
console: Console
|
|
40
42
|
|
|
41
43
|
|
|
42
44
|
def _version_callback(value: bool) -> None:
|
|
@@ -46,6 +48,13 @@ def _version_callback(value: bool) -> None:
|
|
|
46
48
|
raise typer.Exit()
|
|
47
49
|
|
|
48
50
|
|
|
51
|
+
def _create_console() -> Console:
    """
    Build the rich console that is shared by the CLI commands

    This factory is the hook for tests that need to disable color output.
    No explicit color configuration is applied here because Rich respects
    the NO_COLOR environment variable.

    Returns
    -------
    Console
        A console created with the default settings
    """
    return Console()
|
|
56
|
+
|
|
57
|
+
|
|
49
58
|
def _load_config(configuration_directory: Path | None = None) -> Config:
|
|
50
59
|
"""
|
|
51
60
|
Load the configuration from the specified directory
|
|
@@ -129,7 +138,7 @@ def main( # noqa: PLR0913
|
|
|
129
138
|
typer.Option(case_sensitive=False, help="Set the level of logging information to display"),
|
|
130
139
|
] = LogLevel.Info,
|
|
131
140
|
version: Annotated[
|
|
132
|
-
|
|
141
|
+
bool | None,
|
|
133
142
|
typer.Option(
|
|
134
143
|
"--version", callback=_version_callback, is_eager=True, help="Print the version and exit"
|
|
135
144
|
),
|
|
@@ -155,7 +164,7 @@ def main( # noqa: PLR0913
|
|
|
155
164
|
|
|
156
165
|
logger.debug(f"Configuration loaded from: {config._config_file!s}")
|
|
157
166
|
|
|
158
|
-
ctx.obj = CLIContext(config=config, database=Database.from_config(config))
|
|
167
|
+
ctx.obj = CLIContext(config=config, database=Database.from_config(config), console=_create_console())
|
|
159
168
|
|
|
160
169
|
|
|
161
170
|
if __name__ == "__main__":
|
climate_ref/cli/_utils.py
CHANGED
|
@@ -5,6 +5,59 @@ from rich.console import Console
|
|
|
5
5
|
from rich.table import Table
|
|
6
6
|
|
|
7
7
|
|
|
8
|
+
def parse_facet_filters(filters: list[str] | None) -> dict[str, str]:
|
|
9
|
+
"""
|
|
10
|
+
Parse facet filters from key=value format into a dictionary.
|
|
11
|
+
|
|
12
|
+
Parameters
|
|
13
|
+
----------
|
|
14
|
+
filters
|
|
15
|
+
List of filter strings in 'key=value' format
|
|
16
|
+
|
|
17
|
+
Returns
|
|
18
|
+
-------
|
|
19
|
+
dict[str, str]
|
|
20
|
+
Dictionary mapping facet keys to values
|
|
21
|
+
|
|
22
|
+
Raises
|
|
23
|
+
------
|
|
24
|
+
ValueError
|
|
25
|
+
If a filter string is not in valid 'key=value' format
|
|
26
|
+
|
|
27
|
+
Examples
|
|
28
|
+
--------
|
|
29
|
+
>>> parse_facet_filters(["source_id=GFDL-ESM4", "variable_id=tas"])
|
|
30
|
+
{'source_id': 'GFDL-ESM4', 'variable_id': 'tas'}
|
|
31
|
+
"""
|
|
32
|
+
if not filters:
|
|
33
|
+
return {}
|
|
34
|
+
|
|
35
|
+
parsed: dict[str, str] = {}
|
|
36
|
+
for filter_str in filters:
|
|
37
|
+
if "=" not in filter_str:
|
|
38
|
+
raise ValueError(
|
|
39
|
+
f"Invalid filter format: '{filter_str}'. "
|
|
40
|
+
f"Expected format: 'key=value' or 'dataset_type.key=value' "
|
|
41
|
+
f"(e.g., 'source_id=GFDL-ESM4' or 'cmip6.source_id=GFDL-ESM4')"
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
key, value = filter_str.split("=", 1)
|
|
45
|
+
key = key.strip()
|
|
46
|
+
value = value.strip()
|
|
47
|
+
|
|
48
|
+
if not key:
|
|
49
|
+
raise ValueError(f"Empty key in filter: '{filter_str}'")
|
|
50
|
+
if not value:
|
|
51
|
+
raise ValueError(f"Empty value in filter: '{filter_str}'")
|
|
52
|
+
|
|
53
|
+
if key in parsed:
|
|
54
|
+
logger.warning(f"Filter key '{key}' specified multiple times. Using last value: '{value}'")
|
|
55
|
+
|
|
56
|
+
parsed[key] = value
|
|
57
|
+
|
|
58
|
+
return parsed
|
|
59
|
+
|
|
60
|
+
|
|
8
61
|
def df_to_table(df: pd.DataFrame, max_col_count: int = -1) -> Table:
|
|
9
62
|
"""
|
|
10
63
|
Convert a DataFrame to a rich Table instance
|
|
@@ -32,7 +85,7 @@ def df_to_table(df: pd.DataFrame, max_col_count: int = -1) -> Table:
|
|
|
32
85
|
|
|
33
86
|
table = Table(*[str(column) for column in df.columns])
|
|
34
87
|
|
|
35
|
-
for
|
|
88
|
+
for value_list in df.values.tolist():
|
|
36
89
|
row = [str(x) for x in value_list]
|
|
37
90
|
table.add_row(*row)
|
|
38
91
|
|
|
@@ -59,7 +112,8 @@ def pretty_print_df(df: pd.DataFrame, console: Console | None = None) -> None:
|
|
|
59
112
|
# Drop duplicates as they are not informative to CLI users.
|
|
60
113
|
df = df.drop_duplicates()
|
|
61
114
|
|
|
62
|
-
if console is None:
|
|
115
|
+
if console is None: # pragma: no branch
|
|
116
|
+
logger.debug("Creating new console for pretty printing")
|
|
63
117
|
console = Console()
|
|
64
118
|
|
|
65
119
|
max_col_count = console.width // 10
|
climate_ref/cli/datasets.py
CHANGED
|
@@ -15,9 +15,9 @@ from typing import Annotated
|
|
|
15
15
|
|
|
16
16
|
import typer
|
|
17
17
|
from loguru import logger
|
|
18
|
-
from rich.console import Console
|
|
19
18
|
|
|
20
19
|
from climate_ref.cli._utils import pretty_print_df
|
|
20
|
+
from climate_ref.database import ModelState
|
|
21
21
|
from climate_ref.datasets import get_dataset_adapter
|
|
22
22
|
from climate_ref.models import Dataset
|
|
23
23
|
from climate_ref.provider_registry import ProviderRegistry
|
|
@@ -27,7 +27,6 @@ from climate_ref_core.dataset_registry import dataset_registry_manager, fetch_al
|
|
|
27
27
|
from climate_ref_core.datasets import SourceDatasetType
|
|
28
28
|
|
|
29
29
|
app = typer.Typer(help=__doc__)
|
|
30
|
-
console = Console()
|
|
31
30
|
|
|
32
31
|
|
|
33
32
|
@app.command(name="list")
|
|
@@ -70,7 +69,7 @@ def list_(
|
|
|
70
69
|
raise typer.Exit(code=1)
|
|
71
70
|
data_catalog = data_catalog[column].sort_values(by=column)
|
|
72
71
|
|
|
73
|
-
pretty_print_df(data_catalog, console=console)
|
|
72
|
+
pretty_print_df(data_catalog, console=ctx.obj.console)
|
|
74
73
|
|
|
75
74
|
|
|
76
75
|
@app.command()
|
|
@@ -97,7 +96,7 @@ def list_columns(
|
|
|
97
96
|
|
|
98
97
|
|
|
99
98
|
@app.command()
|
|
100
|
-
def ingest( # noqa
|
|
99
|
+
def ingest( # noqa
|
|
101
100
|
ctx: typer.Context,
|
|
102
101
|
file_or_directory: list[Path],
|
|
103
102
|
source_type: Annotated[SourceDatasetType, typer.Option(help="Type of source dataset")],
|
|
@@ -106,7 +105,7 @@ def ingest( # noqa: PLR0913
|
|
|
106
105
|
n_jobs: Annotated[int | None, typer.Option(help="Number of jobs to run in parallel")] = None,
|
|
107
106
|
skip_invalid: Annotated[
|
|
108
107
|
bool, typer.Option(help="Ignore (but log) any datasets that don't pass validation")
|
|
109
|
-
] =
|
|
108
|
+
] = True,
|
|
110
109
|
) -> None:
|
|
111
110
|
"""
|
|
112
111
|
Ingest a directory of datasets into the database
|
|
@@ -118,6 +117,7 @@ def ingest( # noqa: PLR0913
|
|
|
118
117
|
"""
|
|
119
118
|
config = ctx.obj.config
|
|
120
119
|
db = ctx.obj.database
|
|
120
|
+
console = ctx.obj.console
|
|
121
121
|
|
|
122
122
|
kwargs = {}
|
|
123
123
|
|
|
@@ -135,14 +135,33 @@ def ingest( # noqa: PLR0913
|
|
|
135
135
|
logger.error(f"File or directory {_dir} does not exist")
|
|
136
136
|
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), _dir)
|
|
137
137
|
|
|
138
|
-
|
|
139
|
-
|
|
138
|
+
# TODO: This assumes that all datasets are nc files.
|
|
139
|
+
# This is true for CMIP6 and obs4MIPs but may not be true for other dataset types in the future.
|
|
140
|
+
if not _dir.rglob("*.nc"):
|
|
141
|
+
logger.error(f"No .nc files found in {_dir}")
|
|
142
|
+
continue
|
|
143
|
+
|
|
144
|
+
try:
|
|
145
|
+
data_catalog = adapter.find_local_datasets(_dir)
|
|
146
|
+
data_catalog = adapter.validate_data_catalog(data_catalog, skip_invalid=skip_invalid)
|
|
147
|
+
except Exception as e:
|
|
148
|
+
logger.error(f"Error ingesting datasets from {_dir}: {e}")
|
|
149
|
+
continue
|
|
140
150
|
|
|
141
151
|
logger.info(
|
|
142
152
|
f"Found {len(data_catalog)} files for {len(data_catalog[adapter.slug_column].unique())} datasets"
|
|
143
153
|
)
|
|
144
154
|
pretty_print_df(adapter.pretty_subset(data_catalog), console=console)
|
|
145
155
|
|
|
156
|
+
# track stats for a given directory
|
|
157
|
+
num_created_datasets = 0
|
|
158
|
+
num_updated_datasets = 0
|
|
159
|
+
num_unchanged_datasets = 0
|
|
160
|
+
num_created_files = 0
|
|
161
|
+
num_updated_files = 0
|
|
162
|
+
num_removed_files = 0
|
|
163
|
+
num_unchanged_files = 0
|
|
164
|
+
|
|
146
165
|
for instance_id, data_catalog_dataset in data_catalog.groupby(adapter.slug_column):
|
|
147
166
|
logger.debug(f"Processing dataset {instance_id}")
|
|
148
167
|
with db.session.begin():
|
|
@@ -154,9 +173,29 @@ def ingest( # noqa: PLR0913
|
|
|
154
173
|
)
|
|
155
174
|
if not dataset:
|
|
156
175
|
logger.info(f"Would save dataset {instance_id} to the database")
|
|
157
|
-
continue
|
|
158
176
|
else:
|
|
159
|
-
adapter.register_dataset(config, db, data_catalog_dataset)
|
|
177
|
+
results = adapter.register_dataset(config, db, data_catalog_dataset)
|
|
178
|
+
|
|
179
|
+
if results.dataset_state == ModelState.CREATED:
|
|
180
|
+
num_created_datasets += 1
|
|
181
|
+
elif results.dataset_state == ModelState.UPDATED:
|
|
182
|
+
num_updated_datasets += 1
|
|
183
|
+
else:
|
|
184
|
+
num_unchanged_datasets += 1
|
|
185
|
+
num_created_files += len(results.files_added)
|
|
186
|
+
num_updated_files += len(results.files_updated)
|
|
187
|
+
num_removed_files += len(results.files_removed)
|
|
188
|
+
num_unchanged_files += len(results.files_unchanged)
|
|
189
|
+
|
|
190
|
+
if not dry_run:
|
|
191
|
+
ingestion_msg = (
|
|
192
|
+
f"Datasets: {num_created_datasets}/{num_updated_datasets}/{num_unchanged_datasets}"
|
|
193
|
+
" (created/updated/unchanged), "
|
|
194
|
+
f"Files: "
|
|
195
|
+
f"{num_created_files}/{num_updated_files}/{num_removed_files}/{num_unchanged_files}"
|
|
196
|
+
" (created/updated/removed/unchanged)"
|
|
197
|
+
)
|
|
198
|
+
logger.info(ingestion_msg)
|
|
160
199
|
|
|
161
200
|
if solve:
|
|
162
201
|
solve_required_executions(
|
climate_ref/cli/executions.py
CHANGED
|
@@ -2,38 +2,90 @@
|
|
|
2
2
|
View execution groups and their results
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
+
import json
|
|
5
6
|
import pathlib
|
|
7
|
+
import shutil
|
|
8
|
+
from dataclasses import dataclass
|
|
6
9
|
from typing import Annotated
|
|
7
10
|
from urllib.parse import quote
|
|
8
11
|
|
|
9
12
|
import pandas as pd
|
|
10
13
|
import typer
|
|
11
14
|
from loguru import logger
|
|
12
|
-
from rich.console import
|
|
15
|
+
from rich.console import Group
|
|
13
16
|
from rich.filesize import decimal
|
|
14
17
|
from rich.markup import escape
|
|
15
18
|
from rich.panel import Panel
|
|
16
19
|
from rich.text import Text
|
|
17
20
|
from rich.tree import Tree
|
|
18
21
|
|
|
19
|
-
from climate_ref.cli._utils import df_to_table, pretty_print_df
|
|
22
|
+
from climate_ref.cli._utils import df_to_table, parse_facet_filters, pretty_print_df
|
|
20
23
|
from climate_ref.config import Config
|
|
21
24
|
from climate_ref.models import Execution, ExecutionGroup
|
|
22
|
-
from climate_ref.models.execution import
|
|
25
|
+
from climate_ref.models.execution import execution_datasets, get_execution_group_and_latest_filtered
|
|
23
26
|
from climate_ref_core.logging import EXECUTION_LOG_FILENAME
|
|
24
27
|
|
|
25
28
|
app = typer.Typer(help=__doc__)
|
|
26
|
-
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
|
|
32
|
+
class ListGroupsFilterOptions:
|
|
33
|
+
"""Options to filter execution groups in list-groups command"""
|
|
34
|
+
|
|
35
|
+
diagnostic: list[str] | None = None
|
|
36
|
+
"""Filter by diagnostic slug (substring, case-insensitive)"""
|
|
37
|
+
|
|
38
|
+
provider: list[str] | None = None
|
|
39
|
+
"""Filter by provider slug (substring, case-insensitive)"""
|
|
40
|
+
|
|
41
|
+
facets: dict[str, str] | None = None
|
|
42
|
+
"""Filter by facet key-value pairs (exact match)"""
|
|
27
43
|
|
|
28
44
|
|
|
29
45
|
@app.command()
|
|
30
|
-
def list_groups(
|
|
46
|
+
def list_groups( # noqa: PLR0913
|
|
31
47
|
ctx: typer.Context,
|
|
32
48
|
column: Annotated[
|
|
33
49
|
list[str] | None,
|
|
34
50
|
typer.Option(help="Only include specified columns in the output"),
|
|
35
51
|
] = None,
|
|
36
52
|
limit: int = typer.Option(100, help="Limit the number of rows to display"),
|
|
53
|
+
diagnostic: Annotated[
|
|
54
|
+
list[str] | None,
|
|
55
|
+
typer.Option(
|
|
56
|
+
help="Filter by diagnostic slug (substring match, case-insensitive)."
|
|
57
|
+
"Multiple values can be provided."
|
|
58
|
+
),
|
|
59
|
+
] = None,
|
|
60
|
+
provider: Annotated[
|
|
61
|
+
list[str] | None,
|
|
62
|
+
typer.Option(
|
|
63
|
+
help="Filter by provider slug (substring match, case-insensitive)."
|
|
64
|
+
"Multiple values can be provided."
|
|
65
|
+
),
|
|
66
|
+
] = None,
|
|
67
|
+
filter: Annotated[ # noqa: A002
|
|
68
|
+
list[str] | None,
|
|
69
|
+
typer.Option(
|
|
70
|
+
"--filter",
|
|
71
|
+
help="Filter by facet key=value pairs (exact match). Multiple filters can be provided.",
|
|
72
|
+
),
|
|
73
|
+
] = None,
|
|
74
|
+
successful: Annotated[
|
|
75
|
+
bool | None,
|
|
76
|
+
typer.Option(
|
|
77
|
+
"--successful/--not-successful",
|
|
78
|
+
help="Filter by successful or unsuccessful executions.",
|
|
79
|
+
),
|
|
80
|
+
] = None,
|
|
81
|
+
dirty: Annotated[
|
|
82
|
+
bool | None,
|
|
83
|
+
typer.Option(
|
|
84
|
+
"--dirty/--not-dirty",
|
|
85
|
+
help="Filter to include only dirty or clean execution groups."
|
|
86
|
+
"These execution groups will be re-computed on the next run.",
|
|
87
|
+
),
|
|
88
|
+
] = None,
|
|
37
89
|
) -> None:
|
|
38
90
|
"""
|
|
39
91
|
List the diagnostic execution groups that have been identified
|
|
@@ -41,42 +93,273 @@ def list_groups(
|
|
|
41
93
|
The data catalog is sorted by the date that the execution group was created (first = newest).
|
|
42
94
|
If the `--column` option is provided, only the specified columns will be displayed.
|
|
43
95
|
|
|
96
|
+
Filters can be combined using AND logic across filter types and OR logic within a filter type.
|
|
97
|
+
|
|
44
98
|
The output will be in a tabular format.
|
|
45
99
|
"""
|
|
46
100
|
session = ctx.obj.database.session
|
|
101
|
+
console = ctx.obj.console
|
|
47
102
|
|
|
48
|
-
|
|
49
|
-
|
|
103
|
+
# Parse facet filters
|
|
104
|
+
try:
|
|
105
|
+
facet_filters = parse_facet_filters(filter)
|
|
106
|
+
except ValueError as e:
|
|
107
|
+
logger.error(str(e))
|
|
108
|
+
raise typer.Exit(code=1)
|
|
50
109
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
"provider": execution_groups.diagnostic.provider.slug,
|
|
57
|
-
"diagnostic": execution_groups.diagnostic.slug,
|
|
58
|
-
"dirty": execution_groups.dirty,
|
|
59
|
-
"successful": result.successful if result else None,
|
|
60
|
-
"created_at": execution_groups.created_at,
|
|
61
|
-
"updated_at": execution_groups.updated_at,
|
|
62
|
-
}
|
|
63
|
-
for execution_groups, result in execution_groups_results
|
|
64
|
-
]
|
|
110
|
+
# Build filter options
|
|
111
|
+
filters = ListGroupsFilterOptions(
|
|
112
|
+
diagnostic=diagnostic,
|
|
113
|
+
provider=provider,
|
|
114
|
+
facets=facet_filters if facet_filters else None,
|
|
65
115
|
)
|
|
116
|
+
logger.debug(f"Applying filters: {filters}")
|
|
117
|
+
|
|
118
|
+
# Get total count before filtering for warning messages
|
|
119
|
+
total_count = session.query(ExecutionGroup).count()
|
|
120
|
+
|
|
121
|
+
# Apply filters to query
|
|
122
|
+
try:
|
|
123
|
+
all_filtered_results = get_execution_group_and_latest_filtered(
|
|
124
|
+
session,
|
|
125
|
+
diagnostic_filters=filters.diagnostic,
|
|
126
|
+
provider_filters=filters.provider,
|
|
127
|
+
facet_filters=filters.facets,
|
|
128
|
+
successful=successful,
|
|
129
|
+
dirty=dirty,
|
|
130
|
+
)
|
|
131
|
+
execution_groups_results = all_filtered_results[:limit]
|
|
132
|
+
except Exception as e: # pragma: no cover
|
|
133
|
+
logger.error(f"Error applying filters: {e}")
|
|
134
|
+
raise typer.Exit(code=1)
|
|
135
|
+
|
|
136
|
+
# Check if any results found
|
|
137
|
+
if not execution_groups_results:
|
|
138
|
+
emit_no_results_warning(filters, total_count)
|
|
139
|
+
results_df = pd.DataFrame(
|
|
140
|
+
columns=[
|
|
141
|
+
"id",
|
|
142
|
+
"key",
|
|
143
|
+
"provider",
|
|
144
|
+
"diagnostic",
|
|
145
|
+
"dirty",
|
|
146
|
+
"successful",
|
|
147
|
+
"created_at",
|
|
148
|
+
"updated_at",
|
|
149
|
+
"selectors",
|
|
150
|
+
]
|
|
151
|
+
)
|
|
152
|
+
else:
|
|
153
|
+
results_df = pd.DataFrame(
|
|
154
|
+
[
|
|
155
|
+
{
|
|
156
|
+
"id": eg.id,
|
|
157
|
+
"key": eg.key,
|
|
158
|
+
"provider": eg.diagnostic.provider.slug,
|
|
159
|
+
"diagnostic": eg.diagnostic.slug,
|
|
160
|
+
"dirty": eg.dirty,
|
|
161
|
+
"successful": result.successful if result else None,
|
|
162
|
+
"created_at": eg.created_at,
|
|
163
|
+
"updated_at": eg.updated_at,
|
|
164
|
+
"selectors": json.dumps(eg.selectors),
|
|
165
|
+
}
|
|
166
|
+
for eg, result in execution_groups_results
|
|
167
|
+
]
|
|
168
|
+
)
|
|
66
169
|
|
|
67
|
-
|
|
170
|
+
# Apply column filtering
|
|
171
|
+
if column and not results_df.empty: # Only apply if df is not empty
|
|
68
172
|
if not all(col in results_df.columns for col in column):
|
|
69
173
|
logger.error(f"Column not found in data catalog: {column}")
|
|
70
174
|
raise typer.Exit(code=1)
|
|
71
175
|
results_df = results_df[column]
|
|
72
176
|
|
|
177
|
+
# Display results
|
|
73
178
|
pretty_print_df(results_df, console=console)
|
|
74
|
-
|
|
179
|
+
|
|
180
|
+
# Show limit warning if applicable
|
|
181
|
+
filtered_count = len(all_filtered_results)
|
|
182
|
+
if filtered_count > limit:
|
|
75
183
|
logger.warning(
|
|
76
|
-
f"Displaying {limit} of {
|
|
184
|
+
f"Displaying {limit} of {filtered_count} filtered results. "
|
|
185
|
+
f"Use the `--limit` option to display more."
|
|
77
186
|
)
|
|
78
187
|
|
|
79
188
|
|
|
189
|
+
@app.command()
def delete_groups(  # noqa: PLR0912, PLR0913
    ctx: typer.Context,
    diagnostic: Annotated[
        list[str] | None,
        typer.Option(
            help="Filter by diagnostic slug (substring match, case-insensitive). "
            "Multiple values can be provided."
        ),
    ] = None,
    provider: Annotated[
        list[str] | None,
        typer.Option(
            help="Filter by provider slug (substring match, case-insensitive). "
            "Multiple values can be provided."
        ),
    ] = None,
    filter: Annotated[  # noqa: A002
        list[str] | None,
        typer.Option(
            "--filter",
            help="Filter by facet key=value pairs (exact match). Multiple filters can be provided.",
        ),
    ] = None,
    successful: Annotated[
        bool | None,
        typer.Option(
            "--successful/--not-successful",
            help="Filter by successful or unsuccessful executions.",
        ),
    ] = None,
    dirty: Annotated[
        bool | None,
        typer.Option(
            "--dirty/--not-dirty",
            help="Filter to include only dirty or clean execution groups. "
            "These execution groups will be re-computed on the next run.",
        ),
    ] = None,
    remove_outputs: bool = typer.Option(
        False, "--remove-outputs", help="Also remove output directories from the filesystem"
    ),
    force: bool = typer.Option(False, help="Skip confirmation prompt"),
) -> None:
    """
    Delete execution groups matching the specified filters.

    This command will delete execution groups and their associated executions.
    Use filters to specify which groups to delete. At least one filter must be provided
    to prevent accidental deletion of all groups.

    Filters can be combined using AND logic across filter types and OR logic within a filter type.

    The matching groups are printed as a preview and, unless ``--force`` is given,
    the user is asked to confirm before anything is removed.
    The command exits with code 1 if a facet filter cannot be parsed, if no filter
    at all was supplied, or if the filtered query fails.
    """
    session = ctx.obj.database.session
    console = ctx.obj.console

    # Parse facet filters
    try:
        facet_filters = parse_facet_filters(filter)
    except ValueError as e:
        logger.error(str(e))
        raise typer.Exit(code=1)

    # Refuse to run without any filter: that would select every execution group
    if not any([diagnostic, provider, facet_filters, successful is not None, dirty is not None]):
        logger.warning("THIS WILL DELETE ALL EXECUTION GROUPS IN THE DATABASE")
        raise typer.Exit(code=1)

    # Build filter options
    filters = ListGroupsFilterOptions(
        diagnostic=diagnostic,
        provider=provider,
        facets=facet_filters if facet_filters else None,
    )
    logger.debug(f"Applying filters: {filters}")

    # Apply filters to query
    try:
        all_filtered_results = get_execution_group_and_latest_filtered(
            session,
            diagnostic_filters=filters.diagnostic,
            provider_filters=filters.provider,
            facet_filters=filters.facets,
            successful=successful,
            dirty=dirty,
        )
    except Exception as e:  # pragma: no cover
        logger.error(f"Error applying filters: {e}")
        raise typer.Exit(code=1)

    # Check if any results found
    if not all_filtered_results:
        emit_no_results_warning(filters, session.query(ExecutionGroup).count())
        return

    # Convert to DataFrame for preview
    results_df = pd.DataFrame(
        [
            {
                "id": eg.id,
                "key": eg.key,
                "provider": eg.diagnostic.provider.slug,
                "diagnostic": eg.diagnostic.slug,
                "dirty": eg.dirty,
                "successful": result.successful if result else None,
                "created_at": eg.created_at,
                "updated_at": eg.updated_at,
                "selectors": json.dumps(eg.selectors),
            }
            for eg, result in all_filtered_results
        ]
    )

    # Display preview
    console.print("Execution groups to be deleted:")
    pretty_print_df(results_df, console=console)

    count = len(all_filtered_results)
    console.print(f"\nWill delete {count} execution group(s).")

    # Confirm unless force is set
    if not force:
        if not typer.confirm("Do you want to proceed with deletion?"):
            console.print("Deletion cancelled.")
            return

    # Remove output directories if requested
    if remove_outputs:
        config = ctx.obj.config
        # Resolve once so that ".." segments and symlinks cannot defeat the containment check
        results_root = config.paths.results.resolve()
        for eg, _ in all_filtered_results:
            for execution in eg.executions:
                output_dir = (results_root / execution.output_fragment).resolve()

                # Safety check: only ever remove a directory strictly inside the results root.
                # An empty or "." fragment resolves to the root itself and must not be removed.
                if output_dir == results_root or not output_dir.is_relative_to(
                    results_root
                ):  # pragma: no cover
                    logger.error(f"Skipping unsafe path: {output_dir}")
                    continue

                if output_dir.exists():
                    try:
                        logger.warning(f"Removing output directory: {output_dir}")
                        shutil.rmtree(output_dir)
                    except Exception as e:
                        # Best effort: a failed removal is logged and the deletion carries on
                        logger.error(f"Failed to remove {output_dir}: {e}")

    # Delete execution groups and all related records
    # TODO: Add cascade delete to FK relationships and simplify this code
    # NOTE(review): when a transaction is already open only a nested transaction is started
    # here - confirm that the enclosing transaction is committed by the session lifecycle.
    with session.begin_nested() if session.in_transaction() else session.begin():
        for eg, _ in all_filtered_results:
            for execution in eg.executions:
                # Delete MetricValues first
                for metric_value in execution.values:
                    session.delete(metric_value)

                # Delete ExecutionOutputs
                for output in execution.outputs:
                    session.delete(output)

                # Delete many-to-many associations with datasets
                session.execute(
                    execution_datasets.delete().where(execution_datasets.c.execution_id == execution.id)
                )

                # Now delete the execution
                session.delete(execution)

            # Finally delete the execution group
            session.delete(eg)

    if remove_outputs:
        console.print(f"[green]Successfully deleted {count} execution group(s) and their output directories.")
    else:
        console.print(f"[green]Successfully deleted {count} execution group(s).")
|
|
361
|
+
|
|
362
|
+
|
|
80
363
|
def walk_directory(directory: pathlib.Path, tree: Tree) -> None:
|
|
81
364
|
"""Recursively build a Tree with directory contents."""
|
|
82
365
|
# Sort dirs first then by filename
|
|
@@ -182,6 +465,29 @@ def _log_panel(result_directory: pathlib.Path) -> Panel | None:
|
|
|
182
465
|
)
|
|
183
466
|
|
|
184
467
|
|
|
468
|
+
def emit_no_results_warning(
    filters: ListGroupsFilterOptions,
    total_count: int,
) -> None:
    """
    Emit informative warning when filters produce no results.

    Parameters
    ----------
    filters
        The diagnostic, provider and facet filters that were applied
    total_count
        Total number of execution groups in the database, reported for context
    """
    filter_parts = []
    if filters.diagnostic:
        filter_parts.append(f"diagnostic filters: {filters.diagnostic}")
    if filters.provider:
        filter_parts.append(f"provider filters: {filters.provider}")
    if filters.facets:
        facet_strs = [f"{k}={v}" for k, v in filters.facets.items()]
        filter_parts.append(f"facet filters: {facet_strs}")

    # None of the three filter kinds may be set (the options object carries no other
    # criteria); say so explicitly rather than ending the message with a dangling colon.
    applied = ", ".join(filter_parts) if filter_parts else "no diagnostic, provider or facet filters"

    logger.warning(
        f"No execution groups match the specified filters. "
        f"Total execution groups in database: {total_count}. "
        f"Applied filters: {applied}"
    )
|
|
489
|
+
|
|
490
|
+
|
|
185
491
|
@app.command()
|
|
186
492
|
def inspect(ctx: typer.Context, execution_id: int) -> None:
|
|
187
493
|
"""
|
|
@@ -191,6 +497,8 @@ def inspect(ctx: typer.Context, execution_id: int) -> None:
|
|
|
191
497
|
"""
|
|
192
498
|
config: Config = ctx.obj.config
|
|
193
499
|
session = ctx.obj.database.session
|
|
500
|
+
console = ctx.obj.console
|
|
501
|
+
|
|
194
502
|
execution_group = session.get(ExecutionGroup, execution_id)
|
|
195
503
|
|
|
196
504
|
if not execution_group:
|
|
@@ -217,6 +525,7 @@ def flag_dirty(ctx: typer.Context, execution_id: int) -> None:
|
|
|
217
525
|
Flag an execution group for recomputation
|
|
218
526
|
"""
|
|
219
527
|
session = ctx.obj.database.session
|
|
528
|
+
console = ctx.obj.console
|
|
220
529
|
with session.begin():
|
|
221
530
|
execution_group = session.get(ExecutionGroup, execution_id)
|
|
222
531
|
|