climate-ref 0.6.6__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their respective public registries. It is provided for informational purposes only.
Files changed (35)
  1. climate_ref/cli/__init__.py +12 -3
  2. climate_ref/cli/_utils.py +56 -2
  3. climate_ref/cli/datasets.py +48 -9
  4. climate_ref/cli/executions.py +333 -24
  5. climate_ref/cli/providers.py +1 -2
  6. climate_ref/config.py +4 -4
  7. climate_ref/database.py +62 -4
  8. climate_ref/dataset_registry/obs4ref_reference.txt +0 -9
  9. climate_ref/dataset_registry/sample_data.txt +10 -19
  10. climate_ref/datasets/__init__.py +3 -3
  11. climate_ref/datasets/base.py +121 -20
  12. climate_ref/datasets/cmip6.py +2 -0
  13. climate_ref/datasets/obs4mips.py +26 -15
  14. climate_ref/executor/result_handling.py +4 -1
  15. climate_ref/migrations/env.py +12 -10
  16. climate_ref/migrations/versions/2025-09-10T1358_2f6e36738e06_use_version_as_version_facet_for_.py +35 -0
  17. climate_ref/migrations/versions/2025-09-22T2359_20cd136a5b04_add_pmp_version.py +35 -0
  18. climate_ref/models/__init__.py +1 -6
  19. climate_ref/models/base.py +4 -20
  20. climate_ref/models/dataset.py +2 -0
  21. climate_ref/models/diagnostic.py +2 -1
  22. climate_ref/models/execution.py +219 -7
  23. climate_ref/models/metric_value.py +25 -110
  24. climate_ref/models/mixins.py +144 -0
  25. climate_ref/models/provider.py +2 -1
  26. climate_ref/provider_registry.py +4 -4
  27. climate_ref/slurm.py +2 -2
  28. climate_ref/testing.py +1 -1
  29. {climate_ref-0.6.6.dist-info → climate_ref-0.7.0.dist-info}/METADATA +1 -1
  30. climate_ref-0.7.0.dist-info/RECORD +58 -0
  31. climate_ref-0.6.6.dist-info/RECORD +0 -55
  32. {climate_ref-0.6.6.dist-info → climate_ref-0.7.0.dist-info}/WHEEL +0 -0
  33. {climate_ref-0.6.6.dist-info → climate_ref-0.7.0.dist-info}/entry_points.txt +0 -0
  34. {climate_ref-0.6.6.dist-info → climate_ref-0.7.0.dist-info}/licenses/LICENCE +0 -0
  35. {climate_ref-0.6.6.dist-info → climate_ref-0.7.0.dist-info}/licenses/NOTICE +0 -0
climate_ref/cli/__init__.py CHANGED
@@ -3,11 +3,12 @@
 import importlib
 from enum import Enum
 from pathlib import Path
-from typing import Annotated, Optional
+from typing import Annotated
 
 import typer
 from attrs import define
 from loguru import logger
+from rich.console import Console
 
 from climate_ref import __version__
 from climate_ref.cli import config, datasets, executions, providers, solve
@@ -37,6 +38,7 @@ class CLIContext:
 
     config: Config
     database: Database
+    console: Console
 
 
 def _version_callback(value: bool) -> None:
@@ -46,6 +48,13 @@ def _version_callback(value: bool) -> None:
         raise typer.Exit()
 
 
+def _create_console() -> Console:
+    # Hook for testing to disable color output
+
+    # Rich respects the NO_COLOR environment variable
+    return Console()
+
+
 def _load_config(configuration_directory: Path | None = None) -> Config:
     """
     Load the configuration from the specified directory
@@ -129,7 +138,7 @@ def main(  # noqa: PLR0913
         typer.Option(case_sensitive=False, help="Set the level of logging information to display"),
     ] = LogLevel.Info,
     version: Annotated[
-        Optional[bool],
+        bool | None,
         typer.Option(
             "--version", callback=_version_callback, is_eager=True, help="Print the version and exit"
         ),
@@ -155,7 +164,7 @@ def main(  # noqa: PLR0913
 
     logger.debug(f"Configuration loaded from: {config._config_file!s}")
 
-    ctx.obj = CLIContext(config=config, database=Database.from_config(config))
+    ctx.obj = CLIContext(config=config, database=Database.from_config(config), console=_create_console())
 
 
 if __name__ == "__main__":
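A note on the new console plumbing: `_create_console` is a seam for tests to swap in a non-interactive console, and Rich's `Console` already honours the `NO_COLOR` environment variable. A minimal sketch of how a test might use the seam (this fixture is illustrative, not part of the release):

import climate_ref.cli as cli
from rich.console import Console


def test_cli_output_is_plain(monkeypatch):
    # Swap the factory for one that strips ANSI colour and fixes the width,
    # so CLI output can be compared against plain strings.
    monkeypatch.setattr(cli, "_create_console", lambda: Console(no_color=True, width=120))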
climate_ref/cli/_utils.py CHANGED
@@ -5,6 +5,59 @@ from rich.console import Console
 from rich.table import Table
 
 
+def parse_facet_filters(filters: list[str] | None) -> dict[str, str]:
+    """
+    Parse facet filters from key=value format into a dictionary.
+
+    Parameters
+    ----------
+    filters
+        List of filter strings in 'key=value' format
+
+    Returns
+    -------
+    dict[str, str]
+        Dictionary mapping facet keys to values
+
+    Raises
+    ------
+    ValueError
+        If a filter string is not in valid 'key=value' format
+
+    Examples
+    --------
+    >>> parse_facet_filters(["source_id=GFDL-ESM4", "variable_id=tas"])
+    {'source_id': 'GFDL-ESM4', 'variable_id': 'tas'}
+    """
+    if not filters:
+        return {}
+
+    parsed: dict[str, str] = {}
+    for filter_str in filters:
+        if "=" not in filter_str:
+            raise ValueError(
+                f"Invalid filter format: '{filter_str}'. "
+                f"Expected format: 'key=value' or 'dataset_type.key=value' "
+                f"(e.g., 'source_id=GFDL-ESM4' or 'cmip6.source_id=GFDL-ESM4')"
+            )
+
+        key, value = filter_str.split("=", 1)
+        key = key.strip()
+        value = value.strip()
+
+        if not key:
+            raise ValueError(f"Empty key in filter: '{filter_str}'")
+        if not value:
+            raise ValueError(f"Empty value in filter: '{filter_str}'")
+
+        if key in parsed:
+            logger.warning(f"Filter key '{key}' specified multiple times. Using last value: '{value}'")
+
+        parsed[key] = value
+
+    return parsed
+
+
 def df_to_table(df: pd.DataFrame, max_col_count: int = -1) -> Table:
     """
     Convert a DataFrame to a rich Table instance
@@ -32,7 +85,7 @@ def df_to_table(df: pd.DataFrame, max_col_count: int = -1) -> Table:
 
     table = Table(*[str(column) for column in df.columns])
 
-    for index, value_list in enumerate(df.values.tolist()):
+    for value_list in df.values.tolist():
         row = [str(x) for x in value_list]
         table.add_row(*row)
 
@@ -59,7 +112,8 @@ def pretty_print_df(df: pd.DataFrame, console: Console | None = None) -> None:
     # Drop duplicates as they are not informative to CLI users.
     df = df.drop_duplicates()
 
-    if console is None:
+    if console is None:  # pragma: no branch
+        logger.debug("Creating new console for pretty printing")
         console = Console()
 
     max_col_count = console.width // 10
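Two behaviours of `parse_facet_filters` are worth calling out: duplicate keys are tolerated with a warning (last value wins), and malformed entries fail loudly. A quick sketch of those semantics, assuming the function is importable exactly as diffed above:

from climate_ref.cli._utils import parse_facet_filters

# Well-formed filters become a plain dict
assert parse_facet_filters(["source_id=GFDL-ESM4", "variable_id=tas"]) == {
    "source_id": "GFDL-ESM4",
    "variable_id": "tas",
}

# A repeated key logs a warning and the last value wins
assert parse_facet_filters(["variable_id=tas", "variable_id=pr"]) == {"variable_id": "pr"}

# Anything without '=' (or with an empty key or value) raises ValueError
try:
    parse_facet_filters(["source_id"])
except ValueError as exc:
    print(exc)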
climate_ref/cli/datasets.py CHANGED
@@ -15,9 +15,9 @@ from typing import Annotated
 
 import typer
 from loguru import logger
-from rich.console import Console
 
 from climate_ref.cli._utils import pretty_print_df
+from climate_ref.database import ModelState
 from climate_ref.datasets import get_dataset_adapter
 from climate_ref.models import Dataset
 from climate_ref.provider_registry import ProviderRegistry
@@ -27,7 +27,6 @@ from climate_ref_core.dataset_registry import dataset_registry_manager, fetch_al
 from climate_ref_core.datasets import SourceDatasetType
 
 app = typer.Typer(help=__doc__)
-console = Console()
 
 
 @app.command(name="list")
@@ -70,7 +69,7 @@ def list_(
             raise typer.Exit(code=1)
         data_catalog = data_catalog[column].sort_values(by=column)
 
-    pretty_print_df(data_catalog, console=console)
+    pretty_print_df(data_catalog, console=ctx.obj.console)
 
 
 @app.command()
@@ -97,7 +96,7 @@ def list_columns(
 
 
 @app.command()
-def ingest(  # noqa: PLR0913
+def ingest(  # noqa
     ctx: typer.Context,
     file_or_directory: list[Path],
     source_type: Annotated[SourceDatasetType, typer.Option(help="Type of source dataset")],
@@ -106,7 +105,7 @@ def ingest(  # noqa: PLR0913
     n_jobs: Annotated[int | None, typer.Option(help="Number of jobs to run in parallel")] = None,
     skip_invalid: Annotated[
         bool, typer.Option(help="Ignore (but log) any datasets that don't pass validation")
-    ] = False,
+    ] = True,
 ) -> None:
     """
     Ingest a directory of datasets into the database
@@ -118,6 +117,7 @@ def ingest(  # noqa: PLR0913
     """
    config = ctx.obj.config
    db = ctx.obj.database
+    console = ctx.obj.console
 
    kwargs = {}
 
@@ -135,14 +135,33 @@ def ingest(  # noqa: PLR0913
             logger.error(f"File or directory {_dir} does not exist")
             raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), _dir)
 
-        data_catalog = adapter.find_local_datasets(_dir)
-        data_catalog = adapter.validate_data_catalog(data_catalog, skip_invalid=skip_invalid)
+        # TODO: This assumes that all datasets are nc files.
+        # This is true for CMIP6 and obs4MIPs but may not be true for other dataset types in the future.
+        if not _dir.rglob("*.nc"):
+            logger.error(f"No .nc files found in {_dir}")
+            continue
+
+        try:
+            data_catalog = adapter.find_local_datasets(_dir)
+            data_catalog = adapter.validate_data_catalog(data_catalog, skip_invalid=skip_invalid)
+        except Exception as e:
+            logger.error(f"Error ingesting datasets from {_dir}: {e}")
+            continue
 
         logger.info(
             f"Found {len(data_catalog)} files for {len(data_catalog[adapter.slug_column].unique())} datasets"
         )
         pretty_print_df(adapter.pretty_subset(data_catalog), console=console)
 
+        # track stats for a given directory
+        num_created_datasets = 0
+        num_updated_datasets = 0
+        num_unchanged_datasets = 0
+        num_created_files = 0
+        num_updated_files = 0
+        num_removed_files = 0
+        num_unchanged_files = 0
+
         for instance_id, data_catalog_dataset in data_catalog.groupby(adapter.slug_column):
             logger.debug(f"Processing dataset {instance_id}")
             with db.session.begin():
@@ -154,9 +173,29 @@ def ingest(  # noqa: PLR0913
                 )
                 if not dataset:
                     logger.info(f"Would save dataset {instance_id} to the database")
-                    continue
             else:
-                adapter.register_dataset(config, db, data_catalog_dataset)
+                results = adapter.register_dataset(config, db, data_catalog_dataset)
+
+                if results.dataset_state == ModelState.CREATED:
+                    num_created_datasets += 1
+                elif results.dataset_state == ModelState.UPDATED:
+                    num_updated_datasets += 1
+                else:
+                    num_unchanged_datasets += 1
+                num_created_files += len(results.files_added)
+                num_updated_files += len(results.files_updated)
+                num_removed_files += len(results.files_removed)
+                num_unchanged_files += len(results.files_unchanged)
+
+        if not dry_run:
+            ingestion_msg = (
+                f"Datasets: {num_created_datasets}/{num_updated_datasets}/{num_unchanged_datasets}"
+                " (created/updated/unchanged), "
+                f"Files: "
+                f"{num_created_files}/{num_updated_files}/{num_removed_files}/{num_unchanged_files}"
+                " (created/updated/removed/unchanged)"
+            )
+            logger.info(ingestion_msg)
 
     if solve:
         solve_required_executions(
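`ingest` now consumes a structured result from `adapter.register_dataset` instead of ignoring its return value. The result type itself is not shown in this section (it lives in climate_ref/datasets/base.py, which also changed in this release), but its shape can be inferred from the counters above. A stand-in sketch of that inferred interface — the class name `RegistrationResult` is a guess, and the real `ModelState` is imported from climate_ref.database per the hunk above:

from dataclasses import dataclass, field
from enum import Enum


class ModelState(Enum):
    # Stand-in for climate_ref.database.ModelState, as consumed by ingest()
    CREATED = "created"
    UPDATED = "updated"
    UNCHANGED = "unchanged"


@dataclass
class RegistrationResult:
    # Shape implied by how ingest() tallies its summary message
    dataset_state: ModelState
    files_added: list[str] = field(default_factory=list)
    files_updated: list[str] = field(default_factory=list)
    files_removed: list[str] = field(default_factory=list)
    files_unchanged: list[str] = field(default_factory=list)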
climate_ref/cli/executions.py CHANGED
@@ -2,38 +2,90 @@
 View execution groups and their results
 """
 
+import json
 import pathlib
+import shutil
+from dataclasses import dataclass
 from typing import Annotated
 from urllib.parse import quote
 
 import pandas as pd
 import typer
 from loguru import logger
-from rich.console import Console, Group
+from rich.console import Group
 from rich.filesize import decimal
 from rich.markup import escape
 from rich.panel import Panel
 from rich.text import Text
 from rich.tree import Tree
 
-from climate_ref.cli._utils import df_to_table, pretty_print_df
+from climate_ref.cli._utils import df_to_table, parse_facet_filters, pretty_print_df
 from climate_ref.config import Config
 from climate_ref.models import Execution, ExecutionGroup
-from climate_ref.models.execution import get_execution_group_and_latest
+from climate_ref.models.execution import execution_datasets, get_execution_group_and_latest_filtered
 from climate_ref_core.logging import EXECUTION_LOG_FILENAME
 
 app = typer.Typer(help=__doc__)
-console = Console()
+
+
+@dataclass
+class ListGroupsFilterOptions:
+    """Options to filter execution groups in list-groups command"""
+
+    diagnostic: list[str] | None = None
+    """Filter by diagnostic slug (substring, case-insensitive)"""
+
+    provider: list[str] | None = None
+    """Filter by provider slug (substring, case-insensitive)"""
+
+    facets: dict[str, str] | None = None
+    """Filter by facet key-value pairs (exact match)"""
 
 
 @app.command()
-def list_groups(
+def list_groups(  # noqa: PLR0913
     ctx: typer.Context,
     column: Annotated[
         list[str] | None,
         typer.Option(help="Only include specified columns in the output"),
     ] = None,
     limit: int = typer.Option(100, help="Limit the number of rows to display"),
+    diagnostic: Annotated[
+        list[str] | None,
+        typer.Option(
+            help="Filter by diagnostic slug (substring match, case-insensitive)."
+            "Multiple values can be provided."
+        ),
+    ] = None,
+    provider: Annotated[
+        list[str] | None,
+        typer.Option(
+            help="Filter by provider slug (substring match, case-insensitive)."
+            "Multiple values can be provided."
+        ),
+    ] = None,
+    filter: Annotated[  # noqa: A002
+        list[str] | None,
+        typer.Option(
+            "--filter",
+            help="Filter by facet key=value pairs (exact match). Multiple filters can be provided.",
+        ),
+    ] = None,
+    successful: Annotated[
+        bool | None,
+        typer.Option(
+            "--successful/--not-successful",
+            help="Filter by successful or unsuccessful executions.",
+        ),
+    ] = None,
+    dirty: Annotated[
+        bool | None,
+        typer.Option(
+            "--dirty/--not-dirty",
+            help="Filter to include only dirty or clean execution groups."
+            "These execution groups will be re-computed on the next run.",
+        ),
+    ] = None,
 ) -> None:
     """
     List the diagnostic execution groups that have been identified
@@ -41,42 +93,273 @@ def list_groups(
     The data catalog is sorted by the date that the execution group was created (first = newest).
     If the `--column` option is provided, only the specified columns will be displayed.
 
+    Filters can be combined using AND logic across filter types and OR logic within a filter type.
+
     The output will be in a tabular format.
     """
     session = ctx.obj.database.session
+    console = ctx.obj.console
 
-    execution_groups_results = get_execution_group_and_latest(session).limit(limit).all()
-    execution_count = session.query(ExecutionGroup).count()
+    # Parse facet filters
+    try:
+        facet_filters = parse_facet_filters(filter)
+    except ValueError as e:
+        logger.error(str(e))
+        raise typer.Exit(code=1)
 
-    results_df = pd.DataFrame(
-        [
-            {
-                "id": execution_groups.id,
-                "key": execution_groups.key,
-                "provider": execution_groups.diagnostic.provider.slug,
-                "diagnostic": execution_groups.diagnostic.slug,
-                "dirty": execution_groups.dirty,
-                "successful": result.successful if result else None,
-                "created_at": execution_groups.created_at,
-                "updated_at": execution_groups.updated_at,
-            }
-            for execution_groups, result in execution_groups_results
-        ]
+    # Build filter options
+    filters = ListGroupsFilterOptions(
+        diagnostic=diagnostic,
+        provider=provider,
+        facets=facet_filters if facet_filters else None,
     )
+    logger.debug(f"Applying filters: {filters}")
+
+    # Get total count before filtering for warning messages
+    total_count = session.query(ExecutionGroup).count()
+
+    # Apply filters to query
+    try:
+        all_filtered_results = get_execution_group_and_latest_filtered(
+            session,
+            diagnostic_filters=filters.diagnostic,
+            provider_filters=filters.provider,
+            facet_filters=filters.facets,
+            successful=successful,
+            dirty=dirty,
+        )
+        execution_groups_results = all_filtered_results[:limit]
+    except Exception as e:  # pragma: no cover
+        logger.error(f"Error applying filters: {e}")
+        raise typer.Exit(code=1)
+
+    # Check if any results found
+    if not execution_groups_results:
+        emit_no_results_warning(filters, total_count)
+        results_df = pd.DataFrame(
+            columns=[
+                "id",
+                "key",
+                "provider",
+                "diagnostic",
+                "dirty",
+                "successful",
+                "created_at",
+                "updated_at",
+                "selectors",
+            ]
+        )
+    else:
+        results_df = pd.DataFrame(
+            [
+                {
+                    "id": eg.id,
+                    "key": eg.key,
+                    "provider": eg.diagnostic.provider.slug,
+                    "diagnostic": eg.diagnostic.slug,
+                    "dirty": eg.dirty,
+                    "successful": result.successful if result else None,
+                    "created_at": eg.created_at,
+                    "updated_at": eg.updated_at,
+                    "selectors": json.dumps(eg.selectors),
+                }
+                for eg, result in execution_groups_results
+            ]
+        )
 
-    if column:
+    # Apply column filtering
+    if column and not results_df.empty:  # Only apply if df is not empty
         if not all(col in results_df.columns for col in column):
             logger.error(f"Column not found in data catalog: {column}")
             raise typer.Exit(code=1)
         results_df = results_df[column]
 
+    # Display results
     pretty_print_df(results_df, console=console)
-    if execution_count > limit:
+
+    # Show limit warning if applicable
+    filtered_count = len(all_filtered_results)
+    if filtered_count > limit:
         logger.warning(
-            f"Displaying {limit} of {execution_count} results. Use the `--limit` option to display more."
+            f"Displaying {limit} of {filtered_count} filtered results. "
+            f"Use the `--limit` option to display more."
         )
 
 
+@app.command()
+def delete_groups(  # noqa: PLR0912, PLR0913
+    ctx: typer.Context,
+    diagnostic: Annotated[
+        list[str] | None,
+        typer.Option(
+            help="Filter by diagnostic slug (substring match, case-insensitive)."
+            "Multiple values can be provided."
+        ),
+    ] = None,
+    provider: Annotated[
+        list[str] | None,
+        typer.Option(
+            help="Filter by provider slug (substring match, case-insensitive)."
+            "Multiple values can be provided."
+        ),
+    ] = None,
+    filter: Annotated[  # noqa: A002
+        list[str] | None,
+        typer.Option(
+            "--filter",
+            help="Filter by facet key=value pairs (exact match). Multiple filters can be provided.",
+        ),
+    ] = None,
+    successful: Annotated[
+        bool | None,
+        typer.Option(
+            "--successful/--not-successful",
+            help="Filter by successful or unsuccessful executions.",
+        ),
+    ] = None,
+    dirty: Annotated[
+        bool | None,
+        typer.Option(
+            "--dirty/--not-dirty",
+            help="Filter to include only dirty or clean execution groups."
+            "These execution groups will be re-computed on the next run.",
+        ),
+    ] = None,
+    remove_outputs: bool = typer.Option(
+        False, "--remove-outputs", help="Also remove output directories from the filesystem"
+    ),
+    force: bool = typer.Option(False, help="Skip confirmation prompt"),
+) -> None:
+    """
+    Delete execution groups matching the specified filters.
+
+    This command will delete execution groups and their associated executions.
+    Use filters to specify which groups to delete. At least one filter must be provided
+    to prevent accidental deletion of all groups.
+
+    Filters can be combined using AND logic across filter types and OR logic within a filter type.
+    """
+    session = ctx.obj.database.session
+    console = ctx.obj.console
+
+    # Parse facet filters
+    try:
+        facet_filters = parse_facet_filters(filter)
+    except ValueError as e:
+        logger.error(str(e))
+        raise typer.Exit(code=1)
+
+    if not any([diagnostic, provider, facet_filters, successful is not None, dirty is not None]):
+        logger.warning("THIS WILL DELETE ALL EXECUTION GROUPS IN THE DATABASE")
+        raise typer.Exit(code=1)
+
+    # Build filter options
+    filters = ListGroupsFilterOptions(
+        diagnostic=diagnostic,
+        provider=provider,
+        facets=facet_filters if facet_filters else None,
+    )
+    logger.debug(f"Applying filters: {filters}")
+
+    # Apply filters to query
+    try:
+        all_filtered_results = get_execution_group_and_latest_filtered(
+            session,
+            diagnostic_filters=filters.diagnostic,
+            provider_filters=filters.provider,
+            facet_filters=filters.facets,
+            successful=successful,
+            dirty=dirty,
+        )
+    except Exception as e:  # pragma: no cover
+        logger.error(f"Error applying filters: {e}")
+        raise typer.Exit(code=1)
+
+    # Check if any results found
+    if not all_filtered_results:
+        emit_no_results_warning(filters, session.query(ExecutionGroup).count())
+        return
+
+    # Convert to DataFrame for preview
+    results_df = pd.DataFrame(
+        [
+            {
+                "id": eg.id,
+                "key": eg.key,
+                "provider": eg.diagnostic.provider.slug,
+                "diagnostic": eg.diagnostic.slug,
+                "dirty": eg.dirty,
+                "successful": result.successful if result else None,
+                "created_at": eg.created_at,
+                "updated_at": eg.updated_at,
+                "selectors": json.dumps(eg.selectors),
+            }
+            for eg, result in all_filtered_results
+        ]
+    )
+
+    # Display preview
+    console.print("Execution groups to be deleted:")
+    pretty_print_df(results_df, console=console)
+
+    count = len(all_filtered_results)
+    console.print(f"\nWill delete {count} execution group(s).")
+
+    # Confirm unless force is set
+    if not force:
+        if not typer.confirm("Do you want to proceed with deletion?"):
+            console.print("Deletion cancelled.")
+            return
+
+    # Remove output directories if requested
+    if remove_outputs:
+        config = ctx.obj.config
+        for eg, _ in all_filtered_results:
+            for execution in eg.executions:
+                output_dir = config.paths.results / execution.output_fragment
+
+                # Safety check
+                if not output_dir.is_relative_to(config.paths.results):  # pragma: no cover
+                    logger.error(f"Skipping unsafe path: {output_dir}")
+                    continue
+
+                if output_dir.exists():
+                    try:
+                        logger.warning(f"Removing output directory: {output_dir}")
+                        shutil.rmtree(output_dir)
+                    except Exception as e:
+                        logger.error(f"Failed to remove {output_dir}: {e}")
+
+    # Delete execution groups and all related records
+    # TODO: Add cascade delete to FK relationships and simplify this code
+    with session.begin_nested() if session.in_transaction() else session.begin():
+        for eg, _ in all_filtered_results:
+            for execution in eg.executions:
+                # Delete MetricValues first
+                for metric_value in execution.values:
+                    session.delete(metric_value)
+
+                # Delete ExecutionOutputs
+                for output in execution.outputs:
+                    session.delete(output)
+
+                # Delete many-to-many associations with datasets
+                session.execute(
+                    execution_datasets.delete().where(execution_datasets.c.execution_id == execution.id)
+                )
+
+                # Now delete the execution
+                session.delete(execution)
+
+            # Finally delete the execution group
+            session.delete(eg)
+
+    if remove_outputs:
+        console.print(f"[green]Successfully deleted {count} execution group(s) and their output directories.")
+    else:
+        console.print(f"[green]Successfully deleted {count} execution group(s).")
+
+
 def walk_directory(directory: pathlib.Path, tree: Tree) -> None:
     """Recursively build a Tree with directory contents."""
     # Sort dirs first then by filename
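The filter semantics shared by `list-groups` and the new `delete-groups` (AND across filter types, OR within one) are easiest to see in an invocation. A sketch using Typer's test runner, assuming the root Typer app is exposed as `climate_ref.cli.app` and this sub-app is registered under the name `executions` — neither assumption is confirmed by this diff:

from typer.testing import CliRunner

from climate_ref.cli import app

runner = CliRunner()

# (provider matches "pmp" OR "ilamb") AND variable_id == "tas" AND group is dirty
result = runner.invoke(
    app,
    [
        "executions",
        "list-groups",
        "--provider", "pmp",
        "--provider", "ilamb",
        "--filter", "variable_id=tas",
        "--dirty",
    ],
)
print(result.output)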
@@ -182,6 +465,29 @@ def _log_panel(result_directory: pathlib.Path) -> Panel | None:
     )
 
 
+def emit_no_results_warning(
+    filters: ListGroupsFilterOptions,
+    total_count: int,
+) -> None:
+    """
+    Emit informative warning when filters produce no results.
+    """
+    filter_parts = []
+    if filters.diagnostic:
+        filter_parts.append(f"diagnostic filters: {filters.diagnostic}")
+    if filters.provider:
+        filter_parts.append(f"provider filters: {filters.provider}")
+    if filters.facets:
+        facet_strs = [f"{k}={v}" for k, v in filters.facets.items()]
+        filter_parts.append(f"facet filters: {facet_strs}")
+
+    logger.warning(
+        f"No execution groups match the specified filters. "
+        f"Total execution groups in database: {total_count}. "
+        f"Applied filters: {', '.join(filter_parts)}"
+    )
+
+
 @app.command()
 def inspect(ctx: typer.Context, execution_id: int) -> None:
     """
@@ -191,6 +497,8 @@ def inspect(ctx: typer.Context, execution_id: int) -> None:
     """
     config: Config = ctx.obj.config
     session = ctx.obj.database.session
+    console = ctx.obj.console
+
     execution_group = session.get(ExecutionGroup, execution_id)
 
     if not execution_group:
@@ -217,6 +525,7 @@ def flag_dirty(ctx: typer.Context, execution_id: int) -> None:
     Flag an execution group for recomputation
     """
     session = ctx.obj.database.session
+    console = ctx.obj.console
     with session.begin():
         execution_group = session.get(ExecutionGroup, execution_id)
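On the cascade TODO in `delete_groups`: configuring delete cascades on the ORM relationships would let a single `session.delete(execution_group)` clean up its children. A simplified sketch of that pattern in SQLAlchemy 2.0 style — these are illustrative stand-ins, not the real models from climate_ref/models/execution.py:

from __future__ import annotations

from sqlalchemy import ForeignKey
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship


class Base(DeclarativeBase):
    pass


class ExecutionGroup(Base):
    __tablename__ = "execution_group"

    id: Mapped[int] = mapped_column(primary_key=True)

    # "all, delete-orphan" makes session.delete(group) remove its executions,
    # replacing the manual per-child loops in delete_groups above.
    executions: Mapped[list[Execution]] = relationship(
        back_populates="execution_group", cascade="all, delete-orphan"
    )


class Execution(Base):
    __tablename__ = "execution"

    id: Mapped[int] = mapped_column(primary_key=True)
    execution_group_id: Mapped[int] = mapped_column(ForeignKey("execution_group.id"))

    execution_group: Mapped[ExecutionGroup] = relationship(back_populates="executions")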