climate-ref 0.6.6__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. {climate_ref-0.6.6 → climate_ref-0.8.0}/PKG-INFO +1 -1
  2. {climate_ref-0.6.6 → climate_ref-0.8.0}/pyproject.toml +1 -1
  3. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/cli/__init__.py +12 -3
  4. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/cli/_utils.py +56 -2
  5. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/cli/datasets.py +49 -12
  6. climate_ref-0.8.0/src/climate_ref/cli/executions.py +538 -0
  7. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/cli/providers.py +1 -2
  8. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/config.py +67 -4
  9. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/database.py +62 -4
  10. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/dataset_registry/obs4ref_reference.txt +0 -9
  11. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/dataset_registry/sample_data.txt +10 -19
  12. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/datasets/__init__.py +3 -3
  13. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/datasets/base.py +121 -20
  14. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/datasets/cmip6.py +2 -0
  15. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/datasets/obs4mips.py +26 -15
  16. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/executor/hpc.py +149 -53
  17. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/executor/local.py +1 -2
  18. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/executor/result_handling.py +17 -7
  19. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/env.py +12 -10
  20. climate_ref-0.8.0/src/climate_ref/migrations/versions/2025-09-10T1358_2f6e36738e06_use_version_as_version_facet_for_.py +35 -0
  21. climate_ref-0.8.0/src/climate_ref/migrations/versions/2025-09-22T2359_20cd136a5b04_add_pmp_version.py +35 -0
  22. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/models/__init__.py +1 -6
  23. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/models/base.py +4 -20
  24. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/models/dataset.py +2 -0
  25. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/models/diagnostic.py +2 -1
  26. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/models/execution.py +219 -7
  27. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/models/metric_value.py +25 -110
  28. climate_ref-0.8.0/src/climate_ref/models/mixins.py +144 -0
  29. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/models/provider.py +2 -1
  30. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/provider_registry.py +4 -4
  31. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/slurm.py +2 -2
  32. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/solver.py +17 -6
  33. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/testing.py +1 -1
  34. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/cli/test_datasets.py +2 -4
  35. climate_ref-0.8.0/tests/unit/cli/test_executions/test_inspect.txt +25 -0
  36. climate_ref-0.8.0/tests/unit/cli/test_executions.py +831 -0
  37. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/cli/test_solve.py +4 -4
  38. climate_ref-0.8.0/tests/unit/cli/test_utils.py +60 -0
  39. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_cmip6/cmip6_catalog_db_complete.yml +20 -20
  40. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_cmip6/cmip6_catalog_db_drs.yml +20 -20
  41. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_cmip6/cmip6_catalog_local_complete.yml +30 -30
  42. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_cmip6/cmip6_catalog_local_drs.yml +30 -30
  43. climate_ref-0.8.0/tests/unit/datasets/test_datasets.py +376 -0
  44. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_obs4mips/obs4mips_catalog_db.yml +501 -523
  45. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_obs4mips/obs4mips_catalog_local.yml +174 -314
  46. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_obs4mips.py +2 -1
  47. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_pmp_climatology/pmp_catalog_local.yml +174 -314
  48. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_pmp_climatology.py +1 -0
  49. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/executor/test_hpc_executor.py +76 -11
  50. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/executor/test_result_handling.py +5 -2
  51. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/executor/test_synchronous_executor.py +2 -2
  52. climate_ref-0.8.0/tests/unit/models/test_metric_execution.py +115 -0
  53. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/test_config.py +52 -1
  54. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/test_database.py +4 -3
  55. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/test_solver/test_solve_metrics.yml +8 -0
  56. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/test_solver.py +12 -6
  57. climate_ref-0.6.6/src/climate_ref/cli/executions.py +0 -229
  58. climate_ref-0.6.6/tests/unit/cli/test_executions/test_inspect.txt +0 -27
  59. climate_ref-0.6.6/tests/unit/cli/test_executions.py +0 -180
  60. climate_ref-0.6.6/tests/unit/datasets/test_datasets.py +0 -100
  61. climate_ref-0.6.6/tests/unit/models/test_metric_execution.py +0 -38
  62. {climate_ref-0.6.6 → climate_ref-0.8.0}/.gitignore +0 -0
  63. {climate_ref-0.6.6 → climate_ref-0.8.0}/Dockerfile +0 -0
  64. {climate_ref-0.6.6 → climate_ref-0.8.0}/LICENCE +0 -0
  65. {climate_ref-0.6.6 → climate_ref-0.8.0}/NOTICE +0 -0
  66. {climate_ref-0.6.6 → climate_ref-0.8.0}/README.md +0 -0
  67. {climate_ref-0.6.6 → climate_ref-0.8.0}/conftest.py +0 -0
  68. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/__init__.py +0 -0
  69. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/_config_helpers.py +0 -0
  70. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/alembic.ini +0 -0
  71. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/cli/config.py +0 -0
  72. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/cli/solve.py +0 -0
  73. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/constants.py +0 -0
  74. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/datasets/cmip6_parsers.py +0 -0
  75. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/datasets/pmp_climatology.py +0 -0
  76. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/datasets/utils.py +0 -0
  77. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/executor/__init__.py +0 -0
  78. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/executor/pbs_scheduler.py +0 -0
  79. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/executor/synchronous.py +0 -0
  80. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/README +0 -0
  81. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/script.py.mako +0 -0
  82. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/versions/2025-05-02T1418_341a4aa2551e_regenerate.py +0 -0
  83. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/versions/2025-05-09T2032_03dbb4998e49_series_metric_value.py +0 -0
  84. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/versions/2025-07-03T1505_795c1e6cf496_drop_unique_requirement_on_slug.py +0 -0
  85. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/versions/2025-07-20T1521_94beace57a9c_cmip6_finalised.py +0 -0
  86. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/versions/2025-08-05T0327_a1b2c3d4e5f6_finalised_on_base_dataset.py +0 -0
  87. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/versions/2025-09-05T2019_8d28e5e0f9c3_add_indexes.py +0 -0
  88. {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/py.typed +0 -0
  89. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/cli/test_config.py +0 -0
  90. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/cli/test_providers.py +0 -0
  91. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/cli/test_root.py +0 -0
  92. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/conftest.py +0 -0
  93. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_cmip6/cmip6_catalog_db.yml +0 -0
  94. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_cmip6.py +0 -0
  95. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_utils.py +0 -0
  96. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/executor/test_local_executor.py +0 -0
  97. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/models/test_metric_value.py +0 -0
  98. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/test_pbssmartprovider.py +0 -0
  99. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/test_provider_registry.py +0 -0
  100. {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/test_slurm.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: climate-ref
3
- Version: 0.6.6
3
+ Version: 0.8.0
4
4
  Summary: Application which runs the CMIP Rapid Evaluation Framework
5
5
  Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
6
6
  License-Expression: Apache-2.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "climate-ref"
3
- version = "0.6.6"
3
+ version = "0.8.0"
4
4
  description = "Application which runs the CMIP Rapid Evaluation Framework"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -3,11 +3,12 @@
3
3
  import importlib
4
4
  from enum import Enum
5
5
  from pathlib import Path
6
- from typing import Annotated, Optional
6
+ from typing import Annotated
7
7
 
8
8
  import typer
9
9
  from attrs import define
10
10
  from loguru import logger
11
+ from rich.console import Console
11
12
 
12
13
  from climate_ref import __version__
13
14
  from climate_ref.cli import config, datasets, executions, providers, solve
@@ -37,6 +38,7 @@ class CLIContext:
37
38
 
38
39
  config: Config
39
40
  database: Database
41
+ console: Console
40
42
 
41
43
 
42
44
  def _version_callback(value: bool) -> None:
@@ -46,6 +48,13 @@ def _version_callback(value: bool) -> None:
46
48
  raise typer.Exit()
47
49
 
48
50
 
51
+ def _create_console() -> Console:
52
+ # Hook for testing to disable color output
53
+
54
+ # Rich respects the NO_COLOR environment variable
55
+ return Console()
56
+
57
+
49
58
  def _load_config(configuration_directory: Path | None = None) -> Config:
50
59
  """
51
60
  Load the configuration from the specified directory
@@ -129,7 +138,7 @@ def main( # noqa: PLR0913
129
138
  typer.Option(case_sensitive=False, help="Set the level of logging information to display"),
130
139
  ] = LogLevel.Info,
131
140
  version: Annotated[
132
- Optional[bool],
141
+ bool | None,
133
142
  typer.Option(
134
143
  "--version", callback=_version_callback, is_eager=True, help="Print the version and exit"
135
144
  ),
@@ -155,7 +164,7 @@ def main( # noqa: PLR0913
155
164
 
156
165
  logger.debug(f"Configuration loaded from: {config._config_file!s}")
157
166
 
158
- ctx.obj = CLIContext(config=config, database=Database.from_config(config))
167
+ ctx.obj = CLIContext(config=config, database=Database.from_config(config), console=_create_console())
159
168
 
160
169
 
161
170
  if __name__ == "__main__":
@@ -5,6 +5,59 @@ from rich.console import Console
5
5
  from rich.table import Table
6
6
 
7
7
 
8
+ def parse_facet_filters(filters: list[str] | None) -> dict[str, str]:
9
+ """
10
+ Parse facet filters from key=value format into a dictionary.
11
+
12
+ Parameters
13
+ ----------
14
+ filters
15
+ List of filter strings in 'key=value' format
16
+
17
+ Returns
18
+ -------
19
+ dict[str, str]
20
+ Dictionary mapping facet keys to values
21
+
22
+ Raises
23
+ ------
24
+ ValueError
25
+ If a filter string is not in valid 'key=value' format
26
+
27
+ Examples
28
+ --------
29
+ >>> parse_facet_filters(["source_id=GFDL-ESM4", "variable_id=tas"])
30
+ {'source_id': 'GFDL-ESM4', 'variable_id': 'tas'}
31
+ """
32
+ if not filters:
33
+ return {}
34
+
35
+ parsed: dict[str, str] = {}
36
+ for filter_str in filters:
37
+ if "=" not in filter_str:
38
+ raise ValueError(
39
+ f"Invalid filter format: '{filter_str}'. "
40
+ f"Expected format: 'key=value' or 'dataset_type.key=value' "
41
+ f"(e.g., 'source_id=GFDL-ESM4' or 'cmip6.source_id=GFDL-ESM4')"
42
+ )
43
+
44
+ key, value = filter_str.split("=", 1)
45
+ key = key.strip()
46
+ value = value.strip()
47
+
48
+ if not key:
49
+ raise ValueError(f"Empty key in filter: '{filter_str}'")
50
+ if not value:
51
+ raise ValueError(f"Empty value in filter: '{filter_str}'")
52
+
53
+ if key in parsed:
54
+ logger.warning(f"Filter key '{key}' specified multiple times. Using last value: '{value}'")
55
+
56
+ parsed[key] = value
57
+
58
+ return parsed
59
+
60
+
8
61
  def df_to_table(df: pd.DataFrame, max_col_count: int = -1) -> Table:
9
62
  """
10
63
  Convert a DataFrame to a rich Table instance
@@ -32,7 +85,7 @@ def df_to_table(df: pd.DataFrame, max_col_count: int = -1) -> Table:
32
85
 
33
86
  table = Table(*[str(column) for column in df.columns])
34
87
 
35
- for index, value_list in enumerate(df.values.tolist()):
88
+ for value_list in df.values.tolist():
36
89
  row = [str(x) for x in value_list]
37
90
  table.add_row(*row)
38
91
 
@@ -59,7 +112,8 @@ def pretty_print_df(df: pd.DataFrame, console: Console | None = None) -> None:
59
112
  # Drop duplicates as they are not informative to CLI users.
60
113
  df = df.drop_duplicates()
61
114
 
62
- if console is None:
115
+ if console is None: # pragma: no branch
116
+ logger.debug("Creating new console for pretty printing")
63
117
  console = Console()
64
118
 
65
119
  max_col_count = console.width // 10
@@ -6,8 +6,6 @@ which executions are required for a given diagnostic without having to re-parse
6
6
 
7
7
  """
8
8
 
9
- import errno
10
- import os
11
9
  import shutil
12
10
  from collections.abc import Iterable
13
11
  from pathlib import Path
@@ -15,9 +13,9 @@ from typing import Annotated
15
13
 
16
14
  import typer
17
15
  from loguru import logger
18
- from rich.console import Console
19
16
 
20
17
  from climate_ref.cli._utils import pretty_print_df
18
+ from climate_ref.database import ModelState
21
19
  from climate_ref.datasets import get_dataset_adapter
22
20
  from climate_ref.models import Dataset
23
21
  from climate_ref.provider_registry import ProviderRegistry
@@ -27,7 +25,6 @@ from climate_ref_core.dataset_registry import dataset_registry_manager, fetch_al
27
25
  from climate_ref_core.datasets import SourceDatasetType
28
26
 
29
27
  app = typer.Typer(help=__doc__)
30
- console = Console()
31
28
 
32
29
 
33
30
  @app.command(name="list")
@@ -70,7 +67,7 @@ def list_(
70
67
  raise typer.Exit(code=1)
71
68
  data_catalog = data_catalog[column].sort_values(by=column)
72
69
 
73
- pretty_print_df(data_catalog, console=console)
70
+ pretty_print_df(data_catalog, console=ctx.obj.console)
74
71
 
75
72
 
76
73
  @app.command()
@@ -97,7 +94,7 @@ def list_columns(
97
94
 
98
95
 
99
96
  @app.command()
100
- def ingest( # noqa: PLR0913
97
+ def ingest( # noqa
101
98
  ctx: typer.Context,
102
99
  file_or_directory: list[Path],
103
100
  source_type: Annotated[SourceDatasetType, typer.Option(help="Type of source dataset")],
@@ -106,7 +103,7 @@ def ingest( # noqa: PLR0913
106
103
  n_jobs: Annotated[int | None, typer.Option(help="Number of jobs to run in parallel")] = None,
107
104
  skip_invalid: Annotated[
108
105
  bool, typer.Option(help="Ignore (but log) any datasets that don't pass validation")
109
- ] = False,
106
+ ] = True,
110
107
  ) -> None:
111
108
  """
112
109
  Ingest a directory of datasets into the database
@@ -118,6 +115,7 @@ def ingest( # noqa: PLR0913
118
115
  """
119
116
  config = ctx.obj.config
120
117
  db = ctx.obj.database
118
+ console = ctx.obj.console
121
119
 
122
120
  kwargs = {}
123
121
 
@@ -133,16 +131,35 @@ def ingest( # noqa: PLR0913
133
131
 
134
132
  if not _dir.exists():
135
133
  logger.error(f"File or directory {_dir} does not exist")
136
- raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), _dir)
134
+ continue
135
+
136
+ # TODO: This assumes that all datasets are nc files.
137
+ # This is true for CMIP6 and obs4MIPs but may not be true for other dataset types in the future.
138
+ if not _dir.rglob("*.nc"):
139
+ logger.error(f"No .nc files found in {_dir}")
140
+ continue
137
141
 
138
- data_catalog = adapter.find_local_datasets(_dir)
139
- data_catalog = adapter.validate_data_catalog(data_catalog, skip_invalid=skip_invalid)
142
+ try:
143
+ data_catalog = adapter.find_local_datasets(_dir)
144
+ data_catalog = adapter.validate_data_catalog(data_catalog, skip_invalid=skip_invalid)
145
+ except Exception as e:
146
+ logger.error(f"Error ingesting datasets from {_dir}: {e}")
147
+ continue
140
148
 
141
149
  logger.info(
142
150
  f"Found {len(data_catalog)} files for {len(data_catalog[adapter.slug_column].unique())} datasets"
143
151
  )
144
152
  pretty_print_df(adapter.pretty_subset(data_catalog), console=console)
145
153
 
154
+ # track stats for a given directory
155
+ num_created_datasets = 0
156
+ num_updated_datasets = 0
157
+ num_unchanged_datasets = 0
158
+ num_created_files = 0
159
+ num_updated_files = 0
160
+ num_removed_files = 0
161
+ num_unchanged_files = 0
162
+
146
163
  for instance_id, data_catalog_dataset in data_catalog.groupby(adapter.slug_column):
147
164
  logger.debug(f"Processing dataset {instance_id}")
148
165
  with db.session.begin():
@@ -154,9 +171,29 @@ def ingest( # noqa: PLR0913
154
171
  )
155
172
  if not dataset:
156
173
  logger.info(f"Would save dataset {instance_id} to the database")
157
- continue
158
174
  else:
159
- adapter.register_dataset(config, db, data_catalog_dataset)
175
+ results = adapter.register_dataset(config, db, data_catalog_dataset)
176
+
177
+ if results.dataset_state == ModelState.CREATED:
178
+ num_created_datasets += 1
179
+ elif results.dataset_state == ModelState.UPDATED:
180
+ num_updated_datasets += 1
181
+ else:
182
+ num_unchanged_datasets += 1
183
+ num_created_files += len(results.files_added)
184
+ num_updated_files += len(results.files_updated)
185
+ num_removed_files += len(results.files_removed)
186
+ num_unchanged_files += len(results.files_unchanged)
187
+
188
+ if not dry_run:
189
+ ingestion_msg = (
190
+ f"Datasets: {num_created_datasets}/{num_updated_datasets}/{num_unchanged_datasets}"
191
+ " (created/updated/unchanged), "
192
+ f"Files: "
193
+ f"{num_created_files}/{num_updated_files}/{num_removed_files}/{num_unchanged_files}"
194
+ " (created/updated/removed/unchanged)"
195
+ )
196
+ logger.info(ingestion_msg)
160
197
 
161
198
  if solve:
162
199
  solve_required_executions(