climate-ref 0.5.5__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,7 +15,7 @@ from climate_ref.config import Config
15
15
  from climate_ref.constants import CONFIG_FILENAME
16
16
  from climate_ref.database import Database
17
17
  from climate_ref_core import __version__ as __core_version__
18
- from climate_ref_core.logging import add_log_handler
18
+ from climate_ref_core.logging import initialise_logging
19
19
 
20
20
 
21
21
  class LogLevel(str, Enum):
@@ -112,11 +112,21 @@ app = build_app()
112
112
  @app.callback()
113
113
  def main( # noqa: PLR0913
114
114
  ctx: typer.Context,
115
- configuration_directory: Annotated[Path | None, typer.Option(help="Configuration directory")] = None,
116
- verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Set the log level to DEBUG")] = False,
117
- quiet: Annotated[bool, typer.Option("--quiet", "-q", help="Set the log level to WARNING")] = False,
115
+ configuration_directory: Annotated[
116
+ Path | None,
117
+ typer.Option(help="Configuration directory"),
118
+ ] = None,
119
+ verbose: Annotated[
120
+ bool,
121
+ typer.Option("--verbose", "-v", help="Set the log level to DEBUG"),
122
+ ] = False,
123
+ quiet: Annotated[
124
+ bool,
125
+ typer.Option("--quiet", "-q", help="Set the log level to WARNING"),
126
+ ] = False,
118
127
  log_level: Annotated[
119
- LogLevel, typer.Option(case_sensitive=False, help="Set the level of logging information to display")
128
+ LogLevel,
129
+ typer.Option(case_sensitive=False, help="Set the level of logging information to display"),
120
130
  ] = LogLevel.Info,
121
131
  version: Annotated[
122
132
  Optional[bool],
@@ -136,11 +146,15 @@ def main( # noqa: PLR0913
136
146
  log_level = LogLevel.Debug
137
147
 
138
148
  logger.remove()
139
- add_log_handler(level=log_level.value)
140
149
 
141
150
  config = _load_config(configuration_directory)
142
151
  config.log_level = log_level.value
143
152
 
153
+ log_format = config.log_format
154
+ initialise_logging(level=config.log_level, format=log_format, log_directory=config.paths.log)
155
+
156
+ logger.debug(f"Configuration loaded from: {config._config_file!s}")
157
+
144
158
  ctx.obj = CLIContext(config=config, database=Database.from_config(config))
145
159
 
146
160
 
@@ -34,7 +34,12 @@ def list_(
34
34
  ] = SourceDatasetType.CMIP6.value, # type: ignore
35
35
  column: Annotated[list[str] | None, typer.Option()] = None,
36
36
  include_files: bool = typer.Option(False, help="Include files in the output"),
37
- limit: int = typer.Option(100, help="Limit the number of rows to display"),
37
+ limit: int = typer.Option(
38
+ 100,
39
+ help=(
40
+ "Limit the number of datasets (or files when using --include-files) to display to this number."
41
+ ),
42
+ ),
38
43
  ) -> None:
39
44
  """
40
45
  List the datasets that have been ingested
@@ -172,16 +177,28 @@ def _fetch_sample_data(
172
177
 
173
178
 
174
179
  @app.command(name="fetch-data")
175
- def fetch_data(
180
+ def fetch_data( # noqa: PLR0913
176
181
  ctx: typer.Context,
177
- registry: Annotated[str, typer.Option(help="Name of the data registry to use")],
182
+ registry: Annotated[
183
+ str,
184
+ typer.Option(help="Name of the data registry to use"),
185
+ ],
178
186
  output_directory: Annotated[
179
- Path | None, typer.Option(help="Output directory where files will be saved")
187
+ Path | None,
188
+ typer.Option(help="Output directory where files will be saved"),
180
189
  ] = None,
181
- force_cleanup: Annotated[bool, typer.Option(help="If True, remove any existing files")] = False,
190
+ force_cleanup: Annotated[
191
+ bool,
192
+ typer.Option(help="If True, remove any existing files"),
193
+ ] = False,
182
194
  symlink: Annotated[
183
- bool, typer.Option(help="If True, symlink files into the output directory, otherwise perform a copy")
195
+ bool,
196
+ typer.Option(help="If True, symlink files into the output directory, otherwise perform a copy"),
184
197
  ] = False,
198
+ verify: Annotated[
199
+ bool,
200
+ typer.Option(help="Verify the checksums of the fetched files"),
201
+ ] = True,
185
202
  ) -> None:
186
203
  """
187
204
  Fetch REF-specific datasets
@@ -206,4 +223,10 @@ def fetch_data(
206
223
  logger.error(f"Available registries: {', '.join(dataset_registry_manager.keys())}")
207
224
  raise typer.Exit(code=1)
208
225
 
209
- fetch_all_files(_registry, registry, output_directory, symlink=symlink)
226
+ fetch_all_files(
227
+ _registry,
228
+ registry,
229
+ output_directory,
230
+ symlink=symlink,
231
+ verify=verify,
232
+ )
climate_ref/cli/solve.py CHANGED
@@ -1,14 +1,23 @@
1
+ from typing import Annotated
2
+
1
3
  import typer
2
4
 
3
- from climate_ref.solver import solve_required_executions
5
+ from climate_ref.solver import SolveFilterOptions, solve_required_executions
4
6
 
5
7
  app = typer.Typer()
6
8
 
7
9
 
8
10
  @app.command()
9
- def solve(
11
+ def solve( # noqa: PLR0913
10
12
  ctx: typer.Context,
11
- dry_run: bool = typer.Option(False, help="Do not execute any diagnostics"),
13
+ dry_run: Annotated[
14
+ bool,
15
+ typer.Option(help="Do not execute any diagnostics"),
16
+ ] = False,
17
+ execute: Annotated[
18
+ bool,
19
+ typer.Option(help="Solve the newly identified executions"),
20
+ ] = True,
12
21
  timeout: int = typer.Option(60, help="Timeout in seconds for the solve operation"),
13
22
  one_per_provider: bool = typer.Option(
14
23
  False, help="Limit to one execution per provider. This is useful for testing"
@@ -16,6 +25,24 @@ def solve(
16
25
  one_per_diagnostic: bool = typer.Option(
17
26
  False, help="Limit to one execution per diagnostic. This is useful for testing"
18
27
  ),
28
+ diagnostic: Annotated[
29
+ list[str] | None,
30
+ typer.Option(
31
+ help="Filters executions by the diagnostic slug. "
32
+ "Diagnostics will be included if any of the filters match a case-insensitive subset "
33
+ "of the diagnostic slug. "
34
+ "Multiple values can be provided"
35
+ ),
36
+ ] = None,
37
+ provider: Annotated[
38
+ list[str] | None,
39
+ typer.Option(
40
+ help="Filters executions by provider slug. "
41
+ "Providers will be included if any of the filters match a case-insensitive subset "
42
+ "of the provider slug. "
43
+ "Multiple values can be provided"
44
+ ),
45
+ ] = None,
19
46
  ) -> None:
20
47
  """
21
48
  Solve for executions that require recalculation
@@ -25,11 +52,19 @@ def solve(
25
52
  """
26
53
  config = ctx.obj.config
27
54
  db = ctx.obj.database
55
+
56
+ filters = SolveFilterOptions(
57
+ diagnostic=diagnostic,
58
+ provider=provider,
59
+ )
60
+
28
61
  solve_required_executions(
29
62
  config=config,
30
63
  db=db,
31
64
  dry_run=dry_run,
65
+ execute=execute,
32
66
  timeout=timeout,
33
67
  one_per_provider=one_per_provider,
34
68
  one_per_diagnostic=one_per_diagnostic,
69
+ filters=filters,
35
70
  )
climate_ref/config.py CHANGED
@@ -38,6 +38,7 @@ from climate_ref.constants import CONFIG_FILENAME
38
38
  from climate_ref_core.env import env
39
39
  from climate_ref_core.exceptions import InvalidExecutorException
40
40
  from climate_ref_core.executor import Executor, import_executor_cls
41
+ from climate_ref_core.logging import DEFAULT_LOG_FORMAT
41
42
 
42
43
  if TYPE_CHECKING:
43
44
  from climate_ref.database import Database
@@ -337,10 +338,18 @@ class Config:
337
338
 
338
339
  log_level: str = field(default="INFO")
339
340
  """
340
- Log level of messages that are displayed by the REF
341
+ Log level of messages that are displayed by the REF via the CLI
341
342
 
342
343
  This value is overridden if a value is specified via the CLI.
343
344
  """
345
+ log_format: str = env_field("LOG_FORMAT", default=DEFAULT_LOG_FORMAT)
346
+ """
347
+ Format of the log messages that are displayed by the REF via the CLI
348
+
349
+ Examples of the formatting options are available in the
350
+ [loguru documentation](https://loguru.readthedocs.io/en/stable/api/logger.html#module-loguru._logger).
351
+ """
352
+
344
353
  paths: PathConfig = Factory(PathConfig) # noqa
345
354
  db: DbConfig = Factory(DbConfig) # noqa
346
355
  executor: ExecutorConfig = Factory(ExecutorConfig) # noqa
@@ -1,16 +1,19 @@
1
+ obs4REF/ARCCSS/LORA-1-1/mon/mrro/gn/20250516/mrro_mon_LORA-1-1_REF_gn_198001-201212.nc md5:4cfbbfa3be9632b14de99b18066fbffe
2
+ obs4REF/CNES/AVISO-1-0/mon/zos/gn/v20210727/zos_mon_AVISO-1-0_PCMDI_gn_199301-201912.nc md5:91252303cb65548fee5ff42dd3024825
3
+ obs4REF/ColumbiaU/WECANN-1-0/mon/gpp/gn/20250516/gpp_mon_WECANN-1-0_REF_gn_200701-201512.nc md5:c8757a92f915e7e270d94bfbf25accf7
4
+ obs4REF/ColumbiaU/WECANN-1-0/mon/hfls/gn/20250516/hfls_mon_WECANN-1-0_REF_gn_200701-201512.nc md5:d99c5879948f10c7fcb2f8e95922898d
5
+ obs4REF/ColumbiaU/WECANN-1-0/mon/hfss/gn/20250516/hfss_mon_WECANN-1-0_REF_gn_200701-201512.nc md5:b7a911e0fc164d07d3ab42a86d09b18b
1
6
  obs4REF/ECMWF/ERA-20C/mon/psl/gn/v20210727/psl_mon_ERA-20C_PCMDI_gn_190001-201012.nc md5:c100cf25d5681c375cd6c1ee60b678ba
2
7
  obs4REF/ECMWF/ERA-20C/mon/ts/gn/v20210727/ts_mon_ERA-20C_PCMDI_gn_190001-201012.nc md5:9ed8dfbb805ed4caa282ed70f873a3a0
3
- obs4REF/NOAA-ESRL-PSD/20CR/mon/psl/gn/v20210727/psl_mon_20CR_PCMDI_gn_187101-201212.nc md5:570ce90b3afd1d0b31690ae5dbe32d31
4
- obs4REF/NOAA-ESRL-PSD/20CR/mon/ts/gn/v20210727/ts_mon_20CR_PCMDI_gn_187101-201212.nc md5:e4890cc19ccc5bac29c6b70f28265ff1
5
- obs4REF/NOAA-NCEI/CMAP-V1902/mon/pr/gn/v20210727/pr_mon_CMAP-V1902_PCMDI_gn_197901-201901.nc md5:9d943d2dd0645850b616820f246aedf3
6
- obs4REF/MOHC/HadISST-1-1/mon/ts/gn/v20210727/ts_mon_HadISST-1-1_PCMDI_gn_187001-201907.nc md5:99c8691e0f615dc4d79b4fb5e926cc76
7
- obs4REF/ESSO/TropFlux-1-0/mon/hfls/gn/v20210727/hfls_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:2f05191d6727068e1500d8d4ed90098a
8
- obs4REF/ESSO/TropFlux-1-0/mon/hfns/gn/v20210727/hfns_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:7a9019e51a41d9e4ab1fcfb072d8ca8d
9
- obs4REF/ESSO/TropFlux-1-0/mon/hfss/gn/v20210727/hfss_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:1da9d8fe862c61bc49c36c18b6527213
10
- obs4REF/ESSO/TropFlux-1-0/mon/tas/gn/v20210727/tas_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:a6057931b5f6bc000a44514a1a8c891f
11
- obs4REF/ESSO/TropFlux-1-0/mon/tauu/gn/v20210727/tauu_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:7c73a3deed3403fa9d21caef3a4d988d
12
- obs4REF/ESSO/TropFlux-1-0/mon/tauv/gn/v20210727/tauv_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:8abc7a724a7a297826e2f783a4ea14f9
13
- obs4REF/ESSO/TropFlux-1-0/mon/ts/gn/v20210727/ts_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:8697d3d7862f6e3b72bb5a161aa75ee8
8
+ obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200701-200712.nc md5:695633a2b401cfb66c8addbf58073dbc
9
+ obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200801-200812.nc md5:404f1e1f111859be06c00bcb8d740ff2
10
+ obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200901-200912.nc md5:a1bb8584d60cdd71154c01a692fa1fb4
11
+ obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201001-201012.nc md5:b78016a3c61d99dc0fd29563aa344ca1
12
+ obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201101-201112.nc md5:d64c231a7f798a255997ffe196613ea1
13
+ obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201201-201212.nc md5:7d90ce60b872dc4f044b9b0101114983
14
+ obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201301-201312.nc md5:2fc032707cb8a31ac60fa4abe9efe183
15
+ obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201401-201412.nc md5:6022d17e11df7818f5b0429d6e401d17
16
+ obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201501-201512.nc md5:c68fdabf6eeb4813befceace089c9494
14
17
  obs4REF/ECMWF/ERA-INT/mon/hfls/gn/v20210727/hfls_mon_ERA-INT_PCMDI_gn_197901-201903.nc md5:1ae4587143f05ee81432b3d9960aab63
15
18
  obs4REF/ECMWF/ERA-INT/mon/hfss/gn/v20210727/hfss_mon_ERA-INT_PCMDI_gn_197901-201903.nc md5:261f02b8cbce18486548882a11f9aa34
16
19
  obs4REF/ECMWF/ERA-INT/mon/hur/gn/v20210727/hur_mon_ERA-INT_PCMDI_gn_198901-201001.nc md5:56fcd2df8ed2879f18b5e8c78134a148
@@ -31,6 +34,34 @@ obs4REF/ECMWF/ERA-INT/mon/uas/gn/v20210727/uas_mon_ERA-INT_PCMDI_gn_197901-20190
31
34
  obs4REF/ECMWF/ERA-INT/mon/va/gn/v20210727/va_mon_ERA-INT_PCMDI_gn_198901-201001.nc md5:f67ca168d6cd87bfdd4a911eb72dd022
32
35
  obs4REF/ECMWF/ERA-INT/mon/vas/gn/v20210727/vas_mon_ERA-INT_PCMDI_gn_197901-201903.nc md5:ac19b48b897cfe839585df4ff0fc4a7b
33
36
  obs4REF/ECMWF/ERA-INT/mon/zg/gn/v20210727/zg_mon_ERA-INT_PCMDI_gn_198901-201001.nc md5:d8fb93f31ff4a6370ccee93db82af86c
34
- obs4REF/NOAA-NCEI/GPCP-2-3/mon/pr/gn/v20210727/pr_mon_GPCP-2-3_PCMDI_gn_197901-201907.nc md5:0877f014868b83547448f96c3e7c83e9
37
+ obs4REF/ESSO/TropFlux-1-0/mon/hfls/gn/v20210727/hfls_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:2f05191d6727068e1500d8d4ed90098a
38
+ obs4REF/ESSO/TropFlux-1-0/mon/hfls/gn/v20250415/hfls_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:e607167a08a2521b65e55eb186182003
39
+ obs4REF/ESSO/TropFlux-1-0/mon/hfns/gn/v20210727/hfns_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:7a9019e51a41d9e4ab1fcfb072d8ca8d
40
+ obs4REF/ESSO/TropFlux-1-0/mon/hfss/gn/v20210727/hfss_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:1da9d8fe862c61bc49c36c18b6527213
41
+ obs4REF/ESSO/TropFlux-1-0/mon/hfss/gn/v20250415/hfss_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:6f766ca0332a8e566c408d237571a924
42
+ obs4REF/ESSO/TropFlux-1-0/mon/tas/gn/v20210727/tas_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:a6057931b5f6bc000a44514a1a8c891f
43
+ obs4REF/ESSO/TropFlux-1-0/mon/tas/gn/v20250415/tas_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:527ab1b9becf2a793df558532eccfe69
44
+ obs4REF/ESSO/TropFlux-1-0/mon/tauu/gn/v20210727/tauu_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:7c73a3deed3403fa9d21caef3a4d988d
45
+ obs4REF/ESSO/TropFlux-1-0/mon/tauu/gn/v20250415/tauu_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:0822e2002e61472277116d38e5e19498
46
+ obs4REF/ESSO/TropFlux-1-0/mon/tauv/gn/v20210727/tauv_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:8abc7a724a7a297826e2f783a4ea14f9
47
+ obs4REF/ESSO/TropFlux-1-0/mon/ts/gn/v20210727/ts_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:8697d3d7862f6e3b72bb5a161aa75ee8
48
+ obs4REF/MOHC/HadISST-1-1/mon/ts/gn/v20210727/ts_mon_HadISST-1-1_PCMDI_gn_187001-201907.nc md5:99c8691e0f615dc4d79b4fb5e926cc76
49
+ obs4REF/MOHC/HadISST-1-1/mon/ts/gn/v20250415/ts_mon_HadISST-1-1_PCMDI_gn_187001-202501.nc md5:66fb8cdf53ec0e073c565adfa57862b3
35
50
  obs4REF/NASA-GSFC/TRMM-3B43v-7/mon/pr/gn/v20210727/pr_mon_TRMM-3B43v-7_PCMDI_gn_199801-201712.nc md5:b80c9989d358656c781be5ea5a44c64c
36
- obs4REF/CNES/AVISO-1-0/mon/zos/gn/v20210727/zos_mon_AVISO-1-0_PCMDI_gn_199301-201912.nc md5:91252303cb65548fee5ff42dd3024825
51
+ obs4REF/NASA-LaRC/CERES-EBAF-4-2/mon/rlds/gn/v20230209/rlds_mon_CERES-EBAF-4-2_RSS_gn_200003-202309.nc md5:897451ed566251135483d1413cd6bee3
52
+ obs4REF/NASA-LaRC/CERES-EBAF-4-2/mon/rlus/gn/v20230209/rlus_mon_CERES-EBAF-4-2_RSS_gn_200003-202309.nc md5:750650025845fc89d9e56a3690deea21
53
+ obs4REF/NASA-LaRC/CERES-EBAF-4-2/mon/rsds/gn/v20230209/rsds_mon_CERES-EBAF-4-2_RSS_gn_200003-202309.nc md5:5c33068dd11e6eb8d0bf6e2aa0335ef2
54
+ obs4REF/NASA-LaRC/CERES-EBAF-4-2/mon/rsus/gn/v20230209/rsus_mon_CERES-EBAF-4-2_RSS_gn_200003-202309.nc md5:4f67c58186905e995a8b9497a49ecbf0
55
+ obs4REF/NOAA-ESRL-PSD/20CR/mon/psl/gn/v20210727/psl_mon_20CR_PCMDI_gn_187101-201212.nc md5:570ce90b3afd1d0b31690ae5dbe32d31
56
+ obs4REF/NOAA-ESRL-PSD/20CR/mon/ts/gn/v20210727/ts_mon_20CR_PCMDI_gn_187101-201212.nc md5:e4890cc19ccc5bac29c6b70f28265ff1
57
+ obs4REF/NOAA-NCEI/CMAP-V1902/mon/pr/gn/v20210727/pr_mon_CMAP-V1902_PCMDI_gn_197901-201901.nc md5:9d943d2dd0645850b616820f246aedf3
58
+ obs4REF/NOAA-NCEI/GPCP-2-3/mon/pr/gn/v20210727/pr_mon_GPCP-2-3_PCMDI_gn_197901-201907.nc md5:0877f014868b83547448f96c3e7c83e9
59
+ obs4REF/NOAA-NCEI/GPCP-2-3/mon/pr/gn/v20231205/pr_mon_GPCP-Monthly-3-2_RSS_gn_198301-202303.nc md5:6970c22443e2097c45de5db8947318eb
60
+ obs4REF/NOAA-NCEI/WOA2023/mon/no3/gn/20250516/no3_mon_WOA2023_REF_gn_201501-202212.nc md5:ffd2b0c1b1f35f5176ba26c43478c9d2
61
+ obs4REF/NOAA-NCEI/WOA2023/mon/o2/gn/20250516/o2_mon_WOA2023_REF_gn_201501-202212.nc md5:1a41d343329730d357a6a21b59f201c7
62
+ obs4REF/NOAA-NCEI/WOA2023/mon/po4/gn/20250516/po4_mon_WOA2023_REF_gn_201501-202212.nc md5:ff65f5c81f3775af49be3e22305d4979
63
+ obs4REF/NOAA-NCEI/WOA2023/mon/so/gn/20250516/so_mon_WOA2023_REF_gn_201501-202212.nc md5:0da9596667c3b6bbc5388e420cd24a3b
64
+ obs4REF/NOAA-NCEI/WOA2023/mon/thetao/gn/20250516/thetao_mon_WOA2023_REF_gn_201501-202212.nc md5:e3bb4f9b387191571459dc49520632ee
65
+ obs4REF/NOC/RAPID/mon/msftmz/NA/20250516/msftmz_mon_RAPID_REF_NA_200404-202302.nc md5:e9ee555d923d39112dcbf591440000b3
66
+ obs4REF/UCI-ORNL/Hoffman/yr/fgco2/gm/20250516/fgco2_yr_Hoffman_REF_gm_185007-201007.nc md5:93419dd752e0ae20e69b7db3f8ea9f5b
67
+ obs4REF/UCI-ORNL/Hoffman/yr/nbp/gm/20250516/nbp_yr_Hoffman_REF_gm_185007-201007.nc md5:1c0b93c81e2608c668ec39c45ca828de
@@ -23,22 +23,20 @@ CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsdt/gn/v20191115/rsdt_A
23
23
  CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsut/gn/v20191115/rsut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc b3148af8bee4c3562607aeaec9d81d94abad366c85421dd55ef222516fd3f7e0
24
24
  CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsutcs/gn/v20191115/rsutcs_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200501-201412.nc 9f336ed549c02bf31f73f4679b850463ca8edc4db7afb04b31acd643bc3b51c8
25
25
  CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/tas/gn/v20191115/tas_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc 4c74c74031fbc88cb0df70826e0aea63a582af5f5a714603a0ae70b3ff834097
26
+ CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/tauu/gn/v20191115/tauu_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc 2a1eb8768134189addd7541975ce39d75406d530ba68b2c1286f9f1d8fd14f17
26
27
  CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/ts/gn/v20191115/ts_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc b3b8e7675287471fd27f3de1f3ecada278eb7ea384da24031d0f3a949e04f757
27
28
  CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Emon/cSoil/gn/v20191115/cSoil_Emon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc d78d9bc532d79a50a0e40645f91572d61a92881aea43552061baea0e77353777
28
29
  CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/gpp/gn/v20191115/gpp_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 41a43edaa1ff7385232f8fbf67fcc551eb77e51ba6cd75b95b5f0ee71af19544
29
30
  CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/lai/gn/v20191115/lai_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 4e6e1530da9030cfb7c207543369c636f339ee3c59c4621e22405494dc436755
30
31
  CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/mrro/gn/v20191115/mrro_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 3b9c010af44ca894bb23a55903fdc57dcd5af8a4e22ea1e98b41b18274720fd9
31
32
  CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/mrsos/gn/v20191115/mrsos_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc ca21d4f5341114cbc217d7c30f89c4375b02ea0a2b723d9cb069db6a9c08b08f
32
- CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/nbp/gn/v20191115/nbp_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 476b4ebacf38ddb78d44975b01d859e9ca3ed7955b92adc4bd098591867e56b0
33
+ CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/nbp/gn/v20191115/nbp_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc a400042a848e855335de99e3ae8f5f2a900476db953ad70e6562fc4b8beb0ffc
33
34
  CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Ofx/areacello/gn/v20191115/areacello_Ofx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc fe5adde7cfe6d80462ad649fe68c2aa983ba700d96cff5ac830b1d7e7b09ca6f
34
35
  CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Ofx/sftof/gn/v20191115/sftof_Ofx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc 89106ad204fb1ac9f3b25f4b93d6fcf9e95db5f1f4466b9baafcb426bad24e4f
35
- CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_197901-197912.nc 9016163abd89155710291dc541a4146778f18a320435a2ee618f0da1424b0338
36
- CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_198001-198912.nc 97094c32cb3e8f019ad87145d348307c2a57ba9b0acf0cc5c59978213060e8d4
37
- CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_199001-199912.nc be682038248b0d79ea9e0402a2a173bd5c054dfdd8cc0932f6cb85d925670889
38
36
  CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-200912.nc 266c386ddfbe9556404afadef1e778a64b5f4c5dd746c54a1beee3e5fbc9954b
39
37
  CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_201001-201412.nc 21fa3a223baced12da8a62ad406ba8126851c3e36589184fba2bb1c8b269c6e3
40
- CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/sos/gn/v20191115/sos_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_197901-201412.nc a757237973708d28f2333542fc34ed9543a15256c7f38d0139fcfe66bdb8d95c
41
- CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/tos/gn/v20191115/tos_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 701fa01ac2c50b622c3454bb3d69a54fed1c92b66f9194c09148b2349aa11342
38
+ CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/sos/gn/v20191115/sos_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc da3a4b261779bd8477a6e067c7e7b611d9438e74d76cff225a44a8dacedc3fd4
39
+ CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/tos/gn/v20191115/tos_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc b10be33023c7fdc77c7550ea1bfc6976e354841e6ba6aeaece6eec4b3308d99e
42
40
  CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/SImon/siconc/gn/v20200817/siconc_SImon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_197901-201412.nc 384b45a0f88678ffaf0c9701a0fc0175fb0c319d7d94d0b3c4334939d653e51d
43
41
  CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/fx/areacella/gn/v20191115/areacella_fx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc 6750d4a2432842cb9342ef99b8ecb3569e8ff3dcbbf020f7f9f43a6f7af42f06
44
42
  CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/fx/sftlf/gn/v20191115/sftlf_fx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc 08d84ba3cf02a2481b76611dfa1abe25cbf76326003eebc4eb00c99b32fea19a
@@ -55,6 +53,10 @@ CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/esm-piControl/r1i1p1f1/Amon/tas/gn/v20190815/tas_
55
53
  CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/esm-piControl/r1i1p1f1/Amon/tas/gn/v20190815/tas_Amon_MPI-ESM1-2-LR_esm-piControl_r1i1p1f1_gn_189001-190912.nc ae9193e06aec8a7f63f2f7ec63fec9d25b2c1dc5de2bbe7065428c722090f45c
56
54
  CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/esm-piControl/r1i1p1f1/Amon/tas/gn/v20190815/tas_Amon_MPI-ESM1-2-LR_esm-piControl_r1i1p1f1_gn_191001-191512.nc 2113b7d4a781c7c28de55b0ffe99c42b49692d113f5109e7a4d0214211a9eb1c
57
55
  CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/esm-piControl/r1i1p1f1/fx/areacella/gn/v20190815/areacella_fx_MPI-ESM1-2-LR_esm-piControl_r1i1p1f1_gn.nc 2f5595635a566d5b4a889468f687aabf99ca5cdadb11f6a770d05d09775bfabb
56
+ CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/ImonAnt/snc/gn/v20190308/snc_ImonAnt_CESM2_historical_r1i1p1f1_gn_200001-201412.nc 5a88cd5ed6ed33f9b24fd4bf942b5e427cb3512584a38030b07feb158c7a78c2
57
+ CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/ImonGre/snc/gn/v20190308/snc_ImonGre_CESM2_historical_r1i1p1f1_gn_200001-201412.nc 5f6b6ccb2450eeeebc3b24d3d76cf306894094efa2f6ee53403be89595f4455c
58
+ CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/LImon/snc/gn/v20190308/snc_LImon_CESM2_historical_r1i1p1f1_gn_200001-201412.nc 27005c5952a981c27c17ad8d169d01c852e2daee3236adf1e170b6b96aafcfff
59
+ CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Lmon/burntFractionAll/gn/v20190308/burntFractionAll_Lmon_CESM2_historical_r1i1p1f1_gn_200001-201412.nc 61fbd29b05a5bdfa11001265c3960b5147e641148eff2a55cc027aa56db7263c
58
60
  CMIP6/DAMIP/CSIRO/ACCESS-ESM1-5/hist-GHG/r1i1p1f1/Amon/psl/gn/v20200615/psl_Amon_ACCESS-ESM1-5_hist-GHG_r1i1p1f1_gn_200001-202012.nc 5fed6b48717fcf508e8b5936be304db0346362c2182c5d13a141afe7c982021a
59
61
  CMIP6/DAMIP/CSIRO/ACCESS-ESM1-5/hist-GHG/r1i1p1f1/Amon/ts/gn/v20200615/ts_Amon_ACCESS-ESM1-5_hist-GHG_r1i1p1f1_gn_200001-202012.nc aa98fa93ef29ea266a94d87876b4ae08917caf77e38c5df1eae1ec98a5f559cb
60
62
  CMIP6/DAMIP/CSIRO/ACCESS-ESM1-5/hist-GHG/r1i1p1f1/fx/areacella/gn/v20200615/areacella_fx_ACCESS-ESM1-5_hist-GHG_r1i1p1f1_gn.nc 7179f75ff1754e9666b10f8c5508040bbe9fdac0c8117a9231ba5977d4889f27
@@ -1,5 +1,5 @@
1
1
  from pathlib import Path
2
- from typing import Protocol
2
+ from typing import Protocol, cast
3
3
 
4
4
  import pandas as pd
5
5
  from loguru import logger
@@ -44,14 +44,49 @@ class DatasetAdapter(Protocol):
44
44
 
45
45
  dataset_cls: type[Dataset]
46
46
  slug_column: str
47
+ """
48
+ The column in the data catalog that contains the dataset slug.
49
+ The dataset slug is a unique identifier for the dataset that includes the version of the dataset.
50
+ This can be used to group files together that belong to the same dataset.
51
+ """
47
52
  dataset_specific_metadata: tuple[str, ...]
48
53
  file_specific_metadata: tuple[str, ...] = ()
49
54
 
55
+ version_metadata: str = "version"
56
+ """
57
+ The column in the data catalog that contains the version of the dataset.
58
+ """
59
+ dataset_id_metadata: tuple[str, ...] = ()
60
+ """
61
+ The group of metadata columns that are specific to the dataset excluding the version information.
62
+
63
+ Each unique dataset should have the same values for these columns.
64
+
65
+ This is generally the columns that describe the `slug` of a dataset,
66
+ excluding the version information.
67
+ """
68
+
50
69
  def pretty_subset(self, data_catalog: pd.DataFrame) -> pd.DataFrame:
51
70
  """
52
71
  Get a subset of the data_catalog to pretty print
72
+
73
+ Parameters
74
+ ----------
75
+ data_catalog
76
+ Data catalog to subset
77
+
78
+ Returns
79
+ -------
80
+ :
81
+ Subset of the data catalog to pretty print
82
+
53
83
  """
54
- ...
84
+ return data_catalog[
85
+ [
86
+ *self.dataset_id_metadata,
87
+ self.version_metadata,
88
+ ]
89
+ ]
55
90
 
56
91
  def find_local_datasets(self, file_or_directory: Path) -> pd.DataFrame:
57
92
  """
@@ -202,6 +237,8 @@ class DatasetAdapter(Protocol):
202
237
  Iterating over different datasets within the data catalog can be done using a `groupby`
203
238
  operation for the `instance_id` column.
204
239
 
240
+ Only the latest version of each dataset is returned.
241
+
205
242
  The index of the data catalog is the primary key of the dataset.
206
243
  This should be maintained during any processing.
207
244
 
@@ -213,6 +250,27 @@ class DatasetAdapter(Protocol):
213
250
  with db.session.begin():
214
251
  # TODO: Paginate this query to avoid loading all the data at once
215
252
  if include_files:
216
- return self._get_dataset_files(db, limit)
253
+ catalog = self._get_dataset_files(db, limit)
217
254
  else:
218
- return self._get_datasets(db, limit)
255
+ catalog = self._get_datasets(db, limit)
256
+
257
+ def _get_latest_version(dataset_catalog: pd.DataFrame) -> pd.DataFrame:
258
+ """
259
+ Get the latest version of each dataset based on the version metadata.
260
+
261
+ This assumes that the version can be sorted lexicographically.
262
+ """
263
+ latest_version = dataset_catalog[self.version_metadata].max()
264
+
265
+ return cast(
266
+ pd.DataFrame, dataset_catalog[dataset_catalog[self.version_metadata] == latest_version]
267
+ )
268
+
269
+ # If there are no datasets, return an empty DataFrame
270
+ if catalog.empty:
271
+ return pd.DataFrame(columns=self.dataset_specific_metadata + self.file_specific_metadata)
272
+
273
+ # Group by the dataset ID and get the latest version for each dataset
274
+ return catalog.groupby(
275
+ list(self.dataset_id_metadata), group_keys=False, as_index=False, sort=False
276
+ ).apply(_get_latest_version)
@@ -196,40 +196,21 @@ class CMIP6DatasetAdapter(DatasetAdapter):
196
196
 
197
197
  file_specific_metadata = ("start_time", "end_time", "path")
198
198
 
199
+ version_metadata = "version"
200
+ dataset_id_metadata = (
201
+ "activity_id",
202
+ "institution_id",
203
+ "source_id",
204
+ "experiment_id",
205
+ "member_id",
206
+ "table_id",
207
+ "variable_id",
208
+ "grid_label",
209
+ )
210
+
199
211
  def __init__(self, n_jobs: int = 1):
200
212
  self.n_jobs = n_jobs
201
213
 
202
- def pretty_subset(self, data_catalog: pd.DataFrame) -> pd.DataFrame:
203
- """
204
- Get a subset of the data_catalog to pretty print
205
-
206
- This is particularly useful for CMIP6 datasets, which have a lot of metadata columns.
207
-
208
- Parameters
209
- ----------
210
- data_catalog
211
- Data catalog to subset
212
-
213
- Returns
214
- -------
215
- :
216
- Subset of the data catalog to pretty print
217
-
218
- """
219
- return data_catalog[
220
- [
221
- "activity_id",
222
- "institution_id",
223
- "source_id",
224
- "experiment_id",
225
- "member_id",
226
- "table_id",
227
- "variable_id",
228
- "grid_label",
229
- "version",
230
- ]
231
- ]
232
-
233
214
  def find_local_datasets(self, file_or_directory: Path) -> pd.DataFrame:
234
215
  """
235
216
  Generate a data catalog from the specified file or directory
@@ -266,15 +247,8 @@ class CMIP6DatasetAdapter(DatasetAdapter):
266
247
  datasets["end_time"] = _parse_datetime(datasets["end_time"])
267
248
 
268
249
  drs_items = [
269
- "activity_id",
270
- "institution_id",
271
- "source_id",
272
- "experiment_id",
273
- "member_id",
274
- "table_id",
275
- "variable_id",
276
- "grid_label",
277
- "version",
250
+ *self.dataset_id_metadata,
251
+ self.version_metadata,
278
252
  ]
279
253
  datasets["instance_id"] = datasets.apply(
280
254
  lambda row: "CMIP6." + ".".join([row[item] for item in drs_items]), axis=1
@@ -1,6 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- import re
4
3
  import traceback
5
4
  from pathlib import Path
6
5
  from typing import Any
@@ -8,6 +7,7 @@ from typing import Any
8
7
  import pandas as pd
9
8
  import xarray as xr
10
9
  from ecgtools import Builder
10
+ from ecgtools.parsers.utilities import extract_attr_with_regex # type: ignore
11
11
  from loguru import logger
12
12
 
13
13
  from climate_ref.datasets.base import DatasetAdapter
@@ -15,25 +15,6 @@ from climate_ref.datasets.cmip6 import _parse_datetime
15
15
  from climate_ref.models.dataset import Dataset, Obs4MIPsDataset
16
16
 
17
17
 
18
- def extract_attr_with_regex(
19
- input_str: str, regex: str, strip_chars: str | None, ignore_case: bool
20
- ) -> list[Any] | None:
21
- """
22
- Extract version information from attribute with regular expressions.
23
- """
24
- if ignore_case:
25
- pattern = re.compile(regex, re.IGNORECASE)
26
- else:
27
- pattern = re.compile(regex)
28
- match = re.findall(pattern, input_str)
29
- if match:
30
- matchstr = max(match, key=len)
31
- match = matchstr.strip(strip_chars) if strip_chars else matchstr.strip()
32
- return match
33
- else:
34
- return None
35
-
36
-
37
18
  def parse_obs4mips(file: str) -> dict[str, Any | None]:
38
19
  """Parser for obs4mips"""
39
20
  keys = sorted(
@@ -144,38 +125,18 @@ class Obs4MIPsDatasetAdapter(DatasetAdapter):
144
125
  )
145
126
 
146
127
  file_specific_metadata = ("start_time", "end_time", "path")
128
+ version_metadata = "source_version_number"
129
+ dataset_id_metadata = (
130
+ "activity_id",
131
+ "institution_id",
132
+ "source_id",
133
+ "variable_id",
134
+ "grid_label",
135
+ )
147
136
 
148
137
  def __init__(self, n_jobs: int = 1):
149
138
  self.n_jobs = n_jobs
150
139
 
151
- def pretty_subset(self, data_catalog: pd.DataFrame) -> pd.DataFrame:
152
- """
153
- Get a subset of the data_catalog to pretty print
154
-
155
- This is particularly useful for obs4MIPs datasets, which have a lot of metadata columns.
156
-
157
- Parameters
158
- ----------
159
- data_catalog
160
- Data catalog to subset
161
-
162
- Returns
163
- -------
164
- :
165
- Subset of the data catalog to pretty print
166
-
167
- """
168
- return data_catalog[
169
- [
170
- "activity_id",
171
- "institution_id",
172
- "source_id",
173
- "variable_id",
174
- "grid_label",
175
- "source_version_number",
176
- ]
177
- ]
178
-
179
140
  def find_local_datasets(self, file_or_directory: Path) -> pd.DataFrame:
180
141
  """
181
142
  Generate a data catalog from the specified file or directory
@@ -211,12 +172,8 @@ class Obs4MIPsDatasetAdapter(DatasetAdapter):
211
172
  datasets["end_time"] = _parse_datetime(datasets["end_time"])
212
173
 
213
174
  drs_items = [
214
- "activity_id",
215
- "institution_id",
216
- "source_id",
217
- "variable_id",
218
- "grid_label",
219
- "source_version_number",
175
+ *self.dataset_id_metadata,
176
+ self.version_metadata,
220
177
  ]
221
178
  datasets["instance_id"] = datasets.apply(
222
179
  lambda row: "obs4MIPs." + ".".join([row[item] for item in drs_items]), axis=1
@@ -9,8 +9,9 @@ The simplest executor is the `LocalExecutor`, which runs the diagnostic in the s
9
9
  This is useful for local testing and debugging.
10
10
  """
11
11
 
12
+ from .hpc import HPCExecutor
12
13
  from .local import LocalExecutor
13
14
  from .result_handling import handle_execution_result
14
15
  from .synchronous import SynchronousExecutor
15
16
 
16
- __all__ = ["LocalExecutor", "SynchronousExecutor", "handle_execution_result"]
17
+ __all__ = ["HPCExecutor", "LocalExecutor", "SynchronousExecutor", "handle_execution_result"]