PyPI - climate-ref - Versions diffs - 0.5.5__py3-none-any.whl → 0.6.1__py3-none-any.whl - Mend

climate-ref 0.5.5__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

climate_ref/cli/__init__.py +20 -6
climate_ref/cli/datasets.py +30 -7
climate_ref/cli/solve.py +38 -3
climate_ref/config.py +10 -1
climate_ref/dataset_registry/obs4ref_reference.txt +44 -13
climate_ref/dataset_registry/sample_data.txt +8 -6
climate_ref/datasets/base.py +62 -4
climate_ref/datasets/cmip6.py +14 -40
climate_ref/datasets/obs4mips.py +11 -54
climate_ref/executor/__init__.py +2 -1
climate_ref/executor/hpc.py +308 -0
climate_ref/executor/local.py +24 -4
climate_ref/executor/result_handling.py +0 -1
climate_ref/slurm.py +192 -0
climate_ref/solver.py +67 -6
climate_ref/testing.py +7 -5
{climate_ref-0.5.5.dist-info → climate_ref-0.6.1.dist-info}/METADATA +3 -2
{climate_ref-0.5.5.dist-info → climate_ref-0.6.1.dist-info}/RECORD +22 -20
{climate_ref-0.5.5.dist-info → climate_ref-0.6.1.dist-info}/WHEEL +0 -0
{climate_ref-0.5.5.dist-info → climate_ref-0.6.1.dist-info}/entry_points.txt +0 -0
{climate_ref-0.5.5.dist-info → climate_ref-0.6.1.dist-info}/licenses/LICENCE +0 -0
{climate_ref-0.5.5.dist-info → climate_ref-0.6.1.dist-info}/licenses/NOTICE +0 -0

climate_ref/cli/__init__.py CHANGED Viewed

@@ -15,7 +15,7 @@ from climate_ref.config import Config
 from climate_ref.constants import CONFIG_FILENAME
 from climate_ref.database import Database
 from climate_ref_core import __version__ as __core_version__
-from climate_ref_core.logging import add_log_handler
+from climate_ref_core.logging import initialise_logging
 class LogLevel(str, Enum):
@@ -112,11 +112,21 @@ app = build_app()
 @app.callback()
 def main(  # noqa: PLR0913
     ctx: typer.Context,
-    configuration_directory: Annotated[Path | None, typer.Option(help="Configuration directory")] = None,
-    verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Set the log level to DEBUG")] = False,
-    quiet: Annotated[bool, typer.Option("--quiet", "-q", help="Set the log level to WARNING")] = False,
+    configuration_directory: Annotated[
+        Path | None,
+        typer.Option(help="Configuration directory"),
+    ] = None,
+    verbose: Annotated[
+        bool,
+        typer.Option("--verbose", "-v", help="Set the log level to DEBUG"),
+    ] = False,
+    quiet: Annotated[
+        bool,
+        typer.Option("--quiet", "-q", help="Set the log level to WARNING"),
+    ] = False,
     log_level: Annotated[
-        LogLevel, typer.Option(case_sensitive=False, help="Set the level of logging information to display")
+        LogLevel,
+        typer.Option(case_sensitive=False, help="Set the level of logging information to display"),
     ] = LogLevel.Info,
     version: Annotated[
         Optional[bool],
@@ -136,11 +146,15 @@ def main(  # noqa: PLR0913
         log_level = LogLevel.Debug
     logger.remove()
-    add_log_handler(level=log_level.value)
     config = _load_config(configuration_directory)
     config.log_level = log_level.value
+    log_format = config.log_format
+    initialise_logging(level=config.log_level, format=log_format, log_directory=config.paths.log)
+    logger.debug(f"Configuration loaded from: {config._config_file!s}")
     ctx.obj = CLIContext(config=config, database=Database.from_config(config))

climate_ref/cli/datasets.py CHANGED Viewed

@@ -34,7 +34,12 @@ def list_(
     ] = SourceDatasetType.CMIP6.value,  # type: ignore
     column: Annotated[list[str] | None, typer.Option()] = None,
     include_files: bool = typer.Option(False, help="Include files in the output"),
-    limit: int = typer.Option(100, help="Limit the number of rows to display"),
+    limit: int = typer.Option(
+        100,
+        help=(
+            "Limit the number of datasets (or files when using --include-files) to display to this number."
+        ),
+    ),
 ) -> None:
     """
     List the datasets that have been ingested
@@ -172,16 +177,28 @@ def _fetch_sample_data(
 @app.command(name="fetch-data")
-def fetch_data(
+def fetch_data(  # noqa: PLR0913
     ctx: typer.Context,
-    registry: Annotated[str, typer.Option(help="Name of the data registry to use")],
+    registry: Annotated[
+        str,
+        typer.Option(help="Name of the data registry to use"),
+    ],
     output_directory: Annotated[
-        Path | None, typer.Option(help="Output directory where files will be saved")
+        Path | None,
+        typer.Option(help="Output directory where files will be saved"),
     ] = None,
-    force_cleanup: Annotated[bool, typer.Option(help="If True, remove any existing files")] = False,
+    force_cleanup: Annotated[
+        bool,
+        typer.Option(help="If True, remove any existing files"),
+    ] = False,
     symlink: Annotated[
-        bool, typer.Option(help="If True, symlink files into the output directory, otherwise perform a copy")
+        bool,
+        typer.Option(help="If True, symlink files into the output directory, otherwise perform a copy"),
     ] = False,
+    verify: Annotated[
+        bool,
+        typer.Option(help="Verify the checksums of the fetched files"),
+    ] = True,
 ) -> None:
     """
     Fetch REF-specific datasets
@@ -206,4 +223,10 @@ def fetch_data(
         logger.error(f"Available registries: {', '.join(dataset_registry_manager.keys())}")
         raise typer.Exit(code=1)
-    fetch_all_files(_registry, registry, output_directory, symlink=symlink)
+    fetch_all_files(
+        _registry,
+        registry,
+        output_directory,
+        symlink=symlink,
+        verify=verify,
+    )

climate_ref/cli/solve.py CHANGED Viewed

@@ -1,14 +1,23 @@
+from typing import Annotated
 import typer
-from climate_ref.solver import solve_required_executions
+from climate_ref.solver import SolveFilterOptions, solve_required_executions
 app = typer.Typer()
 @app.command()
-def solve(
+def solve(  # noqa: PLR0913
     ctx: typer.Context,
-    dry_run: bool = typer.Option(False, help="Do not execute any diagnostics"),
+    dry_run: Annotated[
+        bool,
+        typer.Option(help="Do not execute any diagnostics"),
+    ] = False,
+    execute: Annotated[
+        bool,
+        typer.Option(help="Solve the newly identified executions"),
+    ] = True,
     timeout: int = typer.Option(60, help="Timeout in seconds for the solve operation"),
     one_per_provider: bool = typer.Option(
         False, help="Limit to one execution per provider. This is useful for testing"
@@ -16,6 +25,24 @@ def solve(
     one_per_diagnostic: bool = typer.Option(
         False, help="Limit to one execution per diagnostic. This is useful for testing"
     ),
+    diagnostic: Annotated[
+        list[str] | None,
+        typer.Option(
+            help="Filters executions by the diagnostic slug. "
+            "Diagnostics will be included if any of the filters match a case-insensitive subset "
+            "of the diagnostic slug. "
+            "Multiple values can be provided"
+        ),
+    ] = None,
+    provider: Annotated[
+        list[str] | None,
+        typer.Option(
+            help="Filters executions by provider slug. "
+            "Providers will be included if any of the filters match a case-insensitive subset "
+            "of the provider slug. "
+            "Multiple values can be provided"
+        ),
+    ] = None,
 ) -> None:
     """
     Solve for executions that require recalculation
@@ -25,11 +52,19 @@ def solve(
     """
     config = ctx.obj.config
     db = ctx.obj.database
+    filters = SolveFilterOptions(
+        diagnostic=diagnostic,
+        provider=provider,
+    )
     solve_required_executions(
         config=config,
         db=db,
         dry_run=dry_run,
+        execute=execute,
         timeout=timeout,
         one_per_provider=one_per_provider,
         one_per_diagnostic=one_per_diagnostic,
+        filters=filters,
     )

climate_ref/config.py CHANGED Viewed

@@ -38,6 +38,7 @@ from climate_ref.constants import CONFIG_FILENAME
 from climate_ref_core.env import env
 from climate_ref_core.exceptions import InvalidExecutorException
 from climate_ref_core.executor import Executor, import_executor_cls
+from climate_ref_core.logging import DEFAULT_LOG_FORMAT
 if TYPE_CHECKING:
     from climate_ref.database import Database
@@ -337,10 +338,18 @@ class Config:
     log_level: str = field(default="INFO")
     """
-    Log level of messages that are displayed by the REF
+    Log level of messages that are displayed by the REF via the CLI
     This value is overridden if a value is specified via the CLI.
     """
+    log_format: str = env_field("LOG_FORMAT", default=DEFAULT_LOG_FORMAT)
+    """
+    Format of the log messages that are displayed by the REF via the CLI
+    Examples of the formatting options are available in the
+    [loguru documentation](https://loguru.readthedocs.io/en/stable/api/logger.html#module-loguru._logger).
+    """
     paths: PathConfig = Factory(PathConfig)  # noqa
     db: DbConfig = Factory(DbConfig)  # noqa
     executor: ExecutorConfig = Factory(ExecutorConfig)  # noqa

climate_ref/dataset_registry/obs4ref_reference.txt CHANGED Viewed

@@ -1,16 +1,19 @@
+obs4REF/ARCCSS/LORA-1-1/mon/mrro/gn/20250516/mrro_mon_LORA-1-1_REF_gn_198001-201212.nc md5:4cfbbfa3be9632b14de99b18066fbffe
+obs4REF/CNES/AVISO-1-0/mon/zos/gn/v20210727/zos_mon_AVISO-1-0_PCMDI_gn_199301-201912.nc md5:91252303cb65548fee5ff42dd3024825
+obs4REF/ColumbiaU/WECANN-1-0/mon/gpp/gn/20250516/gpp_mon_WECANN-1-0_REF_gn_200701-201512.nc md5:c8757a92f915e7e270d94bfbf25accf7
+obs4REF/ColumbiaU/WECANN-1-0/mon/hfls/gn/20250516/hfls_mon_WECANN-1-0_REF_gn_200701-201512.nc md5:d99c5879948f10c7fcb2f8e95922898d
+obs4REF/ColumbiaU/WECANN-1-0/mon/hfss/gn/20250516/hfss_mon_WECANN-1-0_REF_gn_200701-201512.nc md5:b7a911e0fc164d07d3ab42a86d09b18b
 obs4REF/ECMWF/ERA-20C/mon/psl/gn/v20210727/psl_mon_ERA-20C_PCMDI_gn_190001-201012.nc md5:c100cf25d5681c375cd6c1ee60b678ba
 obs4REF/ECMWF/ERA-20C/mon/ts/gn/v20210727/ts_mon_ERA-20C_PCMDI_gn_190001-201012.nc md5:9ed8dfbb805ed4caa282ed70f873a3a0
-obs4REF/NOAA-ESRL-PSD/20CR/mon/psl/gn/v20210727/psl_mon_20CR_PCMDI_gn_187101-201212.nc md5:570ce90b3afd1d0b31690ae5dbe32d31
-obs4REF/NOAA-ESRL-PSD/20CR/mon/ts/gn/v20210727/ts_mon_20CR_PCMDI_gn_187101-201212.nc md5:e4890cc19ccc5bac29c6b70f28265ff1
-obs4REF/NOAA-NCEI/CMAP-V1902/mon/pr/gn/v20210727/pr_mon_CMAP-V1902_PCMDI_gn_197901-201901.nc md5:9d943d2dd0645850b616820f246aedf3
-obs4REF/MOHC/HadISST-1-1/mon/ts/gn/v20210727/ts_mon_HadISST-1-1_PCMDI_gn_187001-201907.nc md5:99c8691e0f615dc4d79b4fb5e926cc76
-obs4REF/ESSO/TropFlux-1-0/mon/hfls/gn/v20210727/hfls_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:2f05191d6727068e1500d8d4ed90098a
-obs4REF/ESSO/TropFlux-1-0/mon/hfns/gn/v20210727/hfns_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:7a9019e51a41d9e4ab1fcfb072d8ca8d
-obs4REF/ESSO/TropFlux-1-0/mon/hfss/gn/v20210727/hfss_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:1da9d8fe862c61bc49c36c18b6527213
-obs4REF/ESSO/TropFlux-1-0/mon/tas/gn/v20210727/tas_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:a6057931b5f6bc000a44514a1a8c891f
-obs4REF/ESSO/TropFlux-1-0/mon/tauu/gn/v20210727/tauu_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:7c73a3deed3403fa9d21caef3a4d988d
-obs4REF/ESSO/TropFlux-1-0/mon/tauv/gn/v20210727/tauv_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:8abc7a724a7a297826e2f783a4ea14f9
-obs4REF/ESSO/TropFlux-1-0/mon/ts/gn/v20210727/ts_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:8697d3d7862f6e3b72bb5a161aa75ee8
+obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200701-200712.nc md5:695633a2b401cfb66c8addbf58073dbc
+obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200801-200812.nc md5:404f1e1f111859be06c00bcb8d740ff2
+obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_200901-200912.nc md5:a1bb8584d60cdd71154c01a692fa1fb4
+obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201001-201012.nc md5:b78016a3c61d99dc0fd29563aa344ca1
+obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201101-201112.nc md5:d64c231a7f798a255997ffe196613ea1
+obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201201-201212.nc md5:7d90ce60b872dc4f044b9b0101114983
+obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201301-201312.nc md5:2fc032707cb8a31ac60fa4abe9efe183
+obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201401-201412.nc md5:6022d17e11df7818f5b0429d6e401d17
+obs4REF/ECMWF/ERA-5/mon/ta/gn/v20250220/ta_mon_ERA-5_PCMDI_gn_201501-201512.nc md5:c68fdabf6eeb4813befceace089c9494
 obs4REF/ECMWF/ERA-INT/mon/hfls/gn/v20210727/hfls_mon_ERA-INT_PCMDI_gn_197901-201903.nc md5:1ae4587143f05ee81432b3d9960aab63
 obs4REF/ECMWF/ERA-INT/mon/hfss/gn/v20210727/hfss_mon_ERA-INT_PCMDI_gn_197901-201903.nc md5:261f02b8cbce18486548882a11f9aa34
 obs4REF/ECMWF/ERA-INT/mon/hur/gn/v20210727/hur_mon_ERA-INT_PCMDI_gn_198901-201001.nc md5:56fcd2df8ed2879f18b5e8c78134a148
@@ -31,6 +34,34 @@ obs4REF/ECMWF/ERA-INT/mon/uas/gn/v20210727/uas_mon_ERA-INT_PCMDI_gn_197901-20190
 obs4REF/ECMWF/ERA-INT/mon/va/gn/v20210727/va_mon_ERA-INT_PCMDI_gn_198901-201001.nc md5:f67ca168d6cd87bfdd4a911eb72dd022
 obs4REF/ECMWF/ERA-INT/mon/vas/gn/v20210727/vas_mon_ERA-INT_PCMDI_gn_197901-201903.nc md5:ac19b48b897cfe839585df4ff0fc4a7b
 obs4REF/ECMWF/ERA-INT/mon/zg/gn/v20210727/zg_mon_ERA-INT_PCMDI_gn_198901-201001.nc md5:d8fb93f31ff4a6370ccee93db82af86c
-obs4REF/NOAA-NCEI/GPCP-2-3/mon/pr/gn/v20210727/pr_mon_GPCP-2-3_PCMDI_gn_197901-201907.nc md5:0877f014868b83547448f96c3e7c83e9
+obs4REF/ESSO/TropFlux-1-0/mon/hfls/gn/v20210727/hfls_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:2f05191d6727068e1500d8d4ed90098a
+obs4REF/ESSO/TropFlux-1-0/mon/hfls/gn/v20250415/hfls_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:e607167a08a2521b65e55eb186182003
+obs4REF/ESSO/TropFlux-1-0/mon/hfns/gn/v20210727/hfns_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:7a9019e51a41d9e4ab1fcfb072d8ca8d
+obs4REF/ESSO/TropFlux-1-0/mon/hfss/gn/v20210727/hfss_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:1da9d8fe862c61bc49c36c18b6527213
+obs4REF/ESSO/TropFlux-1-0/mon/hfss/gn/v20250415/hfss_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:6f766ca0332a8e566c408d237571a924
+obs4REF/ESSO/TropFlux-1-0/mon/tas/gn/v20210727/tas_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:a6057931b5f6bc000a44514a1a8c891f
+obs4REF/ESSO/TropFlux-1-0/mon/tas/gn/v20250415/tas_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:527ab1b9becf2a793df558532eccfe69
+obs4REF/ESSO/TropFlux-1-0/mon/tauu/gn/v20210727/tauu_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:7c73a3deed3403fa9d21caef3a4d988d
+obs4REF/ESSO/TropFlux-1-0/mon/tauu/gn/v20250415/tauu_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:0822e2002e61472277116d38e5e19498
+obs4REF/ESSO/TropFlux-1-0/mon/tauv/gn/v20210727/tauv_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:8abc7a724a7a297826e2f783a4ea14f9
+obs4REF/ESSO/TropFlux-1-0/mon/ts/gn/v20210727/ts_mon_TropFlux-1-0_PCMDI_gn_197901-201707.nc md5:8697d3d7862f6e3b72bb5a161aa75ee8
+obs4REF/MOHC/HadISST-1-1/mon/ts/gn/v20210727/ts_mon_HadISST-1-1_PCMDI_gn_187001-201907.nc md5:99c8691e0f615dc4d79b4fb5e926cc76
+obs4REF/MOHC/HadISST-1-1/mon/ts/gn/v20250415/ts_mon_HadISST-1-1_PCMDI_gn_187001-202501.nc md5:66fb8cdf53ec0e073c565adfa57862b3
 obs4REF/NASA-GSFC/TRMM-3B43v-7/mon/pr/gn/v20210727/pr_mon_TRMM-3B43v-7_PCMDI_gn_199801-201712.nc md5:b80c9989d358656c781be5ea5a44c64c
-obs4REF/CNES/AVISO-1-0/mon/zos/gn/v20210727/zos_mon_AVISO-1-0_PCMDI_gn_199301-201912.nc md5:91252303cb65548fee5ff42dd3024825
+obs4REF/NASA-LaRC/CERES-EBAF-4-2/mon/rlds/gn/v20230209/rlds_mon_CERES-EBAF-4-2_RSS_gn_200003-202309.nc md5:897451ed566251135483d1413cd6bee3
+obs4REF/NASA-LaRC/CERES-EBAF-4-2/mon/rlus/gn/v20230209/rlus_mon_CERES-EBAF-4-2_RSS_gn_200003-202309.nc md5:750650025845fc89d9e56a3690deea21
+obs4REF/NASA-LaRC/CERES-EBAF-4-2/mon/rsds/gn/v20230209/rsds_mon_CERES-EBAF-4-2_RSS_gn_200003-202309.nc md5:5c33068dd11e6eb8d0bf6e2aa0335ef2
+obs4REF/NASA-LaRC/CERES-EBAF-4-2/mon/rsus/gn/v20230209/rsus_mon_CERES-EBAF-4-2_RSS_gn_200003-202309.nc md5:4f67c58186905e995a8b9497a49ecbf0
+obs4REF/NOAA-ESRL-PSD/20CR/mon/psl/gn/v20210727/psl_mon_20CR_PCMDI_gn_187101-201212.nc md5:570ce90b3afd1d0b31690ae5dbe32d31
+obs4REF/NOAA-ESRL-PSD/20CR/mon/ts/gn/v20210727/ts_mon_20CR_PCMDI_gn_187101-201212.nc md5:e4890cc19ccc5bac29c6b70f28265ff1
+obs4REF/NOAA-NCEI/CMAP-V1902/mon/pr/gn/v20210727/pr_mon_CMAP-V1902_PCMDI_gn_197901-201901.nc md5:9d943d2dd0645850b616820f246aedf3
+obs4REF/NOAA-NCEI/GPCP-2-3/mon/pr/gn/v20210727/pr_mon_GPCP-2-3_PCMDI_gn_197901-201907.nc md5:0877f014868b83547448f96c3e7c83e9
+obs4REF/NOAA-NCEI/GPCP-2-3/mon/pr/gn/v20231205/pr_mon_GPCP-Monthly-3-2_RSS_gn_198301-202303.nc md5:6970c22443e2097c45de5db8947318eb
+obs4REF/NOAA-NCEI/WOA2023/mon/no3/gn/20250516/no3_mon_WOA2023_REF_gn_201501-202212.nc md5:ffd2b0c1b1f35f5176ba26c43478c9d2
+obs4REF/NOAA-NCEI/WOA2023/mon/o2/gn/20250516/o2_mon_WOA2023_REF_gn_201501-202212.nc md5:1a41d343329730d357a6a21b59f201c7
+obs4REF/NOAA-NCEI/WOA2023/mon/po4/gn/20250516/po4_mon_WOA2023_REF_gn_201501-202212.nc md5:ff65f5c81f3775af49be3e22305d4979
+obs4REF/NOAA-NCEI/WOA2023/mon/so/gn/20250516/so_mon_WOA2023_REF_gn_201501-202212.nc md5:0da9596667c3b6bbc5388e420cd24a3b
+obs4REF/NOAA-NCEI/WOA2023/mon/thetao/gn/20250516/thetao_mon_WOA2023_REF_gn_201501-202212.nc md5:e3bb4f9b387191571459dc49520632ee
+obs4REF/NOC/RAPID/mon/msftmz/NA/20250516/msftmz_mon_RAPID_REF_NA_200404-202302.nc md5:e9ee555d923d39112dcbf591440000b3
+obs4REF/UCI-ORNL/Hoffman/yr/fgco2/gm/20250516/fgco2_yr_Hoffman_REF_gm_185007-201007.nc md5:93419dd752e0ae20e69b7db3f8ea9f5b
+obs4REF/UCI-ORNL/Hoffman/yr/nbp/gm/20250516/nbp_yr_Hoffman_REF_gm_185007-201007.nc md5:1c0b93c81e2608c668ec39c45ca828de

climate_ref/dataset_registry/sample_data.txt CHANGED Viewed

@@ -23,22 +23,20 @@ CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsdt/gn/v20191115/rsdt_A
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsut/gn/v20191115/rsut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc b3148af8bee4c3562607aeaec9d81d94abad366c85421dd55ef222516fd3f7e0
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsutcs/gn/v20191115/rsutcs_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200501-201412.nc 9f336ed549c02bf31f73f4679b850463ca8edc4db7afb04b31acd643bc3b51c8
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/tas/gn/v20191115/tas_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc 4c74c74031fbc88cb0df70826e0aea63a582af5f5a714603a0ae70b3ff834097
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/tauu/gn/v20191115/tauu_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc 2a1eb8768134189addd7541975ce39d75406d530ba68b2c1286f9f1d8fd14f17
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/ts/gn/v20191115/ts_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc b3b8e7675287471fd27f3de1f3ecada278eb7ea384da24031d0f3a949e04f757
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Emon/cSoil/gn/v20191115/cSoil_Emon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc d78d9bc532d79a50a0e40645f91572d61a92881aea43552061baea0e77353777
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/gpp/gn/v20191115/gpp_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 41a43edaa1ff7385232f8fbf67fcc551eb77e51ba6cd75b95b5f0ee71af19544
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/lai/gn/v20191115/lai_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 4e6e1530da9030cfb7c207543369c636f339ee3c59c4621e22405494dc436755
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/mrro/gn/v20191115/mrro_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 3b9c010af44ca894bb23a55903fdc57dcd5af8a4e22ea1e98b41b18274720fd9
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/mrsos/gn/v20191115/mrsos_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc ca21d4f5341114cbc217d7c30f89c4375b02ea0a2b723d9cb069db6a9c08b08f
-CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/nbp/gn/v20191115/nbp_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 476b4ebacf38ddb78d44975b01d859e9ca3ed7955b92adc4bd098591867e56b0
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/nbp/gn/v20191115/nbp_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc a400042a848e855335de99e3ae8f5f2a900476db953ad70e6562fc4b8beb0ffc
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Ofx/areacello/gn/v20191115/areacello_Ofx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc fe5adde7cfe6d80462ad649fe68c2aa983ba700d96cff5ac830b1d7e7b09ca6f
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Ofx/sftof/gn/v20191115/sftof_Ofx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc 89106ad204fb1ac9f3b25f4b93d6fcf9e95db5f1f4466b9baafcb426bad24e4f
-CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_197901-197912.nc 9016163abd89155710291dc541a4146778f18a320435a2ee618f0da1424b0338
-CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_198001-198912.nc 97094c32cb3e8f019ad87145d348307c2a57ba9b0acf0cc5c59978213060e8d4
-CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_199001-199912.nc be682038248b0d79ea9e0402a2a173bd5c054dfdd8cc0932f6cb85d925670889
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-200912.nc 266c386ddfbe9556404afadef1e778a64b5f4c5dd746c54a1beee3e5fbc9954b
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_201001-201412.nc 21fa3a223baced12da8a62ad406ba8126851c3e36589184fba2bb1c8b269c6e3
-CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/sos/gn/v20191115/sos_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_197901-201412.nc a757237973708d28f2333542fc34ed9543a15256c7f38d0139fcfe66bdb8d95c
-CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/tos/gn/v20191115/tos_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 701fa01ac2c50b622c3454bb3d69a54fed1c92b66f9194c09148b2349aa11342
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/sos/gn/v20191115/sos_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc da3a4b261779bd8477a6e067c7e7b611d9438e74d76cff225a44a8dacedc3fd4
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/tos/gn/v20191115/tos_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc b10be33023c7fdc77c7550ea1bfc6976e354841e6ba6aeaece6eec4b3308d99e
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/SImon/siconc/gn/v20200817/siconc_SImon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_197901-201412.nc 384b45a0f88678ffaf0c9701a0fc0175fb0c319d7d94d0b3c4334939d653e51d
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/fx/areacella/gn/v20191115/areacella_fx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc 6750d4a2432842cb9342ef99b8ecb3569e8ff3dcbbf020f7f9f43a6f7af42f06
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/fx/sftlf/gn/v20191115/sftlf_fx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc 08d84ba3cf02a2481b76611dfa1abe25cbf76326003eebc4eb00c99b32fea19a
@@ -55,6 +53,10 @@ CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/esm-piControl/r1i1p1f1/Amon/tas/gn/v20190815/tas_
 CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/esm-piControl/r1i1p1f1/Amon/tas/gn/v20190815/tas_Amon_MPI-ESM1-2-LR_esm-piControl_r1i1p1f1_gn_189001-190912.nc ae9193e06aec8a7f63f2f7ec63fec9d25b2c1dc5de2bbe7065428c722090f45c
 CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/esm-piControl/r1i1p1f1/Amon/tas/gn/v20190815/tas_Amon_MPI-ESM1-2-LR_esm-piControl_r1i1p1f1_gn_191001-191512.nc 2113b7d4a781c7c28de55b0ffe99c42b49692d113f5109e7a4d0214211a9eb1c
 CMIP6/CMIP/MPI-M/MPI-ESM1-2-LR/esm-piControl/r1i1p1f1/fx/areacella/gn/v20190815/areacella_fx_MPI-ESM1-2-LR_esm-piControl_r1i1p1f1_gn.nc 2f5595635a566d5b4a889468f687aabf99ca5cdadb11f6a770d05d09775bfabb
+CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/ImonAnt/snc/gn/v20190308/snc_ImonAnt_CESM2_historical_r1i1p1f1_gn_200001-201412.nc 5a88cd5ed6ed33f9b24fd4bf942b5e427cb3512584a38030b07feb158c7a78c2
+CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/ImonGre/snc/gn/v20190308/snc_ImonGre_CESM2_historical_r1i1p1f1_gn_200001-201412.nc 5f6b6ccb2450eeeebc3b24d3d76cf306894094efa2f6ee53403be89595f4455c
+CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/LImon/snc/gn/v20190308/snc_LImon_CESM2_historical_r1i1p1f1_gn_200001-201412.nc 27005c5952a981c27c17ad8d169d01c852e2daee3236adf1e170b6b96aafcfff
+CMIP6/CMIP/NCAR/CESM2/historical/r1i1p1f1/Lmon/burntFractionAll/gn/v20190308/burntFractionAll_Lmon_CESM2_historical_r1i1p1f1_gn_200001-201412.nc 61fbd29b05a5bdfa11001265c3960b5147e641148eff2a55cc027aa56db7263c
 CMIP6/DAMIP/CSIRO/ACCESS-ESM1-5/hist-GHG/r1i1p1f1/Amon/psl/gn/v20200615/psl_Amon_ACCESS-ESM1-5_hist-GHG_r1i1p1f1_gn_200001-202012.nc 5fed6b48717fcf508e8b5936be304db0346362c2182c5d13a141afe7c982021a
 CMIP6/DAMIP/CSIRO/ACCESS-ESM1-5/hist-GHG/r1i1p1f1/Amon/ts/gn/v20200615/ts_Amon_ACCESS-ESM1-5_hist-GHG_r1i1p1f1_gn_200001-202012.nc aa98fa93ef29ea266a94d87876b4ae08917caf77e38c5df1eae1ec98a5f559cb
 CMIP6/DAMIP/CSIRO/ACCESS-ESM1-5/hist-GHG/r1i1p1f1/fx/areacella/gn/v20200615/areacella_fx_ACCESS-ESM1-5_hist-GHG_r1i1p1f1_gn.nc 7179f75ff1754e9666b10f8c5508040bbe9fdac0c8117a9231ba5977d4889f27

climate_ref/datasets/base.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import Protocol
+from typing import Protocol, cast
 import pandas as pd
 from loguru import logger
@@ -44,14 +44,49 @@ class DatasetAdapter(Protocol):
     dataset_cls: type[Dataset]
     slug_column: str
+    """
+    The column in the data catalog that contains the dataset slug.
+    The dataset slug is a unique identifier for the dataset that includes the version of the dataset.
+    This can be used to group files together that belong to the same dataset.
+    """
     dataset_specific_metadata: tuple[str, ...]
     file_specific_metadata: tuple[str, ...] = ()
+    version_metadata: str = "version"
+    """
+    The column in the data catalog that contains the version of the dataset.
+    """
+    dataset_id_metadata: tuple[str, ...] = ()
+    """
+    The group of metadata columns that are specific to the dataset excluding the version information.
+    Each unique dataset should have the same values for these columns.
+    This is generally the columns that describe the `slug` of a dataset,
+    excluding the version information.
+    """
     def pretty_subset(self, data_catalog: pd.DataFrame) -> pd.DataFrame:
         """
         Get a subset of the data_catalog to pretty print
+        Parameters
+        ----------
+        data_catalog
+            Data catalog to subset
+        Returns
+        -------
+        :
+            Subset of the data catalog to pretty print
         """
-        ...
+        return data_catalog[
+            [
+                *self.dataset_id_metadata,
+                self.version_metadata,
+            ]
+        ]
     def find_local_datasets(self, file_or_directory: Path) -> pd.DataFrame:
         """
@@ -202,6 +237,8 @@ class DatasetAdapter(Protocol):
         Iterating over different datasets within the data catalog can be done using a `groupby`
         operation for the `instance_id` column.
+        Only the latest version of each dataset is returned.
         The index of the data catalog is the primary key of the dataset.
         This should be maintained during any processing.
@@ -213,6 +250,27 @@ class DatasetAdapter(Protocol):
         with db.session.begin():
             # TODO: Paginate this query to avoid loading all the data at once
             if include_files:
-                return self._get_dataset_files(db, limit)
+                catalog = self._get_dataset_files(db, limit)
             else:
-                return self._get_datasets(db, limit)
+                catalog = self._get_datasets(db, limit)
+        def _get_latest_version(dataset_catalog: pd.DataFrame) -> pd.DataFrame:
+            """
+            Get the latest version of each dataset based on the version metadata.
+            This assumes that the version can be sorted lexicographically.
+            """
+            latest_version = dataset_catalog[self.version_metadata].max()
+            return cast(
+                pd.DataFrame, dataset_catalog[dataset_catalog[self.version_metadata] == latest_version]
+            )
+        # If there are no datasets, return an empty DataFrame
+        if catalog.empty:
+            return pd.DataFrame(columns=self.dataset_specific_metadata + self.file_specific_metadata)
+        # Group by the dataset ID and get the latest version for each dataset
+        return catalog.groupby(
+            list(self.dataset_id_metadata), group_keys=False, as_index=False, sort=False
+        ).apply(_get_latest_version)

climate_ref/datasets/cmip6.py CHANGED Viewed

@@ -196,40 +196,21 @@ class CMIP6DatasetAdapter(DatasetAdapter):
     file_specific_metadata = ("start_time", "end_time", "path")
+    version_metadata = "version"
+    dataset_id_metadata = (
+        "activity_id",
+        "institution_id",
+        "source_id",
+        "experiment_id",
+        "member_id",
+        "table_id",
+        "variable_id",
+        "grid_label",
+    )
     def __init__(self, n_jobs: int = 1):
         self.n_jobs = n_jobs
-    def pretty_subset(self, data_catalog: pd.DataFrame) -> pd.DataFrame:
-        """
-        Get a subset of the data_catalog to pretty print
-        This is particularly useful for CMIP6 datasets, which have a lot of metadata columns.
-        Parameters
-        ----------
-        data_catalog
-            Data catalog to subset
-        Returns
-        -------
-        :
-            Subset of the data catalog to pretty print
-        """
-        return data_catalog[
-            [
-                "activity_id",
-                "institution_id",
-                "source_id",
-                "experiment_id",
-                "member_id",
-                "table_id",
-                "variable_id",
-                "grid_label",
-                "version",
-            ]
-        ]
     def find_local_datasets(self, file_or_directory: Path) -> pd.DataFrame:
         """
         Generate a data catalog from the specified file or directory
@@ -266,15 +247,8 @@ class CMIP6DatasetAdapter(DatasetAdapter):
         datasets["end_time"] = _parse_datetime(datasets["end_time"])
         drs_items = [
-            "activity_id",
-            "institution_id",
-            "source_id",
-            "experiment_id",
-            "member_id",
-            "table_id",
-            "variable_id",
-            "grid_label",
-            "version",
+            *self.dataset_id_metadata,
+            self.version_metadata,
         ]
         datasets["instance_id"] = datasets.apply(
             lambda row: "CMIP6." + ".".join([row[item] for item in drs_items]), axis=1

climate_ref/datasets/obs4mips.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from __future__ import annotations
-import re
 import traceback
 from pathlib import Path
 from typing import Any
@@ -8,6 +7,7 @@ from typing import Any
 import pandas as pd
 import xarray as xr
 from ecgtools import Builder
+from ecgtools.parsers.utilities import extract_attr_with_regex  # type: ignore
 from loguru import logger
 from climate_ref.datasets.base import DatasetAdapter
@@ -15,25 +15,6 @@ from climate_ref.datasets.cmip6 import _parse_datetime
 from climate_ref.models.dataset import Dataset, Obs4MIPsDataset
-def extract_attr_with_regex(
-    input_str: str, regex: str, strip_chars: str | None, ignore_case: bool
-) -> list[Any] | None:
-    """
-    Extract version information from attribute with regular expressions.
-    """
-    if ignore_case:
-        pattern = re.compile(regex, re.IGNORECASE)
-    else:
-        pattern = re.compile(regex)
-    match = re.findall(pattern, input_str)
-    if match:
-        matchstr = max(match, key=len)
-        match = matchstr.strip(strip_chars) if strip_chars else matchstr.strip()
-        return match
-    else:
-        return None
 def parse_obs4mips(file: str) -> dict[str, Any | None]:
     """Parser for obs4mips"""
     keys = sorted(
@@ -144,38 +125,18 @@ class Obs4MIPsDatasetAdapter(DatasetAdapter):
     )
     file_specific_metadata = ("start_time", "end_time", "path")
+    version_metadata = "source_version_number"
+    dataset_id_metadata = (
+        "activity_id",
+        "institution_id",
+        "source_id",
+        "variable_id",
+        "grid_label",
+    )
     def __init__(self, n_jobs: int = 1):
         self.n_jobs = n_jobs
-    def pretty_subset(self, data_catalog: pd.DataFrame) -> pd.DataFrame:
-        """
-        Get a subset of the data_catalog to pretty print
-        This is particularly useful for obs4MIPs datasets, which have a lot of metadata columns.
-        Parameters
-        ----------
-        data_catalog
-            Data catalog to subset
-        Returns
-        -------
-        :
-            Subset of the data catalog to pretty print
-        """
-        return data_catalog[
-            [
-                "activity_id",
-                "institution_id",
-                "source_id",
-                "variable_id",
-                "grid_label",
-                "source_version_number",
-            ]
-        ]
     def find_local_datasets(self, file_or_directory: Path) -> pd.DataFrame:
         """
         Generate a data catalog from the specified file or directory
@@ -211,12 +172,8 @@ class Obs4MIPsDatasetAdapter(DatasetAdapter):
         datasets["end_time"] = _parse_datetime(datasets["end_time"])
         drs_items = [
-            "activity_id",
-            "institution_id",
-            "source_id",
-            "variable_id",
-            "grid_label",
-            "source_version_number",
+            *self.dataset_id_metadata,
+            self.version_metadata,
         ]
         datasets["instance_id"] = datasets.apply(
             lambda row: "obs4MIPs." + ".".join([row[item] for item in drs_items]), axis=1

climate_ref/executor/__init__.py CHANGED Viewed

@@ -9,8 +9,9 @@ The simplest executor is the `LocalExecutor`, which runs the diagnostic in the s
 This is useful for local testing and debugging.
 """
+from .hpc import HPCExecutor
 from .local import LocalExecutor
 from .result_handling import handle_execution_result
 from .synchronous import SynchronousExecutor
-__all__ = ["LocalExecutor", "SynchronousExecutor", "handle_execution_result"]
+__all__ = ["HPCExecutor", "LocalExecutor", "SynchronousExecutor", "handle_execution_result"]

climate-ref 0.5.5__py3-none-any.whl → 0.6.1__py3-none-any.whl

climate-ref 0.5.5__py3-none-any.whl → 0.6.1__py3-none-any.whl