PyPI - climate-ref - Versions diffs - 0.7.0__tar.gz → 0.8.0__tar.gz - Mend

climate-ref 0.7.0 (tar.gz) → 0.8.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (95) hide show

{climate_ref-0.7.0 → climate_ref-0.8.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: climate-ref
-Version: 0.7.0
+Version: 0.8.0
 Summary: Application which runs the CMIP Rapid Evaluation Framework
 Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
 License-Expression: Apache-2.0

{climate_ref-0.7.0 → climate_ref-0.8.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "climate-ref"
-version = "0.7.0"
+version = "0.8.0"
 description = "Application which runs the CMIP Rapid Evaluation Framework"
 readme = "README.md"
 authors = [

{climate_ref-0.7.0 → climate_ref-0.8.0}/src/climate_ref/cli/datasets.py RENAMED Viewed

@@ -6,8 +6,6 @@ which executions are required for a given diagnostic without having to re-parse
 """
-import errno
-import os
 import shutil
 from collections.abc import Iterable
 from pathlib import Path
@@ -133,7 +131,7 @@ def ingest(  # noqa
         if not _dir.exists():
             logger.error(f"File or directory {_dir} does not exist")
-            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), _dir)
+            continue
         # TODO: This assumes that all datasets are nc files.
         # THis is true for CMIP6 and obs4MIPs but may not be true for other dataset types in the future.

{climate_ref-0.7.0 → climate_ref-0.8.0}/src/climate_ref/config.py RENAMED Viewed

@@ -14,11 +14,14 @@ which always take precedence over any other configuration values.
 # `esgpull` configuration management system with some of the extra complexity removed.
 # https://github.com/ESGF/esgf-download/blob/main/esgpull/config.py
+import datetime
 import importlib.resources
 import os
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Literal
+import platformdirs
+import requests
 import tomlkit
 from attr import Factory
 from attrs import define, field
@@ -334,6 +337,46 @@ def _load_config(config_file: str | Path, doc: dict[str, Any]) -> "Config":
     return _converter_defaults_relaxed.structure(doc, Config)
+# Maximum age of the cached ignore datasets file before a fresh download is attempted.
+DEFAULT_IGNORE_DATASETS_MAX_AGE = datetime.timedelta(hours=6)
+# Location the default ignore datasets file is downloaded from.
+DEFAULT_IGNORE_DATASETS_URL = (
+    "https://raw.githubusercontent.com/Climate-REF/climate-ref/refs/heads/main/default_ignore_datasets.yaml"
+)
+def _get_default_ignore_datasets_file() -> Path:
+    """
+    Get the path to the default ignore datasets file, refreshing the cached copy if needed
+    The file lives in the user cache directory for ``climate_ref``. It is downloaded from
+    `DEFAULT_IGNORE_DATASETS_URL` when it does not exist yet or when it is older than
+    `DEFAULT_IGNORE_DATASETS_MAX_AGE`.
+    Any failure to download (connection error, timeout or HTTP error status) is logged as a
+    warning instead of being raised, so the returned path always points at an existing file,
+    although that file may be empty or out of date.
+    Returns
+    -------
+    :
+        Path to the cached ignore datasets file
+    """
+    cache_dir = platformdirs.user_cache_path("climate_ref")
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    ignore_datasets_file = cache_dir / "default_ignore_datasets.yaml"
+    download = True
+    if ignore_datasets_file.exists():
+        # Only update if the ignore datasets file is older than `DEFAULT_IGNORE_DATASETS_MAX_AGE`.
+        modification_time = datetime.datetime.fromtimestamp(ignore_datasets_file.stat().st_mtime)
+        age = datetime.datetime.now() - modification_time
+        if age < DEFAULT_IGNORE_DATASETS_MAX_AGE:
+            download = False
+    if download:
+        logger.info(
+            f"Downloading default ignore datasets file from {DEFAULT_IGNORE_DATASETS_URL} "
+            f"to {ignore_datasets_file}"
+        )
+        try:
+            # The request itself must be inside the `try`: connection errors and timeouts are raised
+            # by `requests.get`, not by `raise_for_status`, and must not abort creating a `Config`.
+            response = requests.get(DEFAULT_IGNORE_DATASETS_URL, timeout=120)
+            response.raise_for_status()
+        except requests.RequestException as exc:
+            logger.warning(f"Failed to download default ignore datasets file: {exc}")
+            # Creates an empty file if none exists. For an existing (stale) file this refreshes its
+            # modification time, so the next download attempt only happens after the maximum age again.
+            ignore_datasets_file.touch(exist_ok=True)
+        else:
+            with ignore_datasets_file.open(mode="wb") as file:
+                file.write(response.content)
+    return ignore_datasets_file
 @define(auto_attribs=True)
 class Config:
     """
@@ -364,6 +407,26 @@ class Config:
     - `complete`: Use the complete parser, which parses the dataset based on all available metadata.
     """
+    ignore_datasets_file: Path = field(factory=_get_default_ignore_datasets_file)
+    """
+    Path to the file containing the ignore datasets
+    This file is a YAML file that contains a list of facets to ignore per diagnostic.
+    The format is:
+    ```yaml
+    provider:
+      diagnostic:
+        source_type:
+          - facet: value
+          - another_facet: [another_value1, another_value2]
+    ```
+    If this is not specified, a default ignore datasets file will be used.
+    The default file is downloaded from the Climate-REF GitHub repository
+    if it does not exist or is older than 6 hours.
+    """
     paths: PathConfig = Factory(PathConfig)
     db: DbConfig = Factory(DbConfig)
     executor: ExecutorConfig = Factory(ExecutorConfig)

{climate_ref-0.7.0 → climate_ref-0.8.0}/src/climate_ref/executor/hpc.py RENAMED Viewed

@@ -19,8 +19,9 @@ except ImportError:  # pragma: no cover
     )
 import os
+import re
 import time
-from typing import Any
+from typing import Annotated, Any, Literal
 import parsl
 from loguru import logger
@@ -29,6 +30,7 @@ from parsl.config import Config as ParslConfig
 from parsl.executors import HighThroughputExecutor
 from parsl.launchers import SimpleLauncher, SrunLauncher
 from parsl.providers import SlurmProvider
+from pydantic import BaseModel, Field, StrictBool, field_validator, model_validator
 from tqdm import tqdm
 from climate_ref.config import Config
@@ -43,6 +45,72 @@ from .local import ExecutionFuture, process_result
 from .pbs_scheduler import SmartPBSProvider
+class SlurmConfig(BaseModel):
+    """
+    Validated executor settings for the Slurm scheduler
+    ``scheduler``, ``account`` and ``username`` are required and at least one of ``partition``
+    or ``qos`` must be provided. Every other field has a default value.
+    """
+    scheduler: Literal["slurm"]
+    account: str
+    username: str
+    partition: str | None = None
+    log_dir: str = "runinfo"
+    qos: str | None = None
+    req_nodes: Annotated[int, Field(strict=True, ge=1, le=1000)] = 1
+    cores_per_worker: Annotated[int, Field(strict=True, ge=1, le=1000)] = 1
+    mem_per_worker: Annotated[float, Field(strict=True, gt=0, lt=1000.0)] | None = None
+    max_workers_per_node: Annotated[int, Field(strict=True, ge=1, le=1000)] = 16
+    validation: StrictBool = False
+    walltime: str = "00:30:00"
+    scheduler_options: str = ""
+    retries: Annotated[int, Field(strict=True, ge=1, le=3)] = 2
+    # NOTE(review): presumably one block corresponds to one Slurm job -- confirm
+    max_blocks: Annotated[int, Field(strict=True, ge=1)] = 1
+    worker_init: str = ""
+    overrides: str = ""
+    cmd_timeout: Annotated[int, Field(strict=True, ge=0)] = 120
+    cpu_affinity: str = "none"
+    @model_validator(mode="before")
+    @classmethod
+    def _check_parition_qos(cls, data: Any) -> Any:
+        """Reject raw input that provides neither ``partition`` nor ``qos``."""
+        # A "before" validator receives the raw, unvalidated input, which is not guaranteed to be a
+        # dict. Anything else is passed through untouched so pydantic reports its own error for it.
+        if not isinstance(data, dict):
+            return data
+        # Checking the values rather than key presence also rejects `partition: None` without a qos.
+        if data.get("partition") is None and data.get("qos") is None:
+            raise ValueError("partition or qos is needed")
+        return data
+    @field_validator("scheduler_options")
+    @classmethod
+    def _validate_sbatch_syntax(cls, v: str) -> str:
+        """
+        Ensure every non-blank line is an ``#SBATCH -option value [-option value ...]`` directive
+        An empty value is accepted unchanged. Blank lines (for example from a trailing newline)
+        are ignored rather than being reported as invalid directives.
+        """
+        if not v:
+            return v
+        sbatch_pattern = re.compile(
+            r"^\s*#SBATCH\s+"  # Start with #SBATCH
+            r"(?:-\w+\s+[^\s]+"  # Option-value pairs
+            r"(?:\s+-\w+\s+[^\s]+)*)"  # Additional options
+            r"\s*$",
+            re.IGNORECASE | re.MULTILINE,
+        )
+        # The pattern is case-insensitive and anchored on `#SBATCH`, so no separate prefix check is needed.
+        invalid_lines = [
+            line for line in v.split("\n") if line.strip() and not sbatch_pattern.match(line.strip())
+        ]
+        if invalid_lines:
+            error_msg = (
+                "Invalid SBATCH directives:\n"
+                + "\n".join(invalid_lines)
+                + "\n"
+                + "Expected format: '#SBATCH -option value [-option value ...]'"
+            )
+            raise ValueError(error_msg)
+        return v
+    @field_validator("walltime")
+    @classmethod
+    def _validate_walltime(cls, v: str) -> str:
+        """Ensure the walltime is written as ``HH:MM:SS`` or ``D-HH:MM:SS``."""
+        # NOTE(review): the `D-` form is accepted here, but HPCExecutor converts the walltime with
+        # `map(int, walltime.split(":"))`, which cannot convert a `D-HH` component -- confirm intent.
+        pattern = r"^(\d+-)?\d{1,5}:[0-5][0-9]:[0-5][0-9]$"
+        # `fullmatch` rather than `match`: `$` alone would still accept a trailing newline.
+        if not re.fullmatch(pattern, v):
+            raise ValueError("Walltime must be in `D-HH:MM:SS/HH:MM:SS` format")
+        return v
 @python_app
 def _process_run(definition: ExecutionDefinition, log_level: str) -> ExecutionResult:
     """Run the function on computer nodes"""
@@ -112,13 +180,18 @@ class HPCExecutor:
         self.cores_per_worker = _to_int(executor_config.get("cores_per_worker"))
         self.mem_per_worker = _to_float(executor_config.get("mem_per_worker"))
-        hours, minutes, seconds = map(int, self.walltime.split(":"))
+        if self.scheduler == "slurm":
+            self.slurm_config = SlurmConfig.model_validate(executor_config)
+            hours, minutes, seconds = map(int, self.slurm_config.walltime.split(":"))
+            if self.slurm_config.validation and HAS_REAL_SLURM:
+                self._validate_slurm_params()
+        else:
+            hours, minutes, seconds = map(int, self.walltime.split(":"))
         total_minutes = hours * 60 + minutes + seconds / 60
         self.total_minutes = total_minutes
-        if executor_config.get("validation") and HAS_REAL_SLURM:
-            self._validate_slurm_params()
         self._initialize_parsl()
         self.parsl_results: list[ExecutionFuture] = []
@@ -131,45 +204,52 @@ class HPCExecutor:
             ValueError: If account, partition or QOS are invalid or inaccessible.
         """
         slurm_checker = SlurmChecker()
-        if self.account and not slurm_checker.get_account_info(self.account):
-            raise ValueError(f"Account: {self.account} not valid")
+        if self.slurm_config.account and not slurm_checker.get_account_info(self.slurm_config.account):
+            raise ValueError(f"Account: {self.slurm_config.account} not valid")
         partition_limits = None
         node_info = None
-        if self.partition:
-            if not slurm_checker.get_partition_info(self.partition):
-                raise ValueError(f"Partition: {self.partition} not valid")
+        if self.slurm_config.partition:
+            if not slurm_checker.get_partition_info(self.slurm_config.partition):
+                raise ValueError(f"Partition: {self.slurm_config.partition} not valid")
-            if not slurm_checker.can_account_use_partition(self.account, self.partition):
-                raise ValueError(f"Account: {self.account} cannot access partiton: {self.partition}")
+            if not slurm_checker.can_account_use_partition(
+                self.slurm_config.account, self.slurm_config.partition
+            ):
+                raise ValueError(
+                    f"Account: {self.slurm_config.account}"
+                    f" cannot access partiton: {self.slurm_config.partition}"
+                )
-            partition_limits = slurm_checker.get_partition_limits(self.partition)
-            node_info = slurm_checker.get_node_from_partition(self.partition)
+            partition_limits = slurm_checker.get_partition_limits(self.slurm_config.partition)
+            node_info = slurm_checker.get_node_from_partition(self.slurm_config.partition)
         qos_limits = None
-        if self.qos:
-            if not slurm_checker.get_qos_info(self.qos):
-                raise ValueError(f"QOS: {self.qos} not valid")
+        if self.slurm_config.qos:
+            if not slurm_checker.get_qos_info(self.slurm_config.qos):
+                raise ValueError(f"QOS: {self.slurm_config.qos} not valid")
-            if not slurm_checker.can_account_use_qos(self.account, self.qos):
-                raise ValueError(f"Account: {self.account} cannot access qos: {self.qos}")
+            if not slurm_checker.can_account_use_qos(self.slurm_config.account, self.slurm_config.qos):
+                raise ValueError(
+                    f"Account: {self.slurm_config.account} cannot access qos: {self.slurm_config.qos}"
+                )
-            qos_limits = slurm_checker.get_qos_limits(self.qos)
+            qos_limits = slurm_checker.get_qos_limits(self.slurm_config.qos)
         max_cores_per_node = int(node_info["cpus"]) if node_info else None
-        if max_cores_per_node and self.cores_per_worker:
-            if self.cores_per_worker > max_cores_per_node:
+        if max_cores_per_node and self.slurm_config.cores_per_worker:
+            if self.slurm_config.cores_per_worker > max_cores_per_node:
                 raise ValueError(
-                    f"cores_per_work:{self.cores_per_worker}"
+                    f"cores_per_work:{self.slurm_config.cores_per_worker}"
                     f"larger than the maximum in a node {max_cores_per_node}"
                 )
         max_mem_per_node = float(node_info["real_memory"]) if node_info else None
-        if max_mem_per_node and self.mem_per_worker:
-            if self.mem_per_worker > max_mem_per_node:
+        if max_mem_per_node and self.slurm_config.mem_per_worker:
+            if self.slurm_config.mem_per_worker > max_mem_per_node:
                 raise ValueError(
-                    f"mem_per_work:{self.mem_per_worker}"
+                    f"mem_per_work:{self.slurm_config.mem_per_worker}"
                     f"larger than the maximum mem in a node {max_mem_per_node}"
                 )
@@ -182,8 +262,8 @@ class HPCExecutor:
         if self.total_minutes > float(max_walltime_minutes):
             raise ValueError(
-                f"Walltime: {self.walltime} exceed the maximum time "
-                f"{max_walltime_minutes} allowed by {self.partition} and {self.qos}"
+                f"Walltime: {self.slurm_config.walltime} exceed the maximum time "
+                f"{max_walltime_minutes} allowed by {self.slurm_config.partition} and {self.slurm_config.qos}"
             )
     def _initialize_parsl(self) -> None:
@@ -192,19 +272,34 @@ class HPCExecutor:
         provider: SlurmProvider | SmartPBSProvider
         if self.scheduler == "slurm":
             provider = SlurmProvider(
-                account=self.account,
-                partition=self.partition,
-                qos=self.qos,
-                nodes_per_block=self.req_nodes,
-                max_blocks=int(executor_config.get("max_blocks", 1)),
-                scheduler_options=executor_config.get("scheduler_options", "#SBATCH -C cpu"),
-                worker_init=executor_config.get("worker_init", "source .venv/bin/activate"),
+                account=self.slurm_config.account,
+                partition=self.slurm_config.partition,
+                qos=self.slurm_config.qos,
+                nodes_per_block=self.slurm_config.req_nodes,
+                max_blocks=self.slurm_config.max_blocks,
+                scheduler_options=self.slurm_config.scheduler_options,
+                worker_init=self.slurm_config.worker_init,
                 launcher=SrunLauncher(
                     debug=True,
-                    overrides=executor_config.get("overrides", ""),
+                    overrides=self.slurm_config.overrides,
                 ),
-                walltime=self.walltime,
-                cmd_timeout=int(executor_config.get("cmd_timeout", 120)),
+                walltime=self.slurm_config.walltime,
+                cmd_timeout=self.slurm_config.cmd_timeout,
+            )
+            executor = HighThroughputExecutor(
+                label="ref_hpc_executor",
+                cores_per_worker=self.slurm_config.cores_per_worker,
+                mem_per_worker=self.slurm_config.mem_per_worker,
+                max_workers_per_node=self.slurm_config.max_workers_per_node,
+                cpu_affinity=self.slurm_config.cpu_affinity,
+                provider=provider,
+            )
+            hpc_config = ParslConfig(
+                run_dir=self.slurm_config.log_dir,
+                executors=[executor],
+                retries=self.slurm_config.retries,
             )
         elif self.scheduler == "pbs":
@@ -227,23 +322,24 @@ class HPCExecutor:
                 walltime=self.walltime,
                 cmd_timeout=int(executor_config.get("cmd_timeout", 120)),
             )
-        else:
-            raise ValueError(f"Unsupported scheduler: {self.scheduler}")
-        executor = HighThroughputExecutor(
-            label="ref_hpc_executor",
-            cores_per_worker=self.cores_per_worker if self.cores_per_worker else 1,
-            mem_per_worker=self.mem_per_worker,
-            max_workers_per_node=_to_int(executor_config.get("max_workers_per_node", 16)),
-            cpu_affinity=str(executor_config.get("cpu_affinity")),
-            provider=provider,
-        )
+            executor = HighThroughputExecutor(
+                label="ref_hpc_executor",
+                cores_per_worker=self.cores_per_worker if self.cores_per_worker else 1,
+                mem_per_worker=self.mem_per_worker,
+                max_workers_per_node=_to_int(executor_config.get("max_workers_per_node", 16)),
+                cpu_affinity=str(executor_config.get("cpu_affinity")),
+                provider=provider,
+            )
-        hpc_config = ParslConfig(
-            run_dir=self.log_dir,
-            executors=[executor],
-            retries=int(executor_config.get("retries", 2)),
-        )
+            hpc_config = ParslConfig(
+                run_dir=self.log_dir,
+                executors=[executor],
+                retries=int(executor_config.get("retries", 2)),
+            )
+        else:
+            raise ValueError(f"Unsupported scheduler: {self.scheduler}")
         parsl.load(hpc_config)

{climate_ref-0.7.0 → climate_ref-0.8.0}/src/climate_ref/executor/local.py RENAMED Viewed

@@ -88,8 +88,7 @@ def _process_run(definition: ExecutionDefinition, log_level: str) -> ExecutionRe
     except Exception:  # pragma: no cover
         # This isn't expected but if it happens we want to log the error before the process exits
         logger.exception("Error running diagnostic")
-        # This will kill the process pool
-        raise
+        return ExecutionResult.build_from_failure(definition)
 class LocalExecutor:

{climate_ref-0.7.0 → climate_ref-0.8.0}/src/climate_ref/executor/result_handling.py RENAMED Viewed

@@ -197,12 +197,19 @@ def handle_execution_result(
         The result of the diagnostic execution, either successful or failed
     """
     # Always copy log data to the results directory
-    _copy_file_to_results(
-        config.paths.scratch,
-        config.paths.results,
-        execution.output_fragment,
-        EXECUTION_LOG_FILENAME,
-    )
+    try:
+        _copy_file_to_results(
+            config.paths.scratch,
+            config.paths.results,
+            execution.output_fragment,
+            EXECUTION_LOG_FILENAME,
+        )
+    except FileNotFoundError:
+        logger.error(
+            f"Could not find log file {EXECUTION_LOG_FILENAME} in scratch directory: {config.paths.scratch}"
+        )
+        execution.mark_failed()
+        return
     if not result.successful or result.metric_bundle_filename is None:
         logger.error(f"{execution} failed")

{climate_ref-0.7.0 → climate_ref-0.8.0}/src/climate_ref/solver.py RENAMED Viewed

@@ -353,7 +353,7 @@ class ExecutionSolver:
                 yield from solve_executions(self.data_catalog, diagnostic, provider)
-def solve_required_executions(  # noqa: PLR0913
+def solve_required_executions(  # noqa: PLR0912, PLR0913
     db: Database,
     dry_run: bool = False,
     execute: bool = True,
@@ -396,7 +396,14 @@ def solve_required_executions(  # noqa: PLR0913
             f"for {potential_execution.diagnostic.full_slug()}"
         )
+        if potential_execution.provider.slug not in provider_count:
+            provider_count[potential_execution.provider.slug] = 0
+        if potential_execution.diagnostic.full_slug() not in diagnostic_count:
+            diagnostic_count[potential_execution.diagnostic.full_slug()] = 0
         if dry_run:
+            provider_count[potential_execution.provider.slug] += 1
+            diagnostic_count[potential_execution.diagnostic.full_slug()] += 1
             continue
         # Use a transaction to make sure that the models
@@ -421,11 +428,6 @@ def solve_required_executions(  # noqa: PLR0913
                 },
             )
-            if diagnostic.provider.slug not in provider_count:
-                provider_count[diagnostic.provider.slug] = 0
-            if diagnostic.full_slug() not in diagnostic_count:
-                diagnostic_count[diagnostic.full_slug()] = 0
             if created:
                 logger.info(f"Created new execution group: {potential_execution.execution_slug()!r}")
                 db.session.flush()
@@ -471,5 +473,14 @@ def solve_required_executions(  # noqa: PLR0913
                 provider_count[diagnostic.provider.slug] += 1
                 diagnostic_count[diagnostic.full_slug()] += 1
+    logger.info("Solve complete")
+    logger.info(f"Found {sum(diagnostic_count.values())} new executions")
+    for diag, count in diagnostic_count.items():
+        logger.info(f"  {diag}: {count} new executions")
+    for prov, count in provider_count.items():
+        logger.info(f"  {prov}: {count} new executions")
     if timeout > 0:
         executor.join(timeout=timeout)
+        logger.info("All executions complete")

{climate_ref-0.7.0 → climate_ref-0.8.0}/tests/unit/cli/test_datasets.py RENAMED Viewed

@@ -142,11 +142,9 @@ class TestIngest:
                 "--source-type",
                 "cmip6",
             ],
-            expected_exit_code=1,
         )
-        assert isinstance(result.exception, FileNotFoundError)
-        assert result.exception.filename == sample_data_dir / "missing"
+        # Continues past the missing directory
         assert f"File or directory {sample_data_dir / 'missing'} does not exist" in result.stderr
     def test_ingest_dryrun(self, sample_data_dir, db, invoke_cli):

{climate_ref-0.7.0 → climate_ref-0.8.0}/tests/unit/executor/test_hpc_executor.py RENAMED Viewed

@@ -4,8 +4,9 @@ from unittest.mock import MagicMock, patch
 import parsl
 import pytest
 from parsl.dataflow import futures
+from pydantic import ValidationError
-from climate_ref.executor.hpc import HPCExecutor, execute_locally
+from climate_ref.executor.hpc import HPCExecutor, SlurmConfig, execute_locally
 from climate_ref.executor.local import ExecutionFuture
 from climate_ref_core.diagnostics import ExecutionResult
 from climate_ref_core.exceptions import DiagnosticError
@@ -27,14 +28,26 @@ def test_execute_locally_failed(definition_factory, mock_diagnostic):
 class TestHPCExecutor:
-    def test_is_executor(self, tmp_path):
-        executor = HPCExecutor(log_dir=tmp_path / "parsl_runinfo")
+    @pytest.fixture
+    def base_config(self, tmp_path):
+        """Valid Slurm executor settings, reused by the tests in this class."""
+        run_dir = tmp_path / "parsl_runinfo"
+        return dict(
+            scheduler="slurm",
+            account="myaccount",
+            username="myname",
+            qos="myqos",
+            partition="mypartition",
+            log_dir=str(run_dir),
+        )
+    def test_is_executor(self, base_config):
+        executor = HPCExecutor(**base_config)
         assert executor.name == "hpc"
         assert isinstance(executor, Executor)
         parsl.dfk().cleanup()
-    def test_run_metric(self, metric_definition, provider, mock_diagnostic, mocker, caplog, tmp_path):
+    def test_run_metric(self, metric_definition, provider, mock_diagnostic, mocker, caplog, base_config):
         with patch.object(HPCExecutor, "run", autospec=True) as mock_run:
             # Configure the mock to behave similarly to the original
             mock_run.side_effect = lambda self, definition, execution=None: (
@@ -47,7 +60,7 @@ class TestHPCExecutor:
                 )
             )
-            executor = HPCExecutor(log_dir=tmp_path / "parsl_runinfo")
+            executor = HPCExecutor(**base_config)
             # shall have the SerializationError, but not raised
             executor.run(metric_definition, None)
@@ -57,8 +70,8 @@ class TestHPCExecutor:
         parsl.dfk().cleanup()
-    def test_join(self, metric_definition, tmp_path):
-        executor = HPCExecutor(log_dir=tmp_path / "parsl_runinfo")
+    def test_join(self, metric_definition, base_config):
+        executor = HPCExecutor(**base_config)
         future = futures.AppFuture(1)
         executor.parsl_results = [ExecutionFuture(future, definition=metric_definition, execution_id=None)]
@@ -74,8 +87,8 @@ class TestHPCExecutor:
         assert len(executor.parsl_results) == 0
-    def test_join_diagnostic_exception(self, metric_definition, tmp_path):
-        executor = HPCExecutor(log_dir=tmp_path / "parsl_runinfo")
+    def test_join_diagnostic_exception(self, metric_definition, base_config):
+        executor = HPCExecutor(**base_config)
         future = futures.AppFuture(1)
         executor.parsl_results = [ExecutionFuture(future, definition=metric_definition, execution_id=None)]
@@ -99,8 +112,8 @@ class TestHPCExecutor:
         )
         assert len(executor.parsl_results) == 0
-    def test_join_other_exception(self, metric_definition, tmp_path):
-        executor = HPCExecutor(log_dir=tmp_path / "parsl_runinfo")
+    def test_join_other_exception(self, metric_definition, base_config):
+        executor = HPCExecutor(**base_config)
         future = futures.AppFuture(1)
         executor.parsl_results = [ExecutionFuture(future, definition=metric_definition, execution_id=None)]
@@ -108,3 +121,55 @@ class TestHPCExecutor:
         with pytest.raises(AssertionError, match=re.escape("Execution result should not be None")):
             executor.join(0.1)
+    @pytest.mark.parametrize(
+        "field_name, invalid_value",
+        [
+            ("scheduler", "pbs"),
+            ("account", 1234),
+            ("username", 0.001),
+            ("log_dir", True),
+            ("qos", 1234),
+            ("req_nodes", 1001),
+            ("cores_per_worker", 1001),
+            ("mem_per_worker", -1),
+            ("max_workers_per_node", 1001),
+            ("validation", "true"),
+            ("walltime", "3"),
+            ("scheduler_options", 10),
+            ("scheduler_options", "#SABTCH -C cpu\n$LLLL -C"),
+            ("retries", "2"),
+            ("max_blocks", "1"),
+            ("worker_init", 1),
+            ("overrides", 0),
+            ("cmd_timeout", -1),
+            ("cpu_affinity", 1),
+        ],
+    )
+    def test_hpc_slurm_error_config(self, field_name, invalid_value):
+        """Setting a single field to an invalid value makes ``SlurmConfig`` validation fail."""
+        valid_cfg = {
+            "scheduler": "slurm",
+            "qos": "myqos",
+            "account": "myaccount",
+            "username": "myname",
+            "req_nodes": 3,
+            "scheduler_options": "#SBATCH -C cpu",
+        }
+        # Start from an otherwise valid configuration and override (or add) only the field under test.
+        broken_cfg = {**valid_cfg, field_name: invalid_value}
+        with pytest.raises(ValidationError):
+            SlurmConfig.model_validate(broken_cfg)
+    @pytest.mark.parametrize(
+        "missing_config",
+        [
+            ["scheduler"],
+            ["account"],
+            ["username"],
+            ["partition", "qos"],
+        ],
+    )
+    def test_hpc_slurm_missing_required_config(self, missing_config, base_config):
+        """Dropping a required field (or both ``partition`` and ``qos``) makes validation fail."""
+        # Build a new dict rather than popping from the fixture: this leaves `base_config` untouched
+        # and avoids a list comprehension that is only evaluated for its side effects.
+        slurm_cfg_dict = {key: value for key, value in base_config.items() if key not in missing_config}
+        with pytest.raises(ValidationError):
+            SlurmConfig.model_validate(slurm_cfg_dict)

{climate_ref-0.7.0 → climate_ref-0.8.0}/tests/unit/test_config.py RENAMED Viewed

@@ -1,13 +1,23 @@
 import importlib.metadata
 import logging
 import sys
+from datetime import timedelta
 from pathlib import Path
+import platformdirs
 import pytest
+import requests
 from attr import evolve
 from cattrs import IterableValidationError
-from climate_ref.config import DEFAULT_LOG_FORMAT, Config, PathConfig, transform_error
+import climate_ref.config
+from climate_ref.config import (
+    DEFAULT_LOG_FORMAT,
+    Config,
+    PathConfig,
+    _get_default_ignore_datasets_file,
+    transform_error,
+)
 from climate_ref_core.exceptions import InvalidExecutorException
 from climate_ref_core.executor import Executor
@@ -141,6 +151,9 @@ filename = "sqlite://climate_ref.db"
         without_defaults = cfg.dump(defaults=False)
         assert without_defaults == {
+            "ignore_datasets_file": str(
+                platformdirs.user_cache_path("climate_ref") / "default_ignore_datasets.yaml"
+            ),
             "log_level": "INFO",
             "log_format": DEFAULT_LOG_FORMAT,
             "cmip6_parser": "complete",
@@ -149,6 +162,9 @@ filename = "sqlite://climate_ref.db"
             ],
         }
         assert with_defaults == {
+            "ignore_datasets_file": str(
+                platformdirs.user_cache_path("climate_ref") / "default_ignore_datasets.yaml"
+            ),
             "log_level": "INFO",
             "log_format": DEFAULT_LOG_FORMAT,
             "cmip6_parser": "complete",
@@ -245,3 +261,38 @@ def test_transform_error():
     err = IterableValidationError("Validation error", [ValueError("Test error"), KeyError()], Config)
     assert transform_error(err, "test") == ["invalid value @ test", "required field missing @ test"]
+@pytest.mark.parametrize("status", ["fresh", "stale", "missing"])
+def test_get_default_ignore_datasets_file(mocker, tmp_path, status):
+    """A fresh cached file is kept as is; a stale or missing one is replaced by the download."""
+    fake_response = mocker.MagicMock(status_code=200, content=b"downloaded")
+    mocker.patch.object(climate_ref.config.requests, "get", return_value=fake_response)
+    mocker.patch.object(climate_ref.config.platformdirs, "user_cache_path", return_value=tmp_path)
+    cached_file = tmp_path / "default_ignore_datasets.yaml"
+    if status in ("fresh", "stale"):
+        cached_file.write_text("existing", encoding="utf-8")
+    if status == "stale":
+        # With a negative maximum age, the file that was just written already counts as too old.
+        mocker.patch.object(climate_ref.config, "DEFAULT_IGNORE_DATASETS_MAX_AGE", timedelta(seconds=-1))
+    path = climate_ref.config._get_default_ignore_datasets_file()
+    expected_content = "existing" if status == "fresh" else "downloaded"
+    assert path == tmp_path / "default_ignore_datasets.yaml"
+    assert path.read_text(encoding="utf-8") == expected_content
+def test_get_default_ignore_datasets_file_fail(mocker, tmp_path):
+    """A failed download does not raise and leaves an empty file at the expected location."""
+    failing_response = mocker.MagicMock(status_code=404, content=b"{}")
+    failing_response.raise_for_status.side_effect = requests.RequestException
+    mocker.patch.object(climate_ref.config.requests, "get", return_value=failing_response)
+    mocker.patch.object(climate_ref.config.platformdirs, "user_cache_path", return_value=tmp_path)
+    ignore_file = _get_default_ignore_datasets_file()
+    assert ignore_file == tmp_path / "default_ignore_datasets.yaml"
+    assert ignore_file.parent.exists()
+    assert ignore_file.read_text(encoding="utf-8") == ""