climate-ref 0.6.5__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- climate_ref/cli/__init__.py +12 -3
- climate_ref/cli/_utils.py +56 -2
- climate_ref/cli/datasets.py +48 -9
- climate_ref/cli/executions.py +351 -24
- climate_ref/cli/providers.py +1 -2
- climate_ref/config.py +4 -4
- climate_ref/database.py +62 -4
- climate_ref/dataset_registry/obs4ref_reference.txt +0 -9
- climate_ref/dataset_registry/sample_data.txt +269 -107
- climate_ref/datasets/__init__.py +3 -3
- climate_ref/datasets/base.py +121 -20
- climate_ref/datasets/cmip6.py +2 -0
- climate_ref/datasets/obs4mips.py +26 -15
- climate_ref/executor/__init__.py +8 -1
- climate_ref/executor/hpc.py +7 -1
- climate_ref/executor/result_handling.py +151 -64
- climate_ref/migrations/env.py +12 -10
- climate_ref/migrations/versions/2025-07-20T1521_94beace57a9c_cmip6_finalised.py +1 -1
- climate_ref/migrations/versions/2025-08-05T0327_a1b2c3d4e5f6_finalised_on_base_dataset.py +1 -1
- climate_ref/migrations/versions/2025-09-05T2019_8d28e5e0f9c3_add_indexes.py +108 -0
- climate_ref/migrations/versions/2025-09-10T1358_2f6e36738e06_use_version_as_version_facet_for_.py +35 -0
- climate_ref/migrations/versions/2025-09-22T2359_20cd136a5b04_add_pmp_version.py +35 -0
- climate_ref/models/__init__.py +1 -6
- climate_ref/models/base.py +4 -18
- climate_ref/models/dataset.py +10 -6
- climate_ref/models/diagnostic.py +2 -1
- climate_ref/models/execution.py +225 -12
- climate_ref/models/metric_value.py +27 -112
- climate_ref/models/mixins.py +144 -0
- climate_ref/models/provider.py +2 -1
- climate_ref/provider_registry.py +4 -4
- climate_ref/slurm.py +2 -2
- climate_ref/testing.py +1 -1
- {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/METADATA +2 -2
- climate_ref-0.7.0.dist-info/RECORD +58 -0
- climate_ref-0.6.5.dist-info/RECORD +0 -54
- {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/WHEEL +0 -0
- {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/entry_points.txt +0 -0
- {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/licenses/LICENCE +0 -0
- {climate_ref-0.6.5.dist-info → climate_ref-0.7.0.dist-info}/licenses/NOTICE +0 -0
climate_ref/datasets/base.py
CHANGED

@@ -2,16 +2,36 @@ from pathlib import Path
 from typing import Any, Protocol, cast

 import pandas as pd
+from attrs import define
 from loguru import logger
 from sqlalchemy.orm import joinedload

 from climate_ref.config import Config
-from climate_ref.database import Database
+from climate_ref.database import Database, ModelState
 from climate_ref.datasets.utils import validate_path
 from climate_ref.models.dataset import Dataset, DatasetFile
 from climate_ref_core.exceptions import RefException


+@define
+class DatasetRegistrationResult:
+    """
+    Result of registering a dataset, containing information about file changes
+    """
+
+    dataset: Dataset
+    dataset_state: ModelState | None
+    files_added: list[str]
+    files_updated: list[str]
+    files_removed: list[str]
+    files_unchanged: list[str]
+
+    @property
+    def total_changes(self) -> int:
+        """Total number of file changes (added + updated + removed)"""
+        return len(self.files_added) + len(self.files_updated) + len(self.files_removed)
+
+
 def _log_duplicate_metadata(
     data_catalog: pd.DataFrame, unique_metadata: pd.DataFrame, slug_column: str
 ) -> None:

@@ -26,7 +46,8 @@ def _log_duplicate_metadata(
         invalid_dataset_columns = invalid_dataset_nunique[invalid_dataset_nunique.gt(1)].index.tolist()

         # Include time_range in the list of invalid columns to make debugging easier
-
+        if "time_range" in data_catalog.columns and "time_range" not in invalid_dataset_columns:
+            invalid_dataset_columns.append("time_range")

         data_catalog_subset = data_catalog[data_catalog[slug_column] == instance_id]

@@ -169,9 +190,9 @@ class DatasetAdapter(Protocol):

         return data_catalog

-    def register_dataset(
+    def register_dataset(  # noqa: PLR0915
         self, config: Config, db: Database, data_catalog_dataset: pd.DataFrame
-    ) ->
+    ) -> DatasetRegistrationResult:
         """
         Register a dataset in the database using the data catalog

@@ -187,7 +208,7 @@ class DatasetAdapter(Protocol):
         Returns
         -------
         :
-
+            Registration result with dataset and file change information
         """
         DatasetModel = self.dataset_cls

@@ -197,24 +218,104 @@ class DatasetAdapter(Protocol):
            raise RefException(f"Found multiple datasets in the same directory: {unique_slugs}")
        slug = unique_slugs[0]

+        # Upsert the dataset (create a new dataset or update the metadata)
        dataset_metadata = data_catalog_dataset[list(self.dataset_specific_metadata)].iloc[0].to_dict()
-        dataset,
-        if
-            logger.
-
+        dataset, dataset_state = db.update_or_create(DatasetModel, defaults=dataset_metadata, slug=slug)
+        if dataset_state == ModelState.CREATED:
+            logger.info(f"Created new dataset: {dataset}")
+        elif dataset_state == ModelState.UPDATED:
+            logger.info(f"Updating existing dataset: {dataset}")
        db.session.flush()
-
-
-
-
-
-
-
-
-
+
+        # Initialize result tracking
+        files_added = []
+        files_updated = []
+        files_removed = []
+        files_unchanged = []
+
+        # Get current files for this dataset
+        current_files = db.session.query(DatasetFile).filter_by(dataset_id=dataset.id).all()
+        current_file_paths = {f.path: f for f in current_files}
+
+        # Get new file data from data catalog
+        new_file_data = data_catalog_dataset.to_dict(orient="records")
+        new_file_lookup = {}
+        for dataset_file in new_file_data:
+            file_path = str(validate_path(dataset_file["path"]))
+            new_file_lookup[file_path] = {
+                "start_time": dataset_file["start_time"],
+                "end_time": dataset_file["end_time"],
+            }
+
+        new_file_paths = set(new_file_lookup.keys())
+        existing_file_paths = set(current_file_paths.keys())
+
+        # TODO: support removing files that are no longer present
+        # We want to keep a record of the dataset if it was used by a diagnostic in the past
+        files_to_remove = existing_file_paths - new_file_paths
+        if files_to_remove:
+            files_removed = list(files_to_remove)
+            logger.warning(f"Files to remove: {files_removed}")
+            raise NotImplementedError("Removing files is not yet supported")
+
+        # Update existing files if start/end times have changed
+        for file_path, existing_file in current_file_paths.items():
+            if file_path in new_file_lookup:
+                new_times = new_file_lookup[file_path]
+                if (
+                    existing_file.start_time != new_times["start_time"]
+                    or existing_file.end_time != new_times["end_time"]
+                ):
+                    logger.warning(f"Updating file times for {file_path}")
+                    existing_file.start_time = new_times["start_time"]
+                    existing_file.end_time = new_times["end_time"]
+                    files_updated.append(file_path)
+                else:
+                    files_unchanged.append(file_path)
+
+        # Add new files (batch operation)
+        files_to_add = new_file_paths - existing_file_paths
+        if files_to_add:
+            files_added = list(files_to_add)
+            new_dataset_files = []
+            for file_path in files_to_add:
+                file_times = new_file_lookup[file_path]
+                new_dataset_files.append(
+                    DatasetFile(
+                        path=file_path,
+                        dataset_id=dataset.id,
+                        start_time=file_times["start_time"],
+                        end_time=file_times["end_time"],
+                    )
                )
-            )
-
+            db.session.add_all(new_dataset_files)
+
+        # Determine final dataset state
+        # If dataset metadata changed, use that state
+        # If no metadata changed but files changed, consider it updated
+        # If nothing changed, keep the original state (None for existing, CREATED for new)
+        final_dataset_state = dataset_state
+        if dataset_state is None and (files_added or files_updated or files_removed):
+            final_dataset_state = ModelState.UPDATED
+
+        result = DatasetRegistrationResult(
+            dataset=dataset,
+            dataset_state=final_dataset_state,
+            files_added=files_added,
+            files_updated=files_updated,
+            files_removed=files_removed,
+            files_unchanged=files_unchanged,
+        )
+        change_message = f": ({final_dataset_state.name})" if final_dataset_state else ""
+        logger.debug(
+            f"Dataset registration complete for {dataset.slug}{change_message} "
+            f"{len(files_added)} files added, "
+            f"{len(files_updated)} files updated, "
+            f"{len(files_removed)} files removed, "
+            f"{len(files_unchanged)} files unchanged"
+        )
+
+        return result

     def _get_dataset_files(self, db: Database, limit: int | None = None) -> pd.DataFrame:
         dataset_type = self.dataset_cls.__mapper_args__["polymorphic_identity"]
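The new `register_dataset` computes the per-file delta by comparing the set of paths already stored for the dataset with the paths in the incoming catalog: new paths are added, overlapping paths whose start/end times changed are updated in place, and paths that disappeared are collected but currently rejected with `NotImplementedError`. Below is a standalone sketch of that set arithmetic; the `FileDelta`/`diff_files` names and the `{path: (start_time, end_time)}` shape are illustrative and are not part of the climate_ref API.

```python
from dataclasses import dataclass, field


@dataclass
class FileDelta:
    added: list[str] = field(default_factory=list)
    updated: list[str] = field(default_factory=list)
    removed: list[str] = field(default_factory=list)
    unchanged: list[str] = field(default_factory=list)


def diff_files(
    existing: dict[str, tuple[str, str]],
    incoming: dict[str, tuple[str, str]],
) -> FileDelta:
    """Compare {path: (start_time, end_time)} mappings, mirroring the bookkeeping above."""
    delta = FileDelta()
    delta.added = sorted(set(incoming) - set(existing))
    # register_dataset currently raises NotImplementedError when this set is non-empty
    delta.removed = sorted(set(existing) - set(incoming))
    for path in sorted(set(existing) & set(incoming)):
        if existing[path] != incoming[path]:
            delta.updated.append(path)  # start/end times changed -> update in place
        else:
            delta.unchanged.append(path)
    return delta


if __name__ == "__main__":
    existing = {"a.nc": ("2000", "2010"), "b.nc": ("2000", "2010")}
    incoming = {"a.nc": ("2000", "2014"), "b.nc": ("2000", "2010"), "c.nc": ("2011", "2020")}
    print(diff_files(existing, incoming))
    # FileDelta(added=['c.nc'], updated=['a.nc'], removed=[], unchanged=['b.nc'])
```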
climate_ref/datasets/cmip6.py
CHANGED

@@ -119,6 +119,8 @@ class CMIP6DatasetAdapter(DatasetAdapter):
     file_specific_metadata = ("start_time", "end_time", "path")

     version_metadata = "version"
+    # See https://wcrp-cmip.github.io/WGCM_Infrastructure_Panel/Papers/CMIP6_global_attributes_filenames_CVs_v6.2.7.pdf
+    # under "Directory structure template"
     dataset_id_metadata = (
         "activity_id",
         "institution_id",
climate_ref/datasets/obs4mips.py
CHANGED

@@ -7,7 +7,6 @@ from typing import Any
 import pandas as pd
 import xarray as xr
 from ecgtools import Builder
-from ecgtools.parsers.utilities import extract_attr_with_regex  # type: ignore
 from loguru import logger

 from climate_ref.datasets.base import DatasetAdapter

@@ -15,7 +14,7 @@ from climate_ref.datasets.cmip6 import _parse_datetime
 from climate_ref.models.dataset import Dataset, Obs4MIPsDataset


-def parse_obs4mips(file: str, **kwargs: Any) -> dict[str, Any]:
+def parse_obs4mips(file: str, **kwargs: Any) -> dict[str, Any]:  # noqa: PLR0912
     """
     Parser for obs4mips

@@ -41,6 +40,7 @@ def parse_obs4mips(file: str, **kwargs: Any) -> dict[str, Any]:
                 "source_type",
                 "variable_id",
                 "variant_label",
+                "source_version_number",
            }
        )
    )

@@ -48,6 +48,10 @@ def parse_obs4mips(file: str, **kwargs: Any) -> dict[str, Any]:
    try:
        time_coder = xr.coders.CFDatetimeCoder(use_cftime=True)
        with xr.open_dataset(file, chunks={}, decode_times=time_coder) as ds:
+            if ds.attrs.get("activity_id", "") != "obs4MIPs":
+                traceback_message = f"{file} is not an obs4MIPs dataset"
+                raise TypeError(traceback_message)
+
            has_none_value = any(ds.attrs.get(key) is None for key in keys)
            if has_none_value:
                missing_fields = [key for key in keys if ds.attrs.get(key) is None]

@@ -55,10 +59,6 @@ def parse_obs4mips(file: str, **kwargs: Any) -> dict[str, Any]:
                raise AttributeError(traceback_message)
            info = {key: ds.attrs.get(key) for key in keys}

-            if info["activity_id"] != "obs4MIPs":
-                traceback_message = f"{file} is not an obs4MIPs dataset"
-                raise TypeError(traceback_message)
-
            variable_id = info["variable_id"]

            if variable_id:

@@ -86,12 +86,12 @@ def parse_obs4mips(file: str, **kwargs: Any) -> dict[str, Any]:
            else:
                info["time_range"] = f"{start_time}-{end_time}"
            info["path"] = str(file)
-
-
-
-
-
-
+            # Parsing the version like for CMIP6 fails because some obs4REF paths
+            # do not include "v" in the version directory name.
+            # TODO: fix obs4REF paths
+            info["version"] = Path(file).parent.name
+            if not info["version"].startswith("v"):  # type: ignore[union-attr]
+                info["version"] = "v{version}".format(**info)
            return info

    except (TypeError, AttributeError) as err:

@@ -99,7 +99,7 @@ def parse_obs4mips(file: str, **kwargs: Any) -> dict[str, Any]:
            logger.warning(str(err.args[0]))
        else:
            logger.warning(str(err.args))
-        return {"INVALID_ASSET": file, "TRACEBACK":
+        return {"INVALID_ASSET": file, "TRACEBACK": str(err)}
    except Exception:
        logger.warning(traceback.format_exc())
        return {"INVALID_ASSET": file, "TRACEBACK": traceback.format_exc()}

@@ -129,18 +129,22 @@ class Obs4MIPsDatasetAdapter(DatasetAdapter):
        "variant_label",
        "long_name",
        "units",
+        "version",
        "vertical_levels",
        "source_version_number",
        slug_column,
    )

    file_specific_metadata = ("start_time", "end_time", "path")
-    version_metadata = "
+    version_metadata = "version"
+    # See ODS2.5 at https://doi.org/10.5281/zenodo.11500474 under "Directory structure template"
    dataset_id_metadata = (
        "activity_id",
        "institution_id",
        "source_id",
+        "frequency",
        "variable_id",
+        "nominal_resolution",
        "grid_label",
    )

@@ -186,7 +190,14 @@ class Obs4MIPsDatasetAdapter(DatasetAdapter):
            self.version_metadata,
        ]
        datasets["instance_id"] = datasets.apply(
-            lambda row: "obs4MIPs."
+            lambda row: "obs4MIPs."
+            + ".".join(
+                [
+                    row[item].replace(" ", "") if item == "nominal_resolution" else row[item]
+                    for item in drs_items
+                ]
+            ),
+            axis=1,
        )
        datasets["finalised"] = True
        return datasets
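The rewritten `instance_id` construction joins the DRS facets with "." and strips the space from `nominal_resolution` so the identifier contains no whitespace. The following minimal pandas sketch shows the same expression; the facet list and sample values are assumptions for illustration and may not match the adapter's exact `drs_items`.

```python
import pandas as pd

# Hypothetical subset of DRS facets, in join order
drs_items = [
    "institution_id", "source_id", "frequency", "variable_id",
    "nominal_resolution", "grid_label", "version",
]

datasets = pd.DataFrame(
    [
        {
            "institution_id": "NASA-JPL",
            "source_id": "AIRS-2-1",
            "frequency": "mon",
            "variable_id": "ta",
            "nominal_resolution": "250 km",
            "grid_label": "gn",
            "version": "v20200101",
        }
    ]
)

# Spaces are removed from nominal_resolution ("250 km" -> "250km") before joining
datasets["instance_id"] = datasets.apply(
    lambda row: "obs4MIPs."
    + ".".join(
        row[item].replace(" ", "") if item == "nominal_resolution" else row[item]
        for item in drs_items
    ),
    axis=1,
)
print(datasets["instance_id"].iloc[0])
# obs4MIPs.NASA-JPL.AIRS-2-1.mon.ta.250km.gn.v20200101
```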
climate_ref/executor/__init__.py
CHANGED

@@ -9,7 +9,14 @@ The simplest executor is the `LocalExecutor`, which runs the diagnostic in the s
 This is useful for local testing and debugging.
 """

-from .
+from climate_ref_core.exceptions import InvalidExecutorException
+
+try:
+    from .hpc import HPCExecutor
+except InvalidExecutorException as exc:
+    # This exception is reraised when importing the executor as `climate_ref.executors.HPCExecutor`
+    HPCExecutor = exc  # type: ignore
+
 from .local import LocalExecutor
 from .result_handling import handle_execution_result
 from .synchronous import SynchronousExecutor
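The try/except around the `HPCExecutor` import keeps `climate_ref.executor` importable when the optional executor cannot be loaded; the caught exception is bound to the `HPCExecutor` name so the failure only surfaces when the executor is actually used. Below is a generic sketch of a closely related "defer the import error" pattern; the module and class names are placeholders, and the placeholder-class variant shown here is not the library's exact behaviour.

```python
class OptionalDependencyError(ImportError):
    """Stands in for InvalidExecutorException in this sketch."""


try:
    # Hypothetical optional backend; the import fails if it is not installed
    from some_optional_backend import Executor as OptionalExecutor
except ImportError as exc:
    _import_error = OptionalDependencyError(str(exc))

    class OptionalExecutor:  # type: ignore[no-redef]
        """Placeholder that only fails when the executor is actually used."""

        def __init__(self, *args, **kwargs):
            raise _import_error


if __name__ == "__main__":
    try:
        OptionalExecutor()
    except OptionalDependencyError as err:
        print(f"Optional executor unavailable: {err}")
```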
climate_ref/executor/hpc.py
CHANGED

@@ -5,12 +5,18 @@ If you want to
 - run REF under the HPC workflows
 - run REF in multiple nodes

+The `HPCExecutor` requires the optional `parsl` dependency.
+This dependency (and therefore this executor) is not available on Windows.
 """

 try:
     import parsl
 except ImportError:  # pragma: no cover
-
+    from climate_ref_core.exceptions import InvalidExecutorException
+
+    raise InvalidExecutorException(
+        "climate_ref_core.executor.hpc.HPCExecutor", "The HPCExecutor requires the `parsl` package"
+    )

 import os
 import time
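Because importing `climate_ref.executor.hpc` now raises `InvalidExecutorException` when `parsl` is missing, callers can probe availability simply by attempting the import. A hedged usage sketch, assuming only the module path shown in this diff:

```python
def hpc_executor_available() -> bool:
    """Return True if the optional HPCExecutor can be imported on this platform."""
    try:
        from climate_ref.executor.hpc import HPCExecutor  # noqa: F401
    except Exception:
        # Covers both a missing `parsl` install and platforms (e.g. Windows)
        # where the dependency is unavailable.
        return False
    return True


if __name__ == "__main__":
    print("HPCExecutor available:", hpc_executor_available())
```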
climate_ref/executor/result_handling.py
CHANGED

@@ -17,11 +17,12 @@ from loguru import logger
 from sqlalchemy import insert

 from climate_ref.database import Database
-from climate_ref.models import ScalarMetricValue
+from climate_ref.models import ScalarMetricValue, SeriesMetricValue
 from climate_ref.models.execution import Execution, ExecutionOutput, ResultOutputType
 from climate_ref_core.diagnostics import ExecutionResult, ensure_relative_path
 from climate_ref_core.exceptions import ResultValidationError
 from climate_ref_core.logging import EXECUTION_LOG_FILENAME
+from climate_ref_core.metric_values import SeriesMetricValue as TSeries
 from climate_ref_core.pycmec.controlled_vocabulary import CV
 from climate_ref_core.pycmec.metric import CMECMetric
 from climate_ref_core.pycmec.output import CMECOutput, OutputDict

@@ -65,6 +66,113 @@ def _copy_file_to_results(
     shutil.copy(input_directory / filename, output_filename)


+def _process_execution_scalar(
+    database: Database,
+    result: "ExecutionResult",
+    execution: Execution,
+    cv: CV,
+) -> None:
+    """
+    Process the scalar values from the execution result and store them in the database
+
+    This also validates the scalar values against the controlled vocabulary
+    """
+    # Load the metric bundle from the file
+    cmec_metric_bundle = CMECMetric.load_from_json(result.to_output_path(result.metric_bundle_filename))
+
+    # Check that the diagnostic values conform with the controlled vocabulary
+    try:
+        cv.validate_metrics(cmec_metric_bundle)
+    except (ResultValidationError, AssertionError):
+        # TODO: Remove once we have settled on a controlled vocabulary
+        logger.exception("Diagnostic values do not conform with the controlled vocabulary")
+        # execution.mark_failed()
+
+    # Perform a bulk insert of scalar values
+    # The current implementation will swallow the exception, but display a log message
+    try:
+        scalar_values = [
+            {
+                "execution_id": execution.id,
+                "value": result.value,
+                "attributes": result.attributes,
+                **result.dimensions,
+            }
+            for result in cmec_metric_bundle.iter_results()
+        ]
+        logger.debug(f"Ingesting {len(scalar_values)} scalar values for execution {execution.id}")
+        if scalar_values:
+            # Perform this in a nested transaction to rollback if something goes wrong
+            # We will lose the metric values for a given execution, but not the whole execution
+            with database.session.begin_nested():
+                database.session.execute(
+                    insert(ScalarMetricValue),
+                    scalar_values,
+                )
+    # This is a broad exception catch to ensure we log any issues
+    except Exception:
+        logger.exception("Something went wrong when ingesting diagnostic values")
+
+
+def _process_execution_series(
+    config: "Config",
+    database: Database,
+    result: "ExecutionResult",
+    execution: Execution,
+    cv: CV,
+) -> None:
+    """
+    Process the series values from the execution result and store them in the database
+
+    This also copies the series values file from the scratch directory to the results directory
+    and validates the series values against the controlled vocabulary.
+    """
+    assert result.series_filename, "Series filename must be set in the result"
+
+    _copy_file_to_results(
+        config.paths.scratch,
+        config.paths.results,
+        execution.output_fragment,
+        result.series_filename,
+    )
+
+    # Load the series values from the file
+    series_values_path = result.to_output_path(result.series_filename)
+    series_values = TSeries.load_from_json(series_values_path)
+
+    try:
+        cv.validate_metrics(series_values)
+    except (ResultValidationError, AssertionError):
+        # TODO: Remove once we have settled on a controlled vocabulary
+        logger.exception("Diagnostic values do not conform with the controlled vocabulary")
+        # execution.mark_failed()
+
+    # Perform a bulk insert of series values
+    try:
+        series_values_content = [
+            {
+                "execution_id": execution.id,
+                "values": series_result.values,
+                "attributes": series_result.attributes,
+                "index": series_result.index,
+                "index_name": series_result.index_name,
+                **series_result.dimensions,
+            }
+            for series_result in series_values
+        ]
+        logger.debug(f"Ingesting {len(series_values)} series values for execution {execution.id}")
+        if series_values:
+            # Perform this in a nested transaction to rollback if something goes wrong
+            # We will lose the metric values for a given execution, but not the whole execution
+            with database.session.begin_nested():
+                database.session.execute(
+                    insert(SeriesMetricValue),
+                    series_values_content,
+                )
+    except Exception:
+        logger.exception("Something went wrong when ingesting diagnostic series values")
+
+
 def handle_execution_result(
     config: "Config",
     database: Database,

@@ -88,7 +196,7 @@ def handle_execution_result(
     result
         The result of the diagnostic execution, either successful or failed
     """
-    # Always copy log data
+    # Always copy log data to the results directory
     _copy_file_to_results(
         config.paths.scratch,
         config.paths.results,

@@ -96,74 +204,52 @@ def handle_execution_result(
        EXECUTION_LOG_FILENAME,
    )

-    if result.successful
-        logger.
+    if not result.successful or result.metric_bundle_filename is None:
+        logger.error(f"{execution} failed")
+        execution.mark_failed()
+        return
+
+    logger.info(f"{execution} successful")
+
+    _copy_file_to_results(
+        config.paths.scratch,
+        config.paths.results,
+        execution.output_fragment,
+        result.metric_bundle_filename,
+    )

+    if result.output_bundle_filename:
        _copy_file_to_results(
            config.paths.scratch,
            config.paths.results,
            execution.output_fragment,
-            result.
+            result.output_bundle_filename,
+        )
+        _handle_output_bundle(
+            config,
+            database,
+            execution,
+            result.to_output_path(result.output_bundle_filename),
        )
-        execution.mark_successful(result.as_relative_path(result.metric_bundle_filename))
-
-        if result.output_bundle_filename:
-            _copy_file_to_results(
-                config.paths.scratch,
-                config.paths.results,
-                execution.output_fragment,
-                result.output_bundle_filename,
-            )
-            _handle_output_bundle(
-                config,
-                database,
-                execution,
-                result.to_output_path(result.output_bundle_filename),
-            )

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                **result.dimensions,
-            }
-            for result in cmec_metric_bundle.iter_results()
-        ]
-        if scalar_values:
-            # Perform this in a nested transaction to rollback if something goes wrong
-            # We will lose the metric values for a given execution, but not the whole execution
-            with database.session.begin_nested():
-                database.session.execute(
-                    insert(ScalarMetricValue),
-                    scalar_values,
-                )
-    except Exception:
-        # TODO: Remove once we have settled on a controlled vocabulary
-        logger.exception("Something went wrong when ingesting diagnostic values")
-
-        # TODO Ingest the series values
-
-        # TODO: This should check if the result is the most recent for the execution,
-        # if so then update the dirty fields
-        # i.e. if there are outstanding executions don't make as clean
-        execution.execution_group.dirty = False
-    else:
-        logger.error(f"{execution} failed")
-        execution.mark_failed()
+    cv = CV.load_from_file(config.paths.dimensions_cv)
+
+    if result.series_filename:
+        # Process the series values if they are present
+        # This will ingest the series values into the database
+        _process_execution_series(config=config, database=database, result=result, execution=execution, cv=cv)
+
+    # Process the scalar values
+    # This will ingest the scalar values into the database
+    _process_execution_scalar(database=database, result=result, execution=execution, cv=cv)
+
+    # TODO: This should check if the result is the most recent for the execution,
+    # if so then update the dirty fields
+    # i.e. if there are outstanding executions don't make as clean
+    execution.execution_group.dirty = False
+
+    # Finally, mark the execution as successful
+    execution.mark_successful(result.as_relative_path(result.metric_bundle_filename))


 def _handle_output_bundle(

@@ -220,12 +306,13 @@ def _handle_outputs(
             filename,
         )
         database.session.add(
-            ExecutionOutput(
+            ExecutionOutput.build(
                 execution_id=execution.id,
                 output_type=output_type,
                 filename=str(filename),
                 description=output_info.description,
                 short_name=key,
                 long_name=output_info.long_name,
+                dimensions=output_info.dimensions or {},
             )
         )
climate_ref/migrations/env.py
CHANGED

@@ -4,7 +4,10 @@ from sqlalchemy import Connection, inspect

 from climate_ref.config import Config
 from climate_ref.database import Database
-from climate_ref.models import Base
+from climate_ref.models import Base
+from climate_ref.models.execution import ExecutionOutput
+from climate_ref.models.metric_value import MetricValue
+from climate_ref.models.mixins import DimensionMixin
 from climate_ref_core.logging import capture_logging
 from climate_ref_core.pycmec.controlled_vocabulary import CV

@@ -33,7 +36,7 @@ target_metadata = Base.metadata
 # Custom migration functions that are run on every migration


-def
+def _add_dimension_columns(connection: Connection, table: str, Cls: type[DimensionMixin]) -> None:
     """
     Add any missing columns in the current CV to the database

@@ -44,27 +47,25 @@ def _add_metric_value_columns(connection: Connection) -> None:
    connection
        Open connection to the database
    """
-    metric_value_table = "metric_value"
-
    inspector = inspect(connection)

    # Check if table already exists
    # Skip if it doesn't
    tables = inspector.get_table_names()
-    if
-        logger.warning(f"No table named {
+    if table not in tables:
+        logger.warning(f"No table named {table!r} found")
        return

    # Extract the current columns in the DB
-    existing_columns = [c["name"] for c in inspector.get_columns(
+    existing_columns = [c["name"] for c in inspector.get_columns(table)]

    cv_file = ref_config.paths.dimensions_cv
    cv = CV.load_from_file(cv_file)

    for dimension in cv.dimensions:
        if dimension.name not in existing_columns:
-            logger.info(f"Adding missing
-            op.add_column(
+            logger.info(f"Adding missing value dimension: {dimension.name!r}")
+            op.add_column(table, Cls.build_dimension_column(dimension))


 def include_object(object_, name: str, type_, reflected, compare_to) -> bool:

@@ -134,7 +135,8 @@ def run_migrations_online() -> None:
        # Set up the Operations context
        # This is needed to alter the tables
        with op.Operations.context(context.get_context()):  # type: ignore
-
+            _add_dimension_columns(connection, "metric_value", MetricValue)
+            _add_dimension_columns(connection, "execution_output", ExecutionOutput)


 if context.is_offline_mode():