climate-ref 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,7 +12,7 @@ from loguru import logger
 from climate_ref import __version__
 from climate_ref.cli import config, datasets, executions, providers, solve
 from climate_ref.config import Config
-from climate_ref.constants import config_filename
+from climate_ref.constants import CONFIG_FILENAME
 from climate_ref.database import Database
 from climate_ref_core import __version__ as __core_version__
 from climate_ref_core.logging import add_log_handler
@@ -23,7 +23,8 @@ class LogLevel(str, Enum):
     Log levels for the CLI
     """

-    Normal = "WARNING"
+    Error = "ERROR"
+    Warning = "WARNING"
     Debug = "DEBUG"
     Info = "INFO"

@@ -65,7 +66,7 @@ def _load_config(configuration_directory: Path | None = None) -> Config:
     """
     try:
         if configuration_directory:
-            config = Config.load(configuration_directory / config_filename, allow_missing=False)
+            config = Config.load(configuration_directory / CONFIG_FILENAME, allow_missing=False)
         else:
             config = Config.default()
     except FileNotFoundError:
@@ -109,19 +110,28 @@ app = build_app()


 @app.callback()
-def main(
+def main(  # noqa: PLR0913
     ctx: typer.Context,
     configuration_directory: Annotated[Path | None, typer.Option(help="Configuration directory")] = None,
-    verbose: Annotated[bool, typer.Option("--verbose", "-v")] = False,
-    log_level: Annotated[LogLevel, typer.Option(case_sensitive=False)] = LogLevel.Normal,
+    verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Set the log level to DEBUG")] = False,
+    quiet: Annotated[bool, typer.Option("--quiet", "-q", help="Set the log level to WARNING")] = False,
+    log_level: Annotated[
+        LogLevel, typer.Option(case_sensitive=False, help="Set the level of logging information to display")
+    ] = LogLevel.Info,
     version: Annotated[
         Optional[bool],
-        typer.Option("--version", callback=_version_callback, is_eager=True),
+        typer.Option(
+            "--version", callback=_version_callback, is_eager=True, help="Print the version and exit"
+        ),
     ] = None,
 ) -> None:
     """
-    climate_ref: A CLI for the CMIP Rapid Evaluation Framework
+    climate_ref: A CLI for the Assessment Fast Track Rapid Evaluation Framework
+
+    This CLI provides a number of commands for managing and executing diagnostics.
     """
+    if quiet:
+        log_level = LogLevel.Warning
     if verbose:
         log_level = LogLevel.Debug

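The new `--quiet` flag composes with `--verbose` in code order: `--quiet` lowers the level to WARNING first, then `--verbose` overrides it to DEBUG when both are passed. A minimal runnable sketch of that precedence, using a stand-in Typer app rather than the climate_ref CLI itself:

```python
# Stand-in sketch of the verbosity precedence above; not the climate_ref CLI.
from enum import Enum
from typing import Annotated

import typer
from typer.testing import CliRunner


class LogLevel(str, Enum):
    Error = "ERROR"
    Warning = "WARNING"
    Debug = "DEBUG"
    Info = "INFO"


app = typer.Typer()


@app.command()
def main(
    verbose: Annotated[bool, typer.Option("--verbose", "-v")] = False,
    quiet: Annotated[bool, typer.Option("--quiet", "-q")] = False,
    log_level: Annotated[LogLevel, typer.Option(case_sensitive=False)] = LogLevel.Info,
) -> None:
    if quiet:
        log_level = LogLevel.Warning
    if verbose:  # applied last, so it wins over --quiet
        log_level = LogLevel.Debug
    typer.echo(log_level.value)


runner = CliRunner()
assert runner.invoke(app, []).output.strip() == "INFO"
assert runner.invoke(app, ["--quiet"]).output.strip() == "WARNING"
assert runner.invoke(app, ["--quiet", "--verbose"]).output.strip() == "DEBUG"
```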
@@ -90,7 +90,7 @@ def list_columns(
 @app.command()
 def ingest(  # noqa: PLR0913
     ctx: typer.Context,
-    file_or_directory: Path,
+    file_or_directory: list[Path],
     source_type: Annotated[SourceDatasetType, typer.Option(help="Type of source dataset")],
     solve: Annotated[bool, typer.Option(help="Solve for new diagnostic executions after ingestion")] = False,
     dry_run: Annotated[bool, typer.Option(help="Do not ingest datasets into the database")] = False,
@@ -107,40 +107,44 @@ def ingest( # noqa: PLR0913
     config = ctx.obj.config
     db = ctx.obj.database

-    file_or_directory = Path(file_or_directory).expanduser()
-    logger.info(f"ingesting {file_or_directory}")
-
     kwargs = {}

     if n_jobs is not None:
         kwargs["n_jobs"] = n_jobs

+    # Create a data catalog from the specified file or directory
     adapter = get_dataset_adapter(source_type.value, **kwargs)

-    # Create a data catalog from the specified file or directory
-    if not file_or_directory.exists():
-        logger.error(f"File or directory {file_or_directory} does not exist")
-        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), file_or_directory)
-
-    data_catalog = adapter.find_local_datasets(file_or_directory)
-    data_catalog = adapter.validate_data_catalog(data_catalog, skip_invalid=skip_invalid)
-
-    logger.info(
-        f"Found {len(data_catalog)} files for {len(data_catalog[adapter.slug_column].unique())} datasets"
-    )
-    pretty_print_df(adapter.pretty_subset(data_catalog), console=console)
-
-    for instance_id, data_catalog_dataset in data_catalog.groupby(adapter.slug_column):
-        logger.info(f"Processing dataset {instance_id}")
-
-        if dry_run:
-            dataset = db.session.query(Dataset).filter_by(slug=instance_id, dataset_type=source_type).first()
-            if not dataset:
-                logger.info(f"Would save dataset {instance_id} to the database")
-            continue
-        else:
+    for _dir in file_or_directory:
+        _dir = Path(_dir).expanduser()
+        logger.info(f"Ingesting {_dir}")
+
+        if not _dir.exists():
+            logger.error(f"File or directory {_dir} does not exist")
+            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), _dir)
+
+        data_catalog = adapter.find_local_datasets(_dir)
+        data_catalog = adapter.validate_data_catalog(data_catalog, skip_invalid=skip_invalid)
+
+        logger.info(
+            f"Found {len(data_catalog)} files for {len(data_catalog[adapter.slug_column].unique())} datasets"
+        )
+        pretty_print_df(adapter.pretty_subset(data_catalog), console=console)
+
+        for instance_id, data_catalog_dataset in data_catalog.groupby(adapter.slug_column):
+            logger.debug(f"Processing dataset {instance_id}")
             with db.session.begin():
-                adapter.register_dataset(config, db, data_catalog_dataset)
+                if dry_run:
+                    dataset = (
+                        db.session.query(Dataset)
+                        .filter_by(slug=instance_id, dataset_type=source_type)
+                        .first()
+                    )
+                    if not dataset:
+                        logger.info(f"Would save dataset {instance_id} to the database")
+                    continue
+                else:
+                    adapter.register_dataset(config, db, data_catalog_dataset)

     if solve:
         solve_required_executions(
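Annotating `file_or_directory` as `list[Path]` makes Typer treat it as a variadic positional argument, so a single `ingest` invocation can now cover several paths, each expanded and validated inside the loop. A stand-in sketch of the behaviour (hypothetical app and paths, not the real command):

```python
# Stand-in sketch: a bare list[Path] parameter becomes a variadic positional
# argument in Typer, mirroring the new ingest signature.
from pathlib import Path

import typer
from typer.testing import CliRunner

app = typer.Typer()


@app.command()
def ingest(file_or_directory: list[Path]) -> None:
    # Each positional value arrives as its own Path, as in the new loop
    for _dir in file_or_directory:
        typer.echo(Path(_dir).expanduser())


# Hypothetical paths, just to show multiple values in one invocation
result = CliRunner().invoke(app, ["~/cmip6", "/data/obs4mips"])
print(result.output)
```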
@@ -20,7 +20,7 @@ from climate_ref.cli._utils import df_to_table, pretty_print_df
 from climate_ref.config import Config
 from climate_ref.models import Execution, ExecutionGroup
 from climate_ref.models.execution import get_execution_group_and_latest
-from climate_ref_core.executor import EXECUTION_LOG_FILENAME
+from climate_ref_core.logging import EXECUTION_LOG_FILENAME

 app = typer.Typer(help=__doc__)
 console = Console()
@@ -24,8 +24,7 @@ def list_(ctx: typer.Context) -> None:
     """
     config = ctx.obj.config
     db = ctx.obj.database
-    with db.session.begin():
-        provider_registry = ProviderRegistry.build_from_config(config, db)
+    provider_registry = ProviderRegistry.build_from_config(config, db)

     def get_env(provider: DiagnosticProvider) -> str:
         env = ""
@@ -61,8 +60,7 @@ def create_env(
     """
     config = ctx.obj.config
     db = ctx.obj.database
-    with db.session.begin():
-        providers = ProviderRegistry.build_from_config(config, db).providers
+    providers = ProviderRegistry.build_from_config(config, db).providers

     if provider is not None:
         available = ", ".join([f'"{p.slug}"' for p in providers])
climate_ref/cli/solve.py CHANGED
@@ -19,5 +19,4 @@ def solve(
     """
     config = ctx.obj.config
     db = ctx.obj.database
-    with ctx.obj.database.session.begin():
-        solve_required_executions(config=config, db=db, dry_run=dry_run, timeout=timeout)
+    solve_required_executions(config=config, db=db, dry_run=dry_run, timeout=timeout)
climate_ref/config.py CHANGED
@@ -34,11 +34,10 @@ from climate_ref._config_helpers import (
     env_field,
     transform_error,
 )
-from climate_ref.constants import config_filename
-from climate_ref.executor import import_executor_cls
+from climate_ref.constants import CONFIG_FILENAME
 from climate_ref_core.env import env
 from climate_ref_core.exceptions import InvalidExecutorException
-from climate_ref_core.executor import Executor
+from climate_ref_core.executor import Executor, import_executor_cls

 if TYPE_CHECKING:
     from climate_ref.database import Database
@@ -75,11 +74,13 @@ class PathConfig:
     /// admonition | Warning
     type: warning

-    These paths must be common across all systems that the REF is being run
+    These paths must be common across all systems that the REF is being run.
+    Generally, this means that they should be mounted in the same location on all systems.
     ///

     If any of these paths are specified as relative paths,
     they will be resolved to absolute paths.
+    These absolute paths will be used for all operations in the REF.
     """

     log: Path = env_field(name="LOG_ROOT", converter=ensure_absolute_path)
@@ -156,14 +157,14 @@ class ExecutorConfig:
     Configuration to define the executor to use for running diagnostics
     """

-    executor: str = env_field(name="EXECUTOR", default="climate_ref.executor.local.LocalExecutor")
+    executor: str = env_field(name="EXECUTOR", default="climate_ref.executor.LocalExecutor")
     """
-    Executor to use for running diagnostics
+    Executor class to use for running diagnostics

     This should be the fully qualified name of the executor class
-    (e.g. `climate_ref.executor.local.LocalExecutor`).
-    The default is to use the local executor.
-    The environment variable `REF_EXECUTOR` takes precedence over this configuration value.
+    (e.g. `climate_ref.executor.LocalExecutor`).
+    The default is to use the local executor which runs the executions locally, in-parallel
+    using a process pool.

     This class will be used for all executions of diagnostics.
     """
@@ -173,6 +174,7 @@ class ExecutorConfig:
     Additional configuration for the executor.

     See the documentation for the executor for the available configuration options.
+    These options will be passed to the executor class when it is created.
     """

     def build(self, config: "Config", database: "Database") -> Executor:
@@ -200,7 +202,30 @@ class ExecutorConfig:
 @define
 class DiagnosticProviderConfig:
     """
-    Configuration for the diagnostic providers
+    Defining the diagnostic providers used by the REF.
+
+    Each diagnostic provider is a package that contains the logic for running a specific
+    set of diagnostics.
+    This configuration determines which diagnostic providers are loaded and used when solving.
+
+    Multiple diagnostic providers can be specified as shown in the example below.
+
+    ```toml
+    [[diagnostic_providers]]
+    provider = "climate_ref_esmvaltool.provider"
+
+    [diagnostic_providers.config]
+
+    [[diagnostic_providers]]
+    provider = "climate_ref_ilamb.provider"
+
+    [diagnostic_providers.config]
+
+    [[diagnostic_providers]]
+    provider = "climate_ref_pmp.provider"
+
+    [diagnostic_providers.config]
+    ```
     """

     provider: str
@@ -225,21 +250,29 @@ class DbConfig:
     """
     Database configuration

-    We currently only plan to support SQLite and PostgreSQL databases,
-    although only SQLite is currently implemented and tested.
+    We support SQLite and PostgreSQL databases.
+    The default is to use SQLite, which is a file-based database that is stored in the
+    `REF_CONFIGURATION` directory.
+    This is a good option for testing and development, but not recommended for production use.
+
+    For production use, we recommend using PostgreSQL.
     """

     database_url: str = env_field(name="DATABASE_URL")
     """
     Database URL that describes the connection to the database.

-    Defaults to sqlite:///{config.paths.db}/climate_ref.db".
+    Defaults to `sqlite:///{config.paths.db}/climate_ref.db`.
     This configuration value will be overridden by the `REF_DATABASE_URL` environment variable.

-    ## Schemas
+    **Schemas**

+    The following schemas are supported:
+    ```
     postgresql://USER:PASSWORD@HOST:PORT/NAME
+
     sqlite:///RELATIVE_PATH or sqlite:////ABS_PATH or sqlite:///:memory:
+    ```
     """
     run_migrations: bool = field(default=True)

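Both URL forms in the updated docstring follow SQLAlchemy's standard URL syntax. A quick sketch that parses the documented schemes with SQLAlchemy's own URL helper (placeholder credentials and paths):

```python
# Parse the documented URL schemes with SQLAlchemy's URL helper to show how
# they decompose; the credentials and paths here are placeholders.
from sqlalchemy.engine import make_url

for url in (
    "postgresql://USER:PASSWORD@HOST:5432/NAME",
    "sqlite:///relative/path/climate_ref.db",
    "sqlite:////abs/path/climate_ref.db",
    "sqlite:///:memory:",
):
    parsed = make_url(url)
    print(parsed.drivername, parsed.host, parsed.database)
```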
@@ -286,12 +319,10 @@ def _load_config(config_file: str | Path, doc: dict[str, Any]) -> "Config":
     return _converter_defaults_relaxed.structure(doc, Config)


-@define
+@define(auto_attribs=True)
 class Config:
     """
-    REF configuration
-
-    This class is used to store the configuration of the REF application.
+    Configuration that is used by the REF
     """

     log_level: str = field(default="INFO")
@@ -405,7 +436,7 @@ class Config:
         The default configuration
         """
         root = env.path("REF_CONFIGURATION")
-        path_to_load = root / config_filename
+        path_to_load = root / CONFIG_FILENAME

         logger.debug(f"Loading default configuration from {path_to_load}")
         return cls.load(path_to_load)
climate_ref/constants.py CHANGED
@@ -2,7 +2,7 @@
 Constants used by the REF
 """

-config_filename = "ref.toml"
+CONFIG_FILENAME = "ref.toml"
 """
 Default name of the configuration file
 """
climate_ref/database.py CHANGED
@@ -106,6 +106,7 @@ class Database:
         logger.info(f"Connecting to database at {url}")
         self.url = url
         self._engine = sqlalchemy.create_engine(self.url)
+        # TODO: Set autobegin=False
         self.session = Session(self._engine)

     def alembic_config(self, config: "Config") -> AlembicConfig:
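The new TODO concerns SQLAlchemy 2.0 session semantics: with the default `autobegin=True`, the first statement on a session implicitly opens a transaction, which can then collide with an explicit `session.begin()` such as the per-dataset block now used in `ingest`. A sketch of the two modes, assuming SQLAlchemy 2.0:

```python
import sqlalchemy
from sqlalchemy.orm import Session

engine = sqlalchemy.create_engine("sqlite:///:memory:")

# Default autobegin: the first statement implicitly begins a transaction
with Session(engine) as session:
    session.execute(sqlalchemy.text("SELECT 1"))
    print(session.in_transaction())  # True

# autobegin=False: transactions start only inside an explicit begin() block
with Session(engine, autobegin=False) as session:
    with session.begin():  # commits on exit, rolls back on error
        session.execute(sqlalchemy.text("SELECT 1"))
```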
@@ -18,12 +18,26 @@ CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/abrupt-4xCO2/r1i1p1f1/fx/areacella/gn/v20191115/a
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/pr/gn/v20191115/pr_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc acc821dc400f53166379d2e23095bc2690d7ca7db6c7a6f88ae29a8771b3c65a
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/psl/gn/v20191115/psl_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc b63a3d4051cf17568df808836b189826da580ca8e1db949b1e93a71c80756c8d
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rlut/gn/v20191115/rlut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 44a3c90a41744101afb00344f50947fe46444fe5d6bd3623c0c19aa02a378c86
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rlutcs/gn/v20191115/rlutcs_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200501-201412.nc 50b54337ffcf581236929c4eb904bc8240f848a5fa646de75129ed5cbddbbc23
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsdt/gn/v20191115/rsdt_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc a4e1fc3a4a4d00c2fa18ec616338426eb3d91165db3bc57e565ffdc8d6bd9d34
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsut/gn/v20191115/rsut_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 8d492ef1f2bb654220fe64977d9942a33af0962ee9afa4017dcc75b6f0103015
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/rsutcs/gn/v20191115/rsutcs_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200501-201412.nc 8bae5124b8fe5040853e9b02f2942ee4c81cfd972b314f2828c04a9965804357
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/tas/gn/v20191115/tas_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_185001-201412.nc 38e055e57aea5a9ae76ed3fc5325be6783b5694a9edc28aafd24dd462b32e5ce
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Amon/ts/gn/v20191115/ts_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc e02530449c92e0ffc72e9edeba57f5d38ab8652a28486c1c2b9ddada1f38fbd9
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Emon/cSoil/gn/v20191115/cSoil_Emon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc bebda54ca0518630e90b79585910fc38b7edfe118ecf1cf4cb4a8de0950a911e
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/gpp/gn/v20191115/gpp_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc da36ed1653f7aafe40a4fc9b99004a46cb45231697ce6b3413dfc171980c37df
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/lai/gn/v20191115/lai_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 972c11880af2cf06c2e1489968b1ac4829d8b00afd7586499814c3ddcfd95ed2
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/mrro/gn/v20191115/mrro_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc aaaf89f7a1dedf79e8cda71ab345a6809cfb698a63dcc638ccf7a316d13e6920
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/mrsos/gn/v20191115/mrsos_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 4b78059c4f899a48ae6f3a3cf68e95d76e3603044744521c4aadd992dec93995
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Lmon/nbp/gn/v20191115/nbp_Lmon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 75e559842e9081ea7563f2590d2c1e8a22af72abc78e37a012b9d56da532569e
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Ofx/areacello/gn/v20191115/areacello_Ofx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc 6808b64c7328bd118537bfb7cfd35748b4e84cae3f6a5586403aa9d8040e4d0b
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Ofx/sftof/gn/v20191115/sftof_Ofx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc 9bc037566546b8a65d063c4e8225b43b56151856f5a8adde5992f44c85b7c727
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_197901-197912.nc f068351200f0afb451a0e39e13d5e3ddeb48b1f1812b97a1a786d802659c969c
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_198001-198912.nc 703b495bf2effa5cae369893e2868ae1f38b69510366404e236a4605e6560ae6
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_199001-199912.nc 79a7ef5b61962da66abd14598412ad4f1ba0c9b531e7ecaf5a6190e0a4f9589c
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-200912.nc 001b3e7c639cae224e4de4f14601f492bec7aeb028cd02e9f07812c1db05abb7
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/msftmz/gn/v20191115/msftmz_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_201001-201412.nc 6c6cc432feb33552d643b2a57d0552ac563ec5a90ad462b6daeacdf3e7a9158f
+CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/sos/gn/v20191115/sos_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_197901-201412.nc 92384dba703a6575d696bf8c04337c3a1d4f538bcd9ca28bf61ab058f8038b30
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/Omon/tos/gn/v20191115/tos_Omon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-201412.nc 94121a2233aff78ef8799c5d59b6b6f3e7d3f2fb7ceb3a4a1645943ef3e88040
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/SImon/siconc/gn/v20200817/siconc_SImon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_197901-201412.nc 886c62a24797e893fe11b8de4d16c8a277bdee931b692d533f2fb3fa39820aa1
 CMIP6/CMIP/CSIRO/ACCESS-ESM1-5/historical/r1i1p1f1/fx/areacella/gn/v20191115/areacella_fx_ACCESS-ESM1-5_historical_r1i1p1f1_gn.nc 4587a7b0110a226a805f681ee9fe456d20ec310302b2c120334c21595e4e96cb
@@ -156,6 +156,43 @@ class DatasetAdapter(Protocol):
         )
         return dataset

+    def _get_dataset_files(self, db: Database, limit: int | None = None) -> pd.DataFrame:
+        dataset_type = self.dataset_cls.__mapper_args__["polymorphic_identity"]
+
+        result = (
+            db.session.query(DatasetFile)
+            # The join is necessary to be able to order by the dataset columns
+            .join(DatasetFile.dataset)
+            .where(Dataset.dataset_type == dataset_type)
+            # The joinedload is necessary to avoid N+1 queries (one for each dataset)
+            # https://docs.sqlalchemy.org/en/14/orm/loading_relationships.html#the-zen-of-joined-eager-loading
+            .options(joinedload(DatasetFile.dataset.of_type(self.dataset_cls)))
+            .order_by(Dataset.updated_at.desc())
+            .limit(limit)
+            .all()
+        )
+
+        return pd.DataFrame(
+            [
+                {
+                    **{k: getattr(file, k) for k in self.file_specific_metadata},
+                    **{k: getattr(file.dataset, k) for k in self.dataset_specific_metadata},
+                }
+                for file in result
+            ],
+            index=[file.dataset.id for file in result],
+        )
+
+    def _get_datasets(self, db: Database, limit: int | None = None) -> pd.DataFrame:
+        result_datasets = (
+            db.session.query(self.dataset_cls).order_by(Dataset.updated_at.desc()).limit(limit).all()
+        )
+
+        return pd.DataFrame(
+            [{k: getattr(dataset, k) for k in self.dataset_specific_metadata} for dataset in result_datasets],
+            index=[file.id for file in result_datasets],
+        )
+
     def load_catalog(
         self, db: Database, include_files: bool = True, limit: int | None = None
     ) -> pd.DataFrame:
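The joinedload comment in the new `_get_dataset_files` refers to the classic N+1 query pattern: without eager loading, every `file.dataset` access in the list comprehension would lazily issue its own SELECT. A toy sketch of the fix with stand-in models (not the climate_ref schema):

```python
# Toy models demonstrating joinedload; the real climate_ref models differ.
from sqlalchemy import ForeignKey, create_engine
from sqlalchemy.orm import (
    DeclarativeBase,
    Mapped,
    Session,
    joinedload,
    mapped_column,
    relationship,
)


class Base(DeclarativeBase):
    pass


class Dataset(Base):
    __tablename__ = "dataset"
    id: Mapped[int] = mapped_column(primary_key=True)
    files: Mapped[list["DatasetFile"]] = relationship(back_populates="dataset")


class DatasetFile(Base):
    __tablename__ = "dataset_file"
    id: Mapped[int] = mapped_column(primary_key=True)
    dataset_id: Mapped[int] = mapped_column(ForeignKey("dataset.id"))
    dataset: Mapped[Dataset] = relationship(back_populates="files")


engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)

with Session(engine) as session:
    # One SELECT loads every file together with its parent dataset, so the
    # file.dataset access below is plain attribute access, not a lazy query.
    files = session.query(DatasetFile).options(joinedload(DatasetFile.dataset)).all()
    rows = [{"file_id": f.id, "dataset_id": f.dataset_id} for f in files]
```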
@@ -173,42 +210,9 @@ class DatasetAdapter(Protocol):
         :
             Data catalog containing the metadata for the currently ingested datasets
         """
-        DatasetModel = self.dataset_cls
-        dataset_type = DatasetModel.__mapper_args__["polymorphic_identity"]
-        # TODO: Paginate this query to avoid loading all the data at once
-        if include_files:
-            result = (
-                db.session.query(DatasetFile)
-                # The join is necessary to be able to order by the dataset columns
-                .join(DatasetFile.dataset)
-                .where(Dataset.dataset_type == dataset_type)
-                # The joinedload is necessary to avoid N+1 queries (one for each dataset)
-                # https://docs.sqlalchemy.org/en/14/orm/loading_relationships.html#the-zen-of-joined-eager-loading
-                .options(joinedload(DatasetFile.dataset.of_type(DatasetModel)))
-                .order_by(Dataset.updated_at.desc())
-                .limit(limit)
-                .all()
-            )
-
-            return pd.DataFrame(
-                [
-                    {
-                        **{k: getattr(file, k) for k in self.file_specific_metadata},
-                        **{k: getattr(file.dataset, k) for k in self.dataset_specific_metadata},
-                    }
-                    for file in result
-                ],
-                index=[file.dataset.id for file in result],
-            )
-        else:
-            result_datasets = (
-                db.session.query(DatasetModel).order_by(Dataset.updated_at.desc()).limit(limit).all()
-            )
-
-            return pd.DataFrame(
-                [
-                    {k: getattr(dataset, k) for k in self.dataset_specific_metadata}
-                    for dataset in result_datasets
-                ],
-                index=[file.id for file in result_datasets],
-            )
+        with db.session.begin():
+            # TODO: Paginate this query to avoid loading all the data at once
+            if include_files:
+                return self._get_dataset_files(db, limit)
+            else:
+                return self._get_datasets(db, limit)