PyPI - climate-ref - Versions diffs - 0.6.6__tar.gz → 0.8.0__tar.gz - Mend

climate-ref 0.6.6__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (100) hide show

{climate_ref-0.6.6 → climate_ref-0.8.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: climate-ref
-Version: 0.6.6
+Version: 0.8.0
 Summary: Application which runs the CMIP Rapid Evaluation Framework
 Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
 License-Expression: Apache-2.0

{climate_ref-0.6.6 → climate_ref-0.8.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "climate-ref"
-version = "0.6.6"
+version = "0.8.0"
 description = "Application which runs the CMIP Rapid Evaluation Framework"
 readme = "README.md"
 authors = [

{climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/cli/__init__.py RENAMED Viewed

@@ -3,11 +3,12 @@
 import importlib
 from enum import Enum
 from pathlib import Path
-from typing import Annotated, Optional
+from typing import Annotated
 import typer
 from attrs import define
 from loguru import logger
+from rich.console import Console
 from climate_ref import __version__
 from climate_ref.cli import config, datasets, executions, providers, solve
@@ -37,6 +38,7 @@ class CLIContext:
     config: Config
     database: Database
+    console: Console
 def _version_callback(value: bool) -> None:
@@ -46,6 +48,13 @@ def _version_callback(value: bool) -> None:
         raise typer.Exit()
+def _create_console() -> Console:
+    # Hook for testing to disable color output
+    # Rich respects the NO_COLOR environment variable
+    return Console()
 def _load_config(configuration_directory: Path | None = None) -> Config:
     """
     Load the configuration from the specified directory
@@ -129,7 +138,7 @@ def main(  # noqa: PLR0913
         typer.Option(case_sensitive=False, help="Set the level of logging information to display"),
     ] = LogLevel.Info,
     version: Annotated[
-        Optional[bool],
+        bool | None,
         typer.Option(
             "--version", callback=_version_callback, is_eager=True, help="Print the version and exit"
         ),
@@ -155,7 +164,7 @@ def main(  # noqa: PLR0913
     logger.debug(f"Configuration loaded from: {config._config_file!s}")
-    ctx.obj = CLIContext(config=config, database=Database.from_config(config))
+    ctx.obj = CLIContext(config=config, database=Database.from_config(config), console=_create_console())
 if __name__ == "__main__":

{climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/cli/_utils.py RENAMED Viewed

@@ -5,6 +5,59 @@ from rich.console import Console
 from rich.table import Table
+def parse_facet_filters(filters: list[str] | None) -> dict[str, str]:
+    """
+    Parse facet filters from key=value format into a dictionary.
+    Parameters
+    ----------
+    filters
+        List of filter strings in 'key=value' format
+    Returns
+    -------
+    dict[str, str]
+        Dictionary mapping facet keys to values
+    Raises
+    ------
+    ValueError
+        If a filter string is not in valid 'key=value' format
+    Examples
+    --------
+    >>> parse_facet_filters(["source_id=GFDL-ESM4", "variable_id=tas"])
+    {'source_id': 'GFDL-ESM4', 'variable_id': 'tas'}
+    """
+    if not filters:
+        return {}
+    parsed: dict[str, str] = {}
+    for filter_str in filters:
+        if "=" not in filter_str:
+            raise ValueError(
+                f"Invalid filter format: '{filter_str}'. "
+                f"Expected format: 'key=value' or 'dataset_type.key=value' "
+                f"(e.g., 'source_id=GFDL-ESM4' or 'cmip6.source_id=GFDL-ESM4')"
+            )
+        key, value = filter_str.split("=", 1)
+        key = key.strip()
+        value = value.strip()
+        if not key:
+            raise ValueError(f"Empty key in filter: '{filter_str}'")
+        if not value:
+            raise ValueError(f"Empty value in filter: '{filter_str}'")
+        if key in parsed:
+            logger.warning(f"Filter key '{key}' specified multiple times. Using last value: '{value}'")
+        parsed[key] = value
+    return parsed
 def df_to_table(df: pd.DataFrame, max_col_count: int = -1) -> Table:
     """
     Convert a DataFrame to a rich Table instance
@@ -32,7 +85,7 @@ def df_to_table(df: pd.DataFrame, max_col_count: int = -1) -> Table:
     table = Table(*[str(column) for column in df.columns])
-    for index, value_list in enumerate(df.values.tolist()):
+    for value_list in df.values.tolist():
         row = [str(x) for x in value_list]
         table.add_row(*row)
@@ -59,7 +112,8 @@ def pretty_print_df(df: pd.DataFrame, console: Console | None = None) -> None:
     # Drop duplicates as they are not informative to CLI users.
     df = df.drop_duplicates()
-    if console is None:
+    if console is None:  # pragma: no branch
+        logger.debug("Creating new console for pretty printing")
         console = Console()
     max_col_count = console.width // 10

{climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/cli/datasets.py RENAMED Viewed

@@ -6,8 +6,6 @@ which executions are required for a given diagnostic without having to re-parse
 """
-import errno
-import os
 import shutil
 from collections.abc import Iterable
 from pathlib import Path
@@ -15,9 +13,9 @@ from typing import Annotated
 import typer
 from loguru import logger
-from rich.console import Console
 from climate_ref.cli._utils import pretty_print_df
+from climate_ref.database import ModelState
 from climate_ref.datasets import get_dataset_adapter
 from climate_ref.models import Dataset
 from climate_ref.provider_registry import ProviderRegistry
@@ -27,7 +25,6 @@ from climate_ref_core.dataset_registry import dataset_registry_manager, fetch_al
 from climate_ref_core.datasets import SourceDatasetType
 app = typer.Typer(help=__doc__)
-console = Console()
 @app.command(name="list")
@@ -70,7 +67,7 @@ def list_(
             raise typer.Exit(code=1)
         data_catalog = data_catalog[column].sort_values(by=column)
-    pretty_print_df(data_catalog, console=console)
+    pretty_print_df(data_catalog, console=ctx.obj.console)
 @app.command()
@@ -97,7 +94,7 @@ def list_columns(
 @app.command()
-def ingest(  # noqa: PLR0913
+def ingest(  # noqa
     ctx: typer.Context,
     file_or_directory: list[Path],
     source_type: Annotated[SourceDatasetType, typer.Option(help="Type of source dataset")],
@@ -106,7 +103,7 @@ def ingest(  # noqa: PLR0913
     n_jobs: Annotated[int | None, typer.Option(help="Number of jobs to run in parallel")] = None,
     skip_invalid: Annotated[
         bool, typer.Option(help="Ignore (but log) any datasets that don't pass validation")
-    ] = False,
+    ] = True,
 ) -> None:
     """
     Ingest a directory of datasets into the database
@@ -118,6 +115,7 @@ def ingest(  # noqa: PLR0913
     """
     config = ctx.obj.config
     db = ctx.obj.database
+    console = ctx.obj.console
     kwargs = {}
@@ -133,16 +131,35 @@ def ingest(  # noqa: PLR0913
         if not _dir.exists():
             logger.error(f"File or directory {_dir} does not exist")
-            raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), _dir)
+            continue
+        # TODO: This assumes that all datasets are nc files.
+        # This is true for CMIP6 and obs4MIPs but may not be true for other dataset types in the future.
+        if not _dir.rglob("*.nc"):
+            logger.error(f"No .nc files found in {_dir}")
+            continue
-        data_catalog = adapter.find_local_datasets(_dir)
-        data_catalog = adapter.validate_data_catalog(data_catalog, skip_invalid=skip_invalid)
+        try:
+            data_catalog = adapter.find_local_datasets(_dir)
+            data_catalog = adapter.validate_data_catalog(data_catalog, skip_invalid=skip_invalid)
+        except Exception as e:
+            logger.error(f"Error ingesting datasets from {_dir}: {e}")
+            continue
         logger.info(
             f"Found {len(data_catalog)} files for {len(data_catalog[adapter.slug_column].unique())} datasets"
         )
         pretty_print_df(adapter.pretty_subset(data_catalog), console=console)
+        # track stats for a given directory
+        num_created_datasets = 0
+        num_updated_datasets = 0
+        num_unchanged_datasets = 0
+        num_created_files = 0
+        num_updated_files = 0
+        num_removed_files = 0
+        num_unchanged_files = 0
         for instance_id, data_catalog_dataset in data_catalog.groupby(adapter.slug_column):
             logger.debug(f"Processing dataset {instance_id}")
             with db.session.begin():
@@ -154,9 +171,29 @@ def ingest(  # noqa: PLR0913
                     )
                     if not dataset:
                         logger.info(f"Would save dataset {instance_id} to the database")
-                        continue
                 else:
-                    adapter.register_dataset(config, db, data_catalog_dataset)
+                    results = adapter.register_dataset(config, db, data_catalog_dataset)
+                    if results.dataset_state == ModelState.CREATED:
+                        num_created_datasets += 1
+                    elif results.dataset_state == ModelState.UPDATED:
+                        num_updated_datasets += 1
+                    else:
+                        num_unchanged_datasets += 1
+                    num_created_files += len(results.files_added)
+                    num_updated_files += len(results.files_updated)
+                    num_removed_files += len(results.files_removed)
+                    num_unchanged_files += len(results.files_unchanged)
+        if not dry_run:
+            ingestion_msg = (
+                f"Datasets: {num_created_datasets}/{num_updated_datasets}/{num_unchanged_datasets}"
+                " (created/updated/unchanged), "
+                f"Files: "
+                f"{num_created_files}/{num_updated_files}/{num_removed_files}/{num_unchanged_files}"
+                " (created/updated/removed/unchanged)"
+            )
+            logger.info(ingestion_msg)
     if solve:
         solve_required_executions(

climate-ref 0.6.6__tar.gz → 0.8.0__tar.gz

climate-ref 0.6.6__tar.gz → 0.8.0__tar.gz