climate-ref 0.6.5__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {climate_ref-0.6.5 → climate_ref-0.7.0}/PKG-INFO +2 -2
- {climate_ref-0.6.5 → climate_ref-0.7.0}/README.md +1 -1
- {climate_ref-0.6.5 → climate_ref-0.7.0}/pyproject.toml +1 -1
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/cli/__init__.py +12 -3
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/cli/_utils.py +56 -2
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/cli/datasets.py +48 -9
- climate_ref-0.7.0/src/climate_ref/cli/executions.py +538 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/cli/providers.py +1 -2
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/config.py +4 -4
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/database.py +62 -4
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/dataset_registry/obs4ref_reference.txt +0 -9
- climate_ref-0.7.0/src/climate_ref/dataset_registry/sample_data.txt +269 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/datasets/__init__.py +3 -3
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/datasets/base.py +121 -20
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/datasets/cmip6.py +2 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/datasets/obs4mips.py +26 -15
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/executor/__init__.py +8 -1
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/executor/hpc.py +7 -1
- climate_ref-0.7.0/src/climate_ref/executor/result_handling.py +318 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/migrations/env.py +12 -10
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/migrations/versions/2025-07-20T1521_94beace57a9c_cmip6_finalised.py +1 -1
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/migrations/versions/2025-08-05T0327_a1b2c3d4e5f6_finalised_on_base_dataset.py +1 -1
- climate_ref-0.7.0/src/climate_ref/migrations/versions/2025-09-05T2019_8d28e5e0f9c3_add_indexes.py +108 -0
- climate_ref-0.7.0/src/climate_ref/migrations/versions/2025-09-10T1358_2f6e36738e06_use_version_as_version_facet_for_.py +35 -0
- climate_ref-0.7.0/src/climate_ref/migrations/versions/2025-09-22T2359_20cd136a5b04_add_pmp_version.py +35 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/models/__init__.py +1 -6
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/models/base.py +4 -18
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/models/dataset.py +10 -6
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/models/diagnostic.py +2 -1
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/models/execution.py +225 -12
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/models/metric_value.py +27 -112
- climate_ref-0.7.0/src/climate_ref/models/mixins.py +144 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/models/provider.py +2 -1
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/provider_registry.py +4 -4
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/slurm.py +2 -2
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/testing.py +1 -1
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/cli/test_datasets.py +1 -1
- climate_ref-0.7.0/tests/unit/cli/test_executions/test_inspect.txt +25 -0
- climate_ref-0.7.0/tests/unit/cli/test_executions.py +831 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/cli/test_solve.py +4 -4
- climate_ref-0.7.0/tests/unit/cli/test_utils.py +60 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/datasets/test_cmip6/cmip6_catalog_db_complete.yml +1072 -128
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/datasets/test_cmip6/cmip6_catalog_db_drs.yml +1072 -128
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/datasets/test_cmip6/cmip6_catalog_local_complete.yml +1036 -66
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/datasets/test_cmip6/cmip6_catalog_local_drs.yml +1054 -92
- climate_ref-0.7.0/tests/unit/datasets/test_datasets.py +376 -0
- climate_ref-0.7.0/tests/unit/datasets/test_obs4mips/obs4mips_catalog_db.yml +3679 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/datasets/test_obs4mips/obs4mips_catalog_local.yml +619 -72
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/datasets/test_obs4mips.py +3 -1
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/datasets/test_pmp_climatology/pmp_catalog_local.yml +619 -72
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/datasets/test_pmp_climatology.py +1 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/executor/test_result_handling.py +75 -5
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/executor/test_synchronous_executor.py +2 -2
- climate_ref-0.7.0/tests/unit/models/test_metric_execution.py +115 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/test_database.py +4 -3
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/test_solver/test_solve_metrics.yml +20 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/test_solver.py +12 -6
- climate_ref-0.6.5/src/climate_ref/cli/executions.py +0 -211
- climate_ref-0.6.5/src/climate_ref/dataset_registry/sample_data.txt +0 -107
- climate_ref-0.6.5/src/climate_ref/executor/result_handling.py +0 -231
- climate_ref-0.6.5/tests/unit/cli/test_executions/test_inspect.txt +0 -27
- climate_ref-0.6.5/tests/unit/cli/test_executions.py +0 -148
- climate_ref-0.6.5/tests/unit/datasets/test_datasets.py +0 -100
- climate_ref-0.6.5/tests/unit/datasets/test_obs4mips/obs4mips_catalog_db.yml +0 -756
- climate_ref-0.6.5/tests/unit/models/test_metric_execution.py +0 -38
- {climate_ref-0.6.5 → climate_ref-0.7.0}/.gitignore +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/Dockerfile +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/LICENCE +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/NOTICE +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/conftest.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/__init__.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/_config_helpers.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/alembic.ini +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/cli/config.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/cli/solve.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/constants.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/datasets/cmip6_parsers.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/datasets/pmp_climatology.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/datasets/utils.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/executor/local.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/executor/pbs_scheduler.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/executor/synchronous.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/migrations/README +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/migrations/script.py.mako +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/migrations/versions/2025-05-02T1418_341a4aa2551e_regenerate.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/migrations/versions/2025-05-09T2032_03dbb4998e49_series_metric_value.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/migrations/versions/2025-07-03T1505_795c1e6cf496_drop_unique_requirement_on_slug.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/py.typed +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/src/climate_ref/solver.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/cli/test_config.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/cli/test_providers.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/cli/test_root.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/datasets/conftest.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/datasets/test_cmip6/cmip6_catalog_db.yml +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/datasets/test_cmip6.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/datasets/test_utils.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/executor/test_hpc_executor.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/executor/test_local_executor.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/models/test_metric_value.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/test_config.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/test_pbssmartprovider.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/test_provider_registry.py +0 -0
- {climate_ref-0.6.5 → climate_ref-0.7.0}/tests/unit/test_slurm.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: climate-ref
|
|
3
|
-
Version: 0.6.5
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: Application which runs the CMIP Rapid Evaluation Framework
|
|
5
5
|
Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -70,7 +70,7 @@ pip install climate-ref
|
|
|
70
70
|
If you want to use the diagnostic providers for the Assessment Fast Track, you can install them with:
|
|
71
71
|
|
|
72
72
|
```bash
|
|
73
|
-
pip install climate-ref[aft-providers]
|
|
73
|
+
pip install "climate-ref[aft-providers]"
|
|
74
74
|
```
|
|
75
75
|
|
|
76
76
|
## Quick Start
|
|
@@ -3,11 +3,12 @@
|
|
|
3
3
|
import importlib
|
|
4
4
|
from enum import Enum
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import Annotated, Optional
|
|
6
|
+
from typing import Annotated
|
|
7
7
|
|
|
8
8
|
import typer
|
|
9
9
|
from attrs import define
|
|
10
10
|
from loguru import logger
|
|
11
|
+
from rich.console import Console
|
|
11
12
|
|
|
12
13
|
from climate_ref import __version__
|
|
13
14
|
from climate_ref.cli import config, datasets, executions, providers, solve
|
|
@@ -37,6 +38,7 @@ class CLIContext:
|
|
|
37
38
|
|
|
38
39
|
config: Config
|
|
39
40
|
database: Database
|
|
41
|
+
console: Console
|
|
40
42
|
|
|
41
43
|
|
|
42
44
|
def _version_callback(value: bool) -> None:
|
|
@@ -46,6 +48,13 @@ def _version_callback(value: bool) -> None:
|
|
|
46
48
|
raise typer.Exit()
|
|
47
49
|
|
|
48
50
|
|
|
51
|
+
def _create_console() -> Console:
|
|
52
|
+
# Hook for testing to disable color output
|
|
53
|
+
|
|
54
|
+
# Rich respects the NO_COLOR environment variable
|
|
55
|
+
return Console()
|
|
56
|
+
|
|
57
|
+
|
|
49
58
|
def _load_config(configuration_directory: Path | None = None) -> Config:
|
|
50
59
|
"""
|
|
51
60
|
Load the configuration from the specified directory
|
|
@@ -129,7 +138,7 @@ def main( # noqa: PLR0913
|
|
|
129
138
|
typer.Option(case_sensitive=False, help="Set the level of logging information to display"),
|
|
130
139
|
] = LogLevel.Info,
|
|
131
140
|
version: Annotated[
|
|
132
|
-
|
|
141
|
+
bool | None,
|
|
133
142
|
typer.Option(
|
|
134
143
|
"--version", callback=_version_callback, is_eager=True, help="Print the version and exit"
|
|
135
144
|
),
|
|
@@ -155,7 +164,7 @@ def main( # noqa: PLR0913
|
|
|
155
164
|
|
|
156
165
|
logger.debug(f"Configuration loaded from: {config._config_file!s}")
|
|
157
166
|
|
|
158
|
-
ctx.obj = CLIContext(config=config, database=Database.from_config(config))
|
|
167
|
+
ctx.obj = CLIContext(config=config, database=Database.from_config(config), console=_create_console())
|
|
159
168
|
|
|
160
169
|
|
|
161
170
|
if __name__ == "__main__":
|
|
@@ -5,6 +5,59 @@ from rich.console import Console
|
|
|
5
5
|
from rich.table import Table
|
|
6
6
|
|
|
7
7
|
|
|
8
|
+
def parse_facet_filters(filters: list[str] | None) -> dict[str, str]:
|
|
9
|
+
"""
|
|
10
|
+
Parse facet filters from key=value format into a dictionary.
|
|
11
|
+
|
|
12
|
+
Parameters
|
|
13
|
+
----------
|
|
14
|
+
filters
|
|
15
|
+
List of filter strings in 'key=value' format
|
|
16
|
+
|
|
17
|
+
Returns
|
|
18
|
+
-------
|
|
19
|
+
dict[str, str]
|
|
20
|
+
Dictionary mapping facet keys to values
|
|
21
|
+
|
|
22
|
+
Raises
|
|
23
|
+
------
|
|
24
|
+
ValueError
|
|
25
|
+
If a filter string is not in valid 'key=value' format
|
|
26
|
+
|
|
27
|
+
Examples
|
|
28
|
+
--------
|
|
29
|
+
>>> parse_facet_filters(["source_id=GFDL-ESM4", "variable_id=tas"])
|
|
30
|
+
{'source_id': 'GFDL-ESM4', 'variable_id': 'tas'}
|
|
31
|
+
"""
|
|
32
|
+
if not filters:
|
|
33
|
+
return {}
|
|
34
|
+
|
|
35
|
+
parsed: dict[str, str] = {}
|
|
36
|
+
for filter_str in filters:
|
|
37
|
+
if "=" not in filter_str:
|
|
38
|
+
raise ValueError(
|
|
39
|
+
f"Invalid filter format: '{filter_str}'. "
|
|
40
|
+
f"Expected format: 'key=value' or 'dataset_type.key=value' "
|
|
41
|
+
f"(e.g., 'source_id=GFDL-ESM4' or 'cmip6.source_id=GFDL-ESM4')"
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
key, value = filter_str.split("=", 1)
|
|
45
|
+
key = key.strip()
|
|
46
|
+
value = value.strip()
|
|
47
|
+
|
|
48
|
+
if not key:
|
|
49
|
+
raise ValueError(f"Empty key in filter: '{filter_str}'")
|
|
50
|
+
if not value:
|
|
51
|
+
raise ValueError(f"Empty value in filter: '{filter_str}'")
|
|
52
|
+
|
|
53
|
+
if key in parsed:
|
|
54
|
+
logger.warning(f"Filter key '{key}' specified multiple times. Using last value: '{value}'")
|
|
55
|
+
|
|
56
|
+
parsed[key] = value
|
|
57
|
+
|
|
58
|
+
return parsed
|
|
59
|
+
|
|
60
|
+
|
|
8
61
|
def df_to_table(df: pd.DataFrame, max_col_count: int = -1) -> Table:
|
|
9
62
|
"""
|
|
10
63
|
Convert a DataFrame to a rich Table instance
|
|
@@ -32,7 +85,7 @@ def df_to_table(df: pd.DataFrame, max_col_count: int = -1) -> Table:
|
|
|
32
85
|
|
|
33
86
|
table = Table(*[str(column) for column in df.columns])
|
|
34
87
|
|
|
35
|
-
for
|
|
88
|
+
for value_list in df.values.tolist():
|
|
36
89
|
row = [str(x) for x in value_list]
|
|
37
90
|
table.add_row(*row)
|
|
38
91
|
|
|
@@ -59,7 +112,8 @@ def pretty_print_df(df: pd.DataFrame, console: Console | None = None) -> None:
|
|
|
59
112
|
# Drop duplicates as they are not informative to CLI users.
|
|
60
113
|
df = df.drop_duplicates()
|
|
61
114
|
|
|
62
|
-
if console is None:
|
|
115
|
+
if console is None: # pragma: no branch
|
|
116
|
+
logger.debug("Creating new console for pretty printing")
|
|
63
117
|
console = Console()
|
|
64
118
|
|
|
65
119
|
max_col_count = console.width // 10
|
|
@@ -15,9 +15,9 @@ from typing import Annotated
|
|
|
15
15
|
|
|
16
16
|
import typer
|
|
17
17
|
from loguru import logger
|
|
18
|
-
from rich.console import Console
|
|
19
18
|
|
|
20
19
|
from climate_ref.cli._utils import pretty_print_df
|
|
20
|
+
from climate_ref.database import ModelState
|
|
21
21
|
from climate_ref.datasets import get_dataset_adapter
|
|
22
22
|
from climate_ref.models import Dataset
|
|
23
23
|
from climate_ref.provider_registry import ProviderRegistry
|
|
@@ -27,7 +27,6 @@ from climate_ref_core.dataset_registry import dataset_registry_manager, fetch_al
|
|
|
27
27
|
from climate_ref_core.datasets import SourceDatasetType
|
|
28
28
|
|
|
29
29
|
app = typer.Typer(help=__doc__)
|
|
30
|
-
console = Console()
|
|
31
30
|
|
|
32
31
|
|
|
33
32
|
@app.command(name="list")
|
|
@@ -70,7 +69,7 @@ def list_(
|
|
|
70
69
|
raise typer.Exit(code=1)
|
|
71
70
|
data_catalog = data_catalog[column].sort_values(by=column)
|
|
72
71
|
|
|
73
|
-
pretty_print_df(data_catalog, console=console)
|
|
72
|
+
pretty_print_df(data_catalog, console=ctx.obj.console)
|
|
74
73
|
|
|
75
74
|
|
|
76
75
|
@app.command()
|
|
@@ -97,7 +96,7 @@ def list_columns(
|
|
|
97
96
|
|
|
98
97
|
|
|
99
98
|
@app.command()
|
|
100
|
-
def ingest( # noqa: PLR0913
|
|
99
|
+
def ingest( # noqa
|
|
101
100
|
ctx: typer.Context,
|
|
102
101
|
file_or_directory: list[Path],
|
|
103
102
|
source_type: Annotated[SourceDatasetType, typer.Option(help="Type of source dataset")],
|
|
@@ -106,7 +105,7 @@ def ingest( # noqa: PLR0913
|
|
|
106
105
|
n_jobs: Annotated[int | None, typer.Option(help="Number of jobs to run in parallel")] = None,
|
|
107
106
|
skip_invalid: Annotated[
|
|
108
107
|
bool, typer.Option(help="Ignore (but log) any datasets that don't pass validation")
|
|
109
|
-
] = False,
|
|
108
|
+
] = True,
|
|
110
109
|
) -> None:
|
|
111
110
|
"""
|
|
112
111
|
Ingest a directory of datasets into the database
|
|
@@ -118,6 +117,7 @@ def ingest( # noqa: PLR0913
|
|
|
118
117
|
"""
|
|
119
118
|
config = ctx.obj.config
|
|
120
119
|
db = ctx.obj.database
|
|
120
|
+
console = ctx.obj.console
|
|
121
121
|
|
|
122
122
|
kwargs = {}
|
|
123
123
|
|
|
@@ -135,14 +135,33 @@ def ingest( # noqa: PLR0913
|
|
|
135
135
|
logger.error(f"File or directory {_dir} does not exist")
|
|
136
136
|
raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), _dir)
|
|
137
137
|
|
|
138
|
-
|
|
139
|
-
|
|
138
|
+
# TODO: This assumes that all datasets are nc files.
|
|
139
|
+
# This is true for CMIP6 and obs4MIPs but may not be true for other dataset types in the future.
|
|
140
|
+
if not _dir.rglob("*.nc"):
|
|
141
|
+
logger.error(f"No .nc files found in {_dir}")
|
|
142
|
+
continue
|
|
143
|
+
|
|
144
|
+
try:
|
|
145
|
+
data_catalog = adapter.find_local_datasets(_dir)
|
|
146
|
+
data_catalog = adapter.validate_data_catalog(data_catalog, skip_invalid=skip_invalid)
|
|
147
|
+
except Exception as e:
|
|
148
|
+
logger.error(f"Error ingesting datasets from {_dir}: {e}")
|
|
149
|
+
continue
|
|
140
150
|
|
|
141
151
|
logger.info(
|
|
142
152
|
f"Found {len(data_catalog)} files for {len(data_catalog[adapter.slug_column].unique())} datasets"
|
|
143
153
|
)
|
|
144
154
|
pretty_print_df(adapter.pretty_subset(data_catalog), console=console)
|
|
145
155
|
|
|
156
|
+
# track stats for a given directory
|
|
157
|
+
num_created_datasets = 0
|
|
158
|
+
num_updated_datasets = 0
|
|
159
|
+
num_unchanged_datasets = 0
|
|
160
|
+
num_created_files = 0
|
|
161
|
+
num_updated_files = 0
|
|
162
|
+
num_removed_files = 0
|
|
163
|
+
num_unchanged_files = 0
|
|
164
|
+
|
|
146
165
|
for instance_id, data_catalog_dataset in data_catalog.groupby(adapter.slug_column):
|
|
147
166
|
logger.debug(f"Processing dataset {instance_id}")
|
|
148
167
|
with db.session.begin():
|
|
@@ -154,9 +173,29 @@ def ingest( # noqa: PLR0913
|
|
|
154
173
|
)
|
|
155
174
|
if not dataset:
|
|
156
175
|
logger.info(f"Would save dataset {instance_id} to the database")
|
|
157
|
-
continue
|
|
158
176
|
else:
|
|
159
|
-
adapter.register_dataset(config, db, data_catalog_dataset)
|
|
177
|
+
results = adapter.register_dataset(config, db, data_catalog_dataset)
|
|
178
|
+
|
|
179
|
+
if results.dataset_state == ModelState.CREATED:
|
|
180
|
+
num_created_datasets += 1
|
|
181
|
+
elif results.dataset_state == ModelState.UPDATED:
|
|
182
|
+
num_updated_datasets += 1
|
|
183
|
+
else:
|
|
184
|
+
num_unchanged_datasets += 1
|
|
185
|
+
num_created_files += len(results.files_added)
|
|
186
|
+
num_updated_files += len(results.files_updated)
|
|
187
|
+
num_removed_files += len(results.files_removed)
|
|
188
|
+
num_unchanged_files += len(results.files_unchanged)
|
|
189
|
+
|
|
190
|
+
if not dry_run:
|
|
191
|
+
ingestion_msg = (
|
|
192
|
+
f"Datasets: {num_created_datasets}/{num_updated_datasets}/{num_unchanged_datasets}"
|
|
193
|
+
" (created/updated/unchanged), "
|
|
194
|
+
f"Files: "
|
|
195
|
+
f"{num_created_files}/{num_updated_files}/{num_removed_files}/{num_unchanged_files}"
|
|
196
|
+
" (created/updated/removed/unchanged)"
|
|
197
|
+
)
|
|
198
|
+
logger.info(ingestion_msg)
|
|
160
199
|
|
|
161
200
|
if solve:
|
|
162
201
|
solve_required_executions(
|