climate-ref 0.6.6__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {climate_ref-0.6.6 → climate_ref-0.8.0}/PKG-INFO +1 -1
- {climate_ref-0.6.6 → climate_ref-0.8.0}/pyproject.toml +1 -1
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/cli/__init__.py +12 -3
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/cli/_utils.py +56 -2
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/cli/datasets.py +49 -12
- climate_ref-0.8.0/src/climate_ref/cli/executions.py +538 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/cli/providers.py +1 -2
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/config.py +67 -4
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/database.py +62 -4
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/dataset_registry/obs4ref_reference.txt +0 -9
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/dataset_registry/sample_data.txt +10 -19
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/datasets/__init__.py +3 -3
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/datasets/base.py +121 -20
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/datasets/cmip6.py +2 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/datasets/obs4mips.py +26 -15
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/executor/hpc.py +149 -53
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/executor/local.py +1 -2
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/executor/result_handling.py +17 -7
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/env.py +12 -10
- climate_ref-0.8.0/src/climate_ref/migrations/versions/2025-09-10T1358_2f6e36738e06_use_version_as_version_facet_for_.py +35 -0
- climate_ref-0.8.0/src/climate_ref/migrations/versions/2025-09-22T2359_20cd136a5b04_add_pmp_version.py +35 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/models/__init__.py +1 -6
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/models/base.py +4 -20
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/models/dataset.py +2 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/models/diagnostic.py +2 -1
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/models/execution.py +219 -7
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/models/metric_value.py +25 -110
- climate_ref-0.8.0/src/climate_ref/models/mixins.py +144 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/models/provider.py +2 -1
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/provider_registry.py +4 -4
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/slurm.py +2 -2
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/solver.py +17 -6
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/testing.py +1 -1
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/cli/test_datasets.py +2 -4
- climate_ref-0.8.0/tests/unit/cli/test_executions/test_inspect.txt +25 -0
- climate_ref-0.8.0/tests/unit/cli/test_executions.py +831 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/cli/test_solve.py +4 -4
- climate_ref-0.8.0/tests/unit/cli/test_utils.py +60 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_cmip6/cmip6_catalog_db_complete.yml +20 -20
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_cmip6/cmip6_catalog_db_drs.yml +20 -20
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_cmip6/cmip6_catalog_local_complete.yml +30 -30
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_cmip6/cmip6_catalog_local_drs.yml +30 -30
- climate_ref-0.8.0/tests/unit/datasets/test_datasets.py +376 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_obs4mips/obs4mips_catalog_db.yml +501 -523
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_obs4mips/obs4mips_catalog_local.yml +174 -314
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_obs4mips.py +2 -1
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_pmp_climatology/pmp_catalog_local.yml +174 -314
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_pmp_climatology.py +1 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/executor/test_hpc_executor.py +76 -11
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/executor/test_result_handling.py +5 -2
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/executor/test_synchronous_executor.py +2 -2
- climate_ref-0.8.0/tests/unit/models/test_metric_execution.py +115 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/test_config.py +52 -1
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/test_database.py +4 -3
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/test_solver/test_solve_metrics.yml +8 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/test_solver.py +12 -6
- climate_ref-0.6.6/src/climate_ref/cli/executions.py +0 -229
- climate_ref-0.6.6/tests/unit/cli/test_executions/test_inspect.txt +0 -27
- climate_ref-0.6.6/tests/unit/cli/test_executions.py +0 -180
- climate_ref-0.6.6/tests/unit/datasets/test_datasets.py +0 -100
- climate_ref-0.6.6/tests/unit/models/test_metric_execution.py +0 -38
- {climate_ref-0.6.6 → climate_ref-0.8.0}/.gitignore +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/Dockerfile +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/LICENCE +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/NOTICE +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/README.md +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/conftest.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/__init__.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/_config_helpers.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/alembic.ini +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/cli/config.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/cli/solve.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/constants.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/datasets/cmip6_parsers.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/datasets/pmp_climatology.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/datasets/utils.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/executor/__init__.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/executor/pbs_scheduler.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/executor/synchronous.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/README +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/script.py.mako +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/versions/2025-05-02T1418_341a4aa2551e_regenerate.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/versions/2025-05-09T2032_03dbb4998e49_series_metric_value.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/versions/2025-07-03T1505_795c1e6cf496_drop_unique_requirement_on_slug.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/versions/2025-07-20T1521_94beace57a9c_cmip6_finalised.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/versions/2025-08-05T0327_a1b2c3d4e5f6_finalised_on_base_dataset.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/migrations/versions/2025-09-05T2019_8d28e5e0f9c3_add_indexes.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/src/climate_ref/py.typed +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/cli/test_config.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/cli/test_providers.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/cli/test_root.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/conftest.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_cmip6/cmip6_catalog_db.yml +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_cmip6.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/datasets/test_utils.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/executor/test_local_executor.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/models/test_metric_value.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/test_pbssmartprovider.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/test_provider_registry.py +0 -0
- {climate_ref-0.6.6 → climate_ref-0.8.0}/tests/unit/test_slurm.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: climate-ref
|
|
3
|
-
Version: 0.6.6
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: Application which runs the CMIP Rapid Evaluation Framework
|
|
5
5
|
Author-email: Jared Lewis <jared.lewis@climate-resource.com>, Mika Pflueger <mika.pflueger@climate-resource.com>, Bouwe Andela <b.andela@esciencecenter.nl>, Jiwoo Lee <lee1043@llnl.gov>, Min Xu <xum1@ornl.gov>, Nathan Collier <collierno@ornl.gov>, Dora Hegedus <dora.hegedus@stfc.ac.uk>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -3,11 +3,12 @@
|
|
|
3
3
|
import importlib
|
|
4
4
|
from enum import Enum
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import Annotated
|
|
6
|
+
from typing import Annotated
|
|
7
7
|
|
|
8
8
|
import typer
|
|
9
9
|
from attrs import define
|
|
10
10
|
from loguru import logger
|
|
11
|
+
from rich.console import Console
|
|
11
12
|
|
|
12
13
|
from climate_ref import __version__
|
|
13
14
|
from climate_ref.cli import config, datasets, executions, providers, solve
|
|
@@ -37,6 +38,7 @@ class CLIContext:
|
|
|
37
38
|
|
|
38
39
|
config: Config
|
|
39
40
|
database: Database
|
|
41
|
+
console: Console
|
|
40
42
|
|
|
41
43
|
|
|
42
44
|
def _version_callback(value: bool) -> None:
|
|
@@ -46,6 +48,13 @@ def _version_callback(value: bool) -> None:
|
|
|
46
48
|
raise typer.Exit()
|
|
47
49
|
|
|
48
50
|
|
|
51
|
+
def _create_console() -> Console:
|
|
52
|
+
# Hook for testing to disable color output
|
|
53
|
+
|
|
54
|
+
# Rich respects the NO_COLOR environment variable
|
|
55
|
+
return Console()
|
|
56
|
+
|
|
57
|
+
|
|
49
58
|
def _load_config(configuration_directory: Path | None = None) -> Config:
|
|
50
59
|
"""
|
|
51
60
|
Load the configuration from the specified directory
|
|
@@ -129,7 +138,7 @@ def main( # noqa: PLR0913
|
|
|
129
138
|
typer.Option(case_sensitive=False, help="Set the level of logging information to display"),
|
|
130
139
|
] = LogLevel.Info,
|
|
131
140
|
version: Annotated[
|
|
132
|
-
|
|
141
|
+
bool | None,
|
|
133
142
|
typer.Option(
|
|
134
143
|
"--version", callback=_version_callback, is_eager=True, help="Print the version and exit"
|
|
135
144
|
),
|
|
@@ -155,7 +164,7 @@ def main( # noqa: PLR0913
|
|
|
155
164
|
|
|
156
165
|
logger.debug(f"Configuration loaded from: {config._config_file!s}")
|
|
157
166
|
|
|
158
|
-
ctx.obj = CLIContext(config=config, database=Database.from_config(config))
|
|
167
|
+
ctx.obj = CLIContext(config=config, database=Database.from_config(config), console=_create_console())
|
|
159
168
|
|
|
160
169
|
|
|
161
170
|
if __name__ == "__main__":
|
|
@@ -5,6 +5,59 @@ from rich.console import Console
|
|
|
5
5
|
from rich.table import Table
|
|
6
6
|
|
|
7
7
|
|
|
8
|
+
def parse_facet_filters(filters: list[str] | None) -> dict[str, str]:
|
|
9
|
+
"""
|
|
10
|
+
Parse facet filters from key=value format into a dictionary.
|
|
11
|
+
|
|
12
|
+
Parameters
|
|
13
|
+
----------
|
|
14
|
+
filters
|
|
15
|
+
List of filter strings in 'key=value' format
|
|
16
|
+
|
|
17
|
+
Returns
|
|
18
|
+
-------
|
|
19
|
+
dict[str, str]
|
|
20
|
+
Dictionary mapping facet keys to values
|
|
21
|
+
|
|
22
|
+
Raises
|
|
23
|
+
------
|
|
24
|
+
ValueError
|
|
25
|
+
If a filter string is not in valid 'key=value' format
|
|
26
|
+
|
|
27
|
+
Examples
|
|
28
|
+
--------
|
|
29
|
+
>>> parse_facet_filters(["source_id=GFDL-ESM4", "variable_id=tas"])
|
|
30
|
+
{'source_id': 'GFDL-ESM4', 'variable_id': 'tas'}
|
|
31
|
+
"""
|
|
32
|
+
if not filters:
|
|
33
|
+
return {}
|
|
34
|
+
|
|
35
|
+
parsed: dict[str, str] = {}
|
|
36
|
+
for filter_str in filters:
|
|
37
|
+
if "=" not in filter_str:
|
|
38
|
+
raise ValueError(
|
|
39
|
+
f"Invalid filter format: '{filter_str}'. "
|
|
40
|
+
f"Expected format: 'key=value' or 'dataset_type.key=value' "
|
|
41
|
+
f"(e.g., 'source_id=GFDL-ESM4' or 'cmip6.source_id=GFDL-ESM4')"
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
key, value = filter_str.split("=", 1)
|
|
45
|
+
key = key.strip()
|
|
46
|
+
value = value.strip()
|
|
47
|
+
|
|
48
|
+
if not key:
|
|
49
|
+
raise ValueError(f"Empty key in filter: '{filter_str}'")
|
|
50
|
+
if not value:
|
|
51
|
+
raise ValueError(f"Empty value in filter: '{filter_str}'")
|
|
52
|
+
|
|
53
|
+
if key in parsed:
|
|
54
|
+
logger.warning(f"Filter key '{key}' specified multiple times. Using last value: '{value}'")
|
|
55
|
+
|
|
56
|
+
parsed[key] = value
|
|
57
|
+
|
|
58
|
+
return parsed
|
|
59
|
+
|
|
60
|
+
|
|
8
61
|
def df_to_table(df: pd.DataFrame, max_col_count: int = -1) -> Table:
|
|
9
62
|
"""
|
|
10
63
|
Convert a DataFrame to a rich Table instance
|
|
@@ -32,7 +85,7 @@ def df_to_table(df: pd.DataFrame, max_col_count: int = -1) -> Table:
|
|
|
32
85
|
|
|
33
86
|
table = Table(*[str(column) for column in df.columns])
|
|
34
87
|
|
|
35
|
-
for
|
|
88
|
+
for value_list in df.values.tolist():
|
|
36
89
|
row = [str(x) for x in value_list]
|
|
37
90
|
table.add_row(*row)
|
|
38
91
|
|
|
@@ -59,7 +112,8 @@ def pretty_print_df(df: pd.DataFrame, console: Console | None = None) -> None:
|
|
|
59
112
|
# Drop duplicates as they are not informative to CLI users.
|
|
60
113
|
df = df.drop_duplicates()
|
|
61
114
|
|
|
62
|
-
if console is None:
|
|
115
|
+
if console is None: # pragma: no branch
|
|
116
|
+
logger.debug("Creating new console for pretty printing")
|
|
63
117
|
console = Console()
|
|
64
118
|
|
|
65
119
|
max_col_count = console.width // 10
|
|
@@ -6,8 +6,6 @@ which executions are required for a given diagnostic without having to re-parse
|
|
|
6
6
|
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
import errno
|
|
10
|
-
import os
|
|
11
9
|
import shutil
|
|
12
10
|
from collections.abc import Iterable
|
|
13
11
|
from pathlib import Path
|
|
@@ -15,9 +13,9 @@ from typing import Annotated
|
|
|
15
13
|
|
|
16
14
|
import typer
|
|
17
15
|
from loguru import logger
|
|
18
|
-
from rich.console import Console
|
|
19
16
|
|
|
20
17
|
from climate_ref.cli._utils import pretty_print_df
|
|
18
|
+
from climate_ref.database import ModelState
|
|
21
19
|
from climate_ref.datasets import get_dataset_adapter
|
|
22
20
|
from climate_ref.models import Dataset
|
|
23
21
|
from climate_ref.provider_registry import ProviderRegistry
|
|
@@ -27,7 +25,6 @@ from climate_ref_core.dataset_registry import dataset_registry_manager, fetch_al
|
|
|
27
25
|
from climate_ref_core.datasets import SourceDatasetType
|
|
28
26
|
|
|
29
27
|
app = typer.Typer(help=__doc__)
|
|
30
|
-
console = Console()
|
|
31
28
|
|
|
32
29
|
|
|
33
30
|
@app.command(name="list")
|
|
@@ -70,7 +67,7 @@ def list_(
|
|
|
70
67
|
raise typer.Exit(code=1)
|
|
71
68
|
data_catalog = data_catalog[column].sort_values(by=column)
|
|
72
69
|
|
|
73
|
-
pretty_print_df(data_catalog, console=console)
|
|
70
|
+
pretty_print_df(data_catalog, console=ctx.obj.console)
|
|
74
71
|
|
|
75
72
|
|
|
76
73
|
@app.command()
|
|
@@ -97,7 +94,7 @@ def list_columns(
|
|
|
97
94
|
|
|
98
95
|
|
|
99
96
|
@app.command()
|
|
100
|
-
def ingest( # noqa: PLR0913
|
|
97
|
+
def ingest( # noqa
|
|
101
98
|
ctx: typer.Context,
|
|
102
99
|
file_or_directory: list[Path],
|
|
103
100
|
source_type: Annotated[SourceDatasetType, typer.Option(help="Type of source dataset")],
|
|
@@ -106,7 +103,7 @@ def ingest( # noqa: PLR0913
|
|
|
106
103
|
n_jobs: Annotated[int | None, typer.Option(help="Number of jobs to run in parallel")] = None,
|
|
107
104
|
skip_invalid: Annotated[
|
|
108
105
|
bool, typer.Option(help="Ignore (but log) any datasets that don't pass validation")
|
|
109
|
-
] = False,
|
|
106
|
+
] = True,
|
|
110
107
|
) -> None:
|
|
111
108
|
"""
|
|
112
109
|
Ingest a directory of datasets into the database
|
|
@@ -118,6 +115,7 @@ def ingest( # noqa: PLR0913
|
|
|
118
115
|
"""
|
|
119
116
|
config = ctx.obj.config
|
|
120
117
|
db = ctx.obj.database
|
|
118
|
+
console = ctx.obj.console
|
|
121
119
|
|
|
122
120
|
kwargs = {}
|
|
123
121
|
|
|
@@ -133,16 +131,35 @@ def ingest( # noqa: PLR0913
|
|
|
133
131
|
|
|
134
132
|
if not _dir.exists():
|
|
135
133
|
logger.error(f"File or directory {_dir} does not exist")
|
|
136
|
-
|
|
134
|
+
continue
|
|
135
|
+
|
|
136
|
+
# TODO: This assumes that all datasets are nc files.
|
|
137
|
+
# THis is true for CMIP6 and obs4MIPs but may not be true for other dataset types in the future.
|
|
138
|
+
if not _dir.rglob("*.nc"):
|
|
139
|
+
logger.error(f"No .nc files found in {_dir}")
|
|
140
|
+
continue
|
|
137
141
|
|
|
138
|
-
|
|
139
|
-
|
|
142
|
+
try:
|
|
143
|
+
data_catalog = adapter.find_local_datasets(_dir)
|
|
144
|
+
data_catalog = adapter.validate_data_catalog(data_catalog, skip_invalid=skip_invalid)
|
|
145
|
+
except Exception as e:
|
|
146
|
+
logger.error(f"Error ingesting datasets from {_dir}: {e}")
|
|
147
|
+
continue
|
|
140
148
|
|
|
141
149
|
logger.info(
|
|
142
150
|
f"Found {len(data_catalog)} files for {len(data_catalog[adapter.slug_column].unique())} datasets"
|
|
143
151
|
)
|
|
144
152
|
pretty_print_df(adapter.pretty_subset(data_catalog), console=console)
|
|
145
153
|
|
|
154
|
+
# track stats for a given directory
|
|
155
|
+
num_created_datasets = 0
|
|
156
|
+
num_updated_datasets = 0
|
|
157
|
+
num_unchanged_datasets = 0
|
|
158
|
+
num_created_files = 0
|
|
159
|
+
num_updated_files = 0
|
|
160
|
+
num_removed_files = 0
|
|
161
|
+
num_unchanged_files = 0
|
|
162
|
+
|
|
146
163
|
for instance_id, data_catalog_dataset in data_catalog.groupby(adapter.slug_column):
|
|
147
164
|
logger.debug(f"Processing dataset {instance_id}")
|
|
148
165
|
with db.session.begin():
|
|
@@ -154,9 +171,29 @@ def ingest( # noqa: PLR0913
|
|
|
154
171
|
)
|
|
155
172
|
if not dataset:
|
|
156
173
|
logger.info(f"Would save dataset {instance_id} to the database")
|
|
157
|
-
continue
|
|
158
174
|
else:
|
|
159
|
-
adapter.register_dataset(config, db, data_catalog_dataset)
|
|
175
|
+
results = adapter.register_dataset(config, db, data_catalog_dataset)
|
|
176
|
+
|
|
177
|
+
if results.dataset_state == ModelState.CREATED:
|
|
178
|
+
num_created_datasets += 1
|
|
179
|
+
elif results.dataset_state == ModelState.UPDATED:
|
|
180
|
+
num_updated_datasets += 1
|
|
181
|
+
else:
|
|
182
|
+
num_unchanged_datasets += 1
|
|
183
|
+
num_created_files += len(results.files_added)
|
|
184
|
+
num_updated_files += len(results.files_updated)
|
|
185
|
+
num_removed_files += len(results.files_removed)
|
|
186
|
+
num_unchanged_files += len(results.files_unchanged)
|
|
187
|
+
|
|
188
|
+
if not dry_run:
|
|
189
|
+
ingestion_msg = (
|
|
190
|
+
f"Datasets: {num_created_datasets}/{num_updated_datasets}/{num_unchanged_datasets}"
|
|
191
|
+
" (created/updated/unchanged), "
|
|
192
|
+
f"Files: "
|
|
193
|
+
f"{num_created_files}/{num_updated_files}/{num_removed_files}/{num_unchanged_files}"
|
|
194
|
+
" (created/updated/removed/unchanged)"
|
|
195
|
+
)
|
|
196
|
+
logger.info(ingestion_msg)
|
|
160
197
|
|
|
161
198
|
if solve:
|
|
162
199
|
solve_required_executions(
|