climate-ref 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- climate_ref/cli/__init__.py +18 -8
- climate_ref/cli/datasets.py +31 -27
- climate_ref/cli/executions.py +1 -1
- climate_ref/cli/providers.py +2 -4
- climate_ref/cli/solve.py +1 -2
- climate_ref/config.py +50 -19
- climate_ref/constants.py +1 -1
- climate_ref/database.py +1 -0
- climate_ref/dataset_registry/sample_data.txt +14 -0
- climate_ref/datasets/base.py +43 -39
- climate_ref/executor/__init__.py +4 -262
- climate_ref/executor/local.py +170 -37
- climate_ref/executor/result_handling.py +231 -0
- climate_ref/executor/synchronous.py +62 -0
- climate_ref/migrations/env.py +5 -0
- climate_ref/migrations/versions/2025-05-02T1418_341a4aa2551e_regenerate.py +0 -21
- climate_ref/migrations/versions/2025-05-09T2032_03dbb4998e49_series_metric_value.py +57 -0
- climate_ref/models/__init__.py +3 -1
- climate_ref/models/base.py +2 -0
- climate_ref/models/metric_value.py +138 -13
- climate_ref/provider_registry.py +1 -1
- climate_ref/solver.py +18 -30
- climate_ref/testing.py +11 -7
- {climate_ref-0.5.0.dist-info → climate_ref-0.5.2.dist-info}/METADATA +12 -6
- climate_ref-0.5.2.dist-info/RECORD +47 -0
- climate_ref-0.5.0.dist-info/RECORD +0 -44
- {climate_ref-0.5.0.dist-info → climate_ref-0.5.2.dist-info}/WHEEL +0 -0
- {climate_ref-0.5.0.dist-info → climate_ref-0.5.2.dist-info}/entry_points.txt +0 -0
- {climate_ref-0.5.0.dist-info → climate_ref-0.5.2.dist-info}/licenses/LICENCE +0 -0
- {climate_ref-0.5.0.dist-info → climate_ref-0.5.2.dist-info}/licenses/NOTICE +0 -0
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Execute diagnostics in different environments
|
|
3
|
+
|
|
4
|
+
We support running diagnostics in different environments, such as locally,
|
|
5
|
+
in a separate process, or in a container.
|
|
6
|
+
These environments are represented by `climate_ref.executor.Executor` classes.
|
|
7
|
+
|
|
8
|
+
The simplest executor is the `LocalExecutor`, which runs the diagnostic in the same process.
|
|
9
|
+
This is useful for local testing and debugging.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import pathlib
|
|
13
|
+
import shutil
|
|
14
|
+
from typing import TYPE_CHECKING
|
|
15
|
+
|
|
16
|
+
from loguru import logger
|
|
17
|
+
from sqlalchemy import insert
|
|
18
|
+
|
|
19
|
+
from climate_ref.database import Database
|
|
20
|
+
from climate_ref.models import ScalarMetricValue
|
|
21
|
+
from climate_ref.models.execution import Execution, ExecutionOutput, ResultOutputType
|
|
22
|
+
from climate_ref_core.diagnostics import ExecutionResult, ensure_relative_path
|
|
23
|
+
from climate_ref_core.exceptions import ResultValidationError
|
|
24
|
+
from climate_ref_core.logging import EXECUTION_LOG_FILENAME
|
|
25
|
+
from climate_ref_core.pycmec.controlled_vocabulary import CV
|
|
26
|
+
from climate_ref_core.pycmec.metric import CMECMetric
|
|
27
|
+
from climate_ref_core.pycmec.output import CMECOutput, OutputDict
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
from climate_ref.config import Config
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _copy_file_to_results(
    scratch_directory: pathlib.Path,
    results_directory: pathlib.Path,
    fragment: pathlib.Path | str,
    filename: pathlib.Path | str,
) -> None:
    """
    Copy one file from the scratch tree to the same relative place in the results tree

    Missing parent directories underneath the results directory are created as needed.

    Parameters
    ----------
    scratch_directory
        Root directory that currently holds the file
    results_directory
        Root directory that receives the copy. Must differ from ``scratch_directory``
    fragment
        Sub-path, shared by both roots, underneath which the file lives
    filename
        File to copy. It is passed through ``ensure_relative_path`` together with
        ``scratch_directory / fragment`` before being used

    Raises
    ------
    FileNotFoundError
        If the file does not exist underneath ``scratch_directory / fragment``
    """
    assert results_directory != scratch_directory

    source_root = scratch_directory / fragment
    target_root = results_directory / fragment
    relative_name = ensure_relative_path(filename, source_root)

    source = source_root / relative_name
    if not source.exists():
        raise FileNotFoundError(f"Could not find {relative_name} in {source_root}")

    destination = target_root / relative_name
    destination.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(source, destination)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def handle_execution_result(
    config: "Config",
    database: Database,
    execution: Execution,
    result: "ExecutionResult",
) -> None:
    """
    Record the outcome of a diagnostic execution

    The execution log is always copied from the scratch directory to the results directory.
    For a successful result that has a metric bundle, the metric bundle (and the output bundle,
    if any) is copied as well, the execution is marked as successful, the scalar metric values
    are inserted into the database and the execution group is marked as clean.
    Otherwise the execution is marked as failed.

    Parameters
    ----------
    config
        The configuration to use
    database
        The active database session to use
    execution
        The diagnostic execution DB object to update
    result
        The result of the diagnostic execution, either successful or failed
    """

    def publish(filename: pathlib.Path | str) -> None:
        # Copy a single file for this execution from scratch to results
        _copy_file_to_results(
            config.paths.scratch,
            config.paths.results,
            execution.output_fragment,
            filename,
        )

    # The log file is copied regardless of the outcome
    publish(EXECUTION_LOG_FILENAME)

    if not result.successful or result.metric_bundle_filename is None:
        logger.error(f"{execution} failed")
        execution.mark_failed()
        return

    logger.info(f"{execution} successful")

    publish(result.metric_bundle_filename)
    execution.mark_successful(result.as_relative_path(result.metric_bundle_filename))

    if result.output_bundle_filename:
        publish(result.output_bundle_filename)
        _handle_output_bundle(
            config,
            database,
            execution,
            result.to_output_path(result.output_bundle_filename),
        )

    cmec_metric_bundle = CMECMetric.load_from_json(result.to_output_path(result.metric_bundle_filename))

    # Check the diagnostic values against the controlled vocabulary.
    # A validation failure is only logged for now.
    try:
        CV.load_from_file(config.paths.dimensions_cv).validate_metrics(cmec_metric_bundle)
    except (ResultValidationError, AssertionError):
        logger.exception("Diagnostic values do not conform with the controlled vocabulary")
        # TODO: Mark the diagnostic execution result as failed once the CV has stabilised
        # execution.mark_failed()

    # Bulk insert of the scalar values
    # TODO: The section below will likely fail until we have agreed on a controlled vocabulary
    # The current implementation swallows the exception, but emits a log message
    try:
        # A nested transaction is used so that a failed insert can (hopefully) be rolled back
        # gracefully
        with database.session.begin_nested():
            database.session.execute(
                insert(ScalarMetricValue),
                [
                    {
                        "execution_id": execution.id,
                        "value": metric_value.value,
                        "attributes": metric_value.attributes,
                        **metric_value.dimensions,
                    }
                    for metric_value in cmec_metric_bundle.iter_results()
                ],
            )
    except Exception:
        # TODO: Remove once we have settled on a controlled vocabulary
        logger.exception("Something went wrong when ingesting diagnostic values")

    # TODO Ingest the series values

    # TODO: This should check if the result is the most recent for the execution,
    # if so then update the dirty fields
    # i.e. if there are outstanding executions don't mark as clean
    execution.execution_group.dirty = False
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _handle_output_bundle(
    config: "Config",
    database: Database,
    execution: Execution,
    cmec_output_bundle_filename: pathlib.Path,
) -> None:
    """
    Register the outputs listed in a CMEC output bundle

    The bundle is loaded from JSON and its ``plots``, ``data`` and ``html`` sections are
    handed, in that order, to ``_handle_outputs`` which copies the files and tracks them
    in the database.

    Parameters
    ----------
    config
        The configuration to use
    database
        The active database session to use
    execution
        The execution that produced the outputs
    cmec_output_bundle_filename
        Path of the CMEC output bundle to load
    """
    cmec_output_bundle = CMECOutput.load_from_json(cmec_output_bundle_filename)

    # Bundle attribute -> output type recorded in the database
    sections = (
        ("plots", ResultOutputType.Plot),
        ("data", ResultOutputType.Data),
        ("html", ResultOutputType.HTML),
    )
    for attribute, output_type in sections:
        _handle_outputs(
            getattr(cmec_output_bundle, attribute),
            output_type=output_type,
            config=config,
            database=database,
            execution=execution,
        )
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _handle_outputs(
    outputs: dict[str, OutputDict] | None,
    output_type: ResultOutputType,
    config: "Config",
    database: Database,
    execution: Execution,
) -> None:
    """
    Copy a set of outputs to the results directory and track them in the database

    Parameters
    ----------
    outputs
        Outputs keyed by their short name. ``None`` or an empty mapping is a no-op
    output_type
        Type stored against every output in ``outputs``
    config
        The configuration to use
    database
        The active database session to use
    execution
        The execution that produced the outputs
    """
    if not outputs:
        return

    for short_name, info in outputs.items():
        relative_filename = ensure_relative_path(
            info.filename, config.paths.scratch / execution.output_fragment
        )

        _copy_file_to_results(
            config.paths.scratch,
            config.paths.results,
            execution.output_fragment,
            relative_filename,
        )

        record = ExecutionOutput(
            execution_id=execution.id,
            output_type=output_type,
            filename=str(relative_filename),
            description=info.description,
            short_name=short_name,
            long_name=info.long_name,
        )
        database.session.add(record)
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
from climate_ref.config import Config
|
|
4
|
+
from climate_ref.database import Database
|
|
5
|
+
from climate_ref.executor.local import process_result
|
|
6
|
+
from climate_ref.models import Execution
|
|
7
|
+
from climate_ref_core.diagnostics import ExecutionDefinition
|
|
8
|
+
from climate_ref_core.executor import execute_locally
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class SynchronousExecutor:
    """
    Executor that runs each diagnostic immediately, in the calling process.

    This is mainly useful for debugging and testing.
    [climate_ref.executor.LocalExecutor][] is a more general purpose executor.
    """

    name = "synchronous"

    def __init__(
        self, *, database: Database | None = None, config: Config | None = None, **kwargs: Any
    ) -> None:
        # Extra keyword arguments are accepted but not used.
        # The config is resolved first because the fallback database is built from it.
        resolved_config = Config.default() if config is None else config
        resolved_database = (
            Database.from_config(resolved_config, run_migrations=False) if database is None else database
        )

        self.database = resolved_database
        self.config = resolved_config

    def run(
        self,
        definition: ExecutionDefinition,
        execution: Execution | None = None,
    ) -> None:
        """
        Run a diagnostic in process and process its result

        Parameters
        ----------
        definition
            A description of the information needed for this execution of the diagnostic
        execution
            A database model representing the execution of the diagnostic.
            If provided, the result will be updated in the database when completed.
        """
        outcome = execute_locally(definition, log_level=self.config.log_level)
        process_result(self.config, self.database, outcome, execution)

    def join(self, timeout: float) -> None:
        """
        Wait for all diagnostics to finish

        Nothing is ever outstanding because diagnostics run synchronously,
        so this returns immediately.

        Parameters
        ----------
        timeout
            Timeout in seconds (Not used)
        """
        return
|
climate_ref/migrations/env.py
CHANGED
|
@@ -8,6 +8,11 @@ from climate_ref.models import Base, MetricValue
|
|
|
8
8
|
from climate_ref_core.logging import capture_logging
|
|
9
9
|
from climate_ref_core.pycmec.controlled_vocabulary import CV
|
|
10
10
|
|
|
11
|
+
try:
|
|
12
|
+
import alembic_postgresql_enum # noqa
|
|
13
|
+
except ImportError:
|
|
14
|
+
logger.warning("alembic_postgresql_enum not installed, skipping enum migration support")
|
|
15
|
+
|
|
11
16
|
# Setup logging
|
|
12
17
|
capture_logging()
|
|
13
18
|
logger.debug("Running alembic env")
|
|
@@ -235,38 +235,17 @@ def upgrade() -> None:
|
|
|
235
235
|
sa.Column("attributes", sa.JSON(), nullable=False),
|
|
236
236
|
sa.Column("created_at", sa.DateTime(), server_default=sa.text("(CURRENT_TIMESTAMP)"), nullable=False),
|
|
237
237
|
sa.Column("updated_at", sa.DateTime(), server_default=sa.text("(CURRENT_TIMESTAMP)"), nullable=False),
|
|
238
|
-
sa.Column("model", sa.Text(), nullable=True),
|
|
239
|
-
sa.Column("source_id", sa.Text(), nullable=True),
|
|
240
|
-
sa.Column("variant_label", sa.Text(), nullable=True),
|
|
241
|
-
sa.Column("metric", sa.Text(), nullable=True),
|
|
242
|
-
sa.Column("region", sa.Text(), nullable=True),
|
|
243
|
-
sa.Column("statistic", sa.Text(), nullable=True),
|
|
244
238
|
sa.ForeignKeyConstraint(
|
|
245
239
|
["execution_id"], ["execution.id"], name=op.f("fk_metric_value_execution_id_execution")
|
|
246
240
|
),
|
|
247
241
|
sa.PrimaryKeyConstraint("id", name=op.f("pk_metric_value")),
|
|
248
242
|
)
|
|
249
|
-
with op.batch_alter_table("metric_value", schema=None) as batch_op:
|
|
250
|
-
batch_op.create_index(batch_op.f("ix_metric_value_metric"), ["metric"], unique=False)
|
|
251
|
-
batch_op.create_index(batch_op.f("ix_metric_value_model"), ["model"], unique=False)
|
|
252
|
-
batch_op.create_index(batch_op.f("ix_metric_value_region"), ["region"], unique=False)
|
|
253
|
-
batch_op.create_index(batch_op.f("ix_metric_value_source_id"), ["source_id"], unique=False)
|
|
254
|
-
batch_op.create_index(batch_op.f("ix_metric_value_statistic"), ["statistic"], unique=False)
|
|
255
|
-
batch_op.create_index(batch_op.f("ix_metric_value_variant_label"), ["variant_label"], unique=False)
|
|
256
243
|
|
|
257
244
|
# ### end Alembic commands ###
|
|
258
245
|
|
|
259
246
|
|
|
260
247
|
def downgrade() -> None:
|
|
261
248
|
# ### commands auto generated by Alembic - please adjust! ###
|
|
262
|
-
with op.batch_alter_table("metric_value", schema=None) as batch_op:
|
|
263
|
-
batch_op.drop_index(batch_op.f("ix_metric_value_variant_label"))
|
|
264
|
-
batch_op.drop_index(batch_op.f("ix_metric_value_statistic"))
|
|
265
|
-
batch_op.drop_index(batch_op.f("ix_metric_value_source_id"))
|
|
266
|
-
batch_op.drop_index(batch_op.f("ix_metric_value_region"))
|
|
267
|
-
batch_op.drop_index(batch_op.f("ix_metric_value_model"))
|
|
268
|
-
batch_op.drop_index(batch_op.f("ix_metric_value_metric"))
|
|
269
|
-
|
|
270
249
|
op.drop_table("metric_value")
|
|
271
250
|
with op.batch_alter_table("execution_output", schema=None) as batch_op:
|
|
272
251
|
batch_op.drop_index(batch_op.f("ix_execution_output_output_type"))
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""series-metric-value
|
|
2
|
+
|
|
3
|
+
Revision ID: 03dbb4998e49
|
|
4
|
+
Revises: 341a4aa2551e
|
|
5
|
+
Create Date: 2025-05-09 20:32:08.664426
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from collections.abc import Sequence
|
|
10
|
+
from typing import Union
|
|
11
|
+
|
|
12
|
+
import sqlalchemy as sa
|
|
13
|
+
from alembic import op
|
|
14
|
+
from sqlalchemy.dialects import postgresql
|
|
15
|
+
|
|
16
|
+
# revision identifiers, used by Alembic.
|
|
17
|
+
revision: str = "03dbb4998e49"
|
|
18
|
+
down_revision: Union[str, None] = "341a4aa2551e"
|
|
19
|
+
branch_labels: Union[str, Sequence[str], None] = None
|
|
20
|
+
depends_on: Union[str, Sequence[str], None] = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def upgrade() -> None:
    """
    Extend ``metric_value`` so that it can hold series values as well as scalars

    Adds the nullable ``values``, ``index`` and ``index_name`` columns, makes ``value``
    nullable and adds the non-nullable ``type`` discriminator column.

    The ``type`` column carries a server default of ``SCALAR``.
    Without a default, adding a NOT NULL column fails on any database that already contains
    ``metric_value`` rows. Rows written before this revision always had a non-null ``value``,
    so ``SCALAR`` is the right value for them.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    bind = op.get_bind()
    value_types = ("SCALAR", "SERIES")

    with op.batch_alter_table("metric_value", schema=None) as batch_op:
        batch_op.add_column(sa.Column("values", sa.JSON(), nullable=True))
        batch_op.add_column(sa.Column("index", sa.JSON(), nullable=True))
        batch_op.add_column(sa.Column("index_name", sa.String(), nullable=True))
        batch_op.alter_column("value", existing_type=sa.FLOAT(), nullable=True)

    if bind.dialect.name == "postgresql":
        # PostgreSQL needs the enum type to exist before a column can use it.
        # checkfirst avoids a failure if the type was left behind by an earlier downgrade.
        sa.Enum(*value_types, name="metricvaluetype").create(bind, checkfirst=True)
        op.add_column(
            "metric_value",
            sa.Column(
                "type",
                postgresql.ENUM(*value_types, name="metricvaluetype", create_type=False),
                nullable=False,
                server_default="SCALAR",
            ),
        )
    else:
        with op.batch_alter_table("metric_value", schema=None) as batch_op:
            batch_op.add_column(
                sa.Column(
                    "type",
                    sa.Enum(*value_types, name="metricvaluetype"),
                    nullable=False,
                    server_default="SCALAR",
                )
            )
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def downgrade() -> None:
    """
    Revert ``metric_value`` to its scalar-only layout

    Makes ``value`` non-nullable again and drops the ``index_name``, ``index``, ``values``
    and ``type`` columns. On PostgreSQL the ``metricvaluetype`` enum type is dropped as well:
    the upgrade creates it explicitly, so removing the column alone would leave it behind
    and a later upgrade would then fail when creating it again.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    with op.batch_alter_table("metric_value", schema=None) as batch_op:
        batch_op.alter_column("value", existing_type=sa.FLOAT(), nullable=False)
        batch_op.drop_column("index_name")
        batch_op.drop_column("index")
        batch_op.drop_column("values")
        batch_op.drop_column("type")

    bind = op.get_bind()
    if bind.dialect.name == "postgresql":
        # The column is gone at this point, so the enum type is no longer referenced
        sa.Enum("SCALAR", "SERIES", name="metricvaluetype").drop(bind, checkfirst=True)

    # ### end Alembic commands ###
|
climate_ref/models/__init__.py
CHANGED
|
@@ -14,7 +14,7 @@ from climate_ref.models.execution import (
|
|
|
14
14
|
ExecutionGroup,
|
|
15
15
|
ExecutionOutput,
|
|
16
16
|
)
|
|
17
|
-
from climate_ref.models.metric_value import MetricValue
|
|
17
|
+
from climate_ref.models.metric_value import MetricValue, ScalarMetricValue, SeriesMetricValue
|
|
18
18
|
from climate_ref.models.provider import Provider
|
|
19
19
|
|
|
20
20
|
Table = TypeVar("Table", bound=Base)
|
|
@@ -29,5 +29,7 @@ __all__ = [
|
|
|
29
29
|
"ExecutionOutput",
|
|
30
30
|
"MetricValue",
|
|
31
31
|
"Provider",
|
|
32
|
+
"ScalarMetricValue",
|
|
33
|
+
"SeriesMetricValue",
|
|
32
34
|
"Table",
|
|
33
35
|
]
|
climate_ref/models/base.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
|
+
import enum
|
|
2
|
+
from collections.abc import Mapping
|
|
1
3
|
from typing import TYPE_CHECKING, Any, ClassVar
|
|
2
4
|
|
|
3
5
|
from loguru import logger
|
|
4
|
-
from sqlalchemy import Column, ForeignKey, Text
|
|
6
|
+
from sqlalchemy import Column, ForeignKey, Text, event
|
|
5
7
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
|
6
8
|
|
|
7
9
|
from climate_ref.models.base import Base, CreatedUpdatedMixin
|
|
@@ -11,9 +13,23 @@ if TYPE_CHECKING:
|
|
|
11
13
|
from climate_ref.models.execution import Execution
|
|
12
14
|
|
|
13
15
|
|
|
16
|
+
@enum.unique
class MetricValueType(enum.Enum):
    """
    Kind of payload held by a metric value

    Consumers use this to decide how a metric value should be interpreted.
    """

    # A single number
    SCALAR = "scalar"

    # A list of numbers
    SERIES = "series"
|
|
28
|
+
|
|
29
|
+
|
|
14
30
|
class MetricValue(CreatedUpdatedMixin, Base):
|
|
15
31
|
"""
|
|
16
|
-
Represents a single
|
|
32
|
+
Represents a single metric value
|
|
17
33
|
|
|
18
34
|
This value has a number of dimensions which are used to query the diagnostic value.
|
|
19
35
|
These dimensions describe aspects such as the type of statistic being measured,
|
|
@@ -26,14 +42,24 @@ class MetricValue(CreatedUpdatedMixin, Base):
|
|
|
26
42
|
|
|
27
43
|
__tablename__ = "metric_value"
|
|
28
44
|
|
|
45
|
+
__mapper_args__: ClassVar[Mapping[str, str]] = { # type: ignore
|
|
46
|
+
"polymorphic_on": "type",
|
|
47
|
+
}
|
|
48
|
+
|
|
29
49
|
id: Mapped[int] = mapped_column(primary_key=True)
|
|
30
50
|
execution_id: Mapped[int] = mapped_column(ForeignKey("execution.id"))
|
|
31
51
|
|
|
32
|
-
value: Mapped[float] = mapped_column()
|
|
33
52
|
attributes: Mapped[dict[str, Any]] = mapped_column()
|
|
34
53
|
|
|
35
54
|
execution: Mapped["Execution"] = relationship(back_populates="values")
|
|
36
55
|
|
|
56
|
+
type: Mapped[MetricValueType] = mapped_column()
|
|
57
|
+
"""
|
|
58
|
+
Type of metric value
|
|
59
|
+
|
|
60
|
+
This value is used to determine how the metric value should be interpreted.
|
|
61
|
+
"""
|
|
62
|
+
|
|
37
63
|
_cv_dimensions: ClassVar[list[str]] = []
|
|
38
64
|
|
|
39
65
|
@property
|
|
@@ -55,13 +81,7 @@ class MetricValue(CreatedUpdatedMixin, Base):
|
|
|
55
81
|
return dims
|
|
56
82
|
|
|
57
83
|
def __repr__(self) -> str:
|
|
58
|
-
return
|
|
59
|
-
f"<MetricValue "
|
|
60
|
-
f"id={self.id} "
|
|
61
|
-
f"execution={self.execution} "
|
|
62
|
-
f"value={self.value} "
|
|
63
|
-
f"dimensions={self.dimensions}>"
|
|
64
|
-
)
|
|
84
|
+
return f"<MetricValue id={self.id} execution={self.execution} dimensions={self.dimensions}>"
|
|
65
85
|
|
|
66
86
|
@staticmethod
|
|
67
87
|
def build_dimension_column(dimension: Dimension) -> Column[str]:
|
|
@@ -143,7 +163,22 @@ class MetricValue(CreatedUpdatedMixin, Base):
|
|
|
143
163
|
for key in keys:
|
|
144
164
|
cls._cv_dimensions.remove(key)
|
|
145
165
|
|
|
146
|
-
assert not len(cls._cv_dimensions)
|
|
166
|
+
assert not len(cls._cv_dimensions)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class ScalarMetricValue(MetricValue):
|
|
170
|
+
"""
|
|
171
|
+
A scalar value with an associated dimensions
|
|
172
|
+
|
|
173
|
+
This is a subclass of MetricValue that is used to represent a scalar value.
|
|
174
|
+
"""
|
|
175
|
+
|
|
176
|
+
__mapper_args__: ClassVar[Mapping[str, Any]] = { # type: ignore
|
|
177
|
+
"polymorphic_identity": MetricValueType.SCALAR,
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
# This is a scalar value
|
|
181
|
+
value: Mapped[float] = mapped_column(nullable=True)
|
|
147
182
|
|
|
148
183
|
@classmethod
|
|
149
184
|
def build(
|
|
@@ -158,7 +193,7 @@ class MetricValue(CreatedUpdatedMixin, Base):
|
|
|
158
193
|
Build a MetricValue from a collection of dimensions and a value
|
|
159
194
|
|
|
160
195
|
This is a helper method that validates the dimensions supplied and provides an interface
|
|
161
|
-
similar to [climate_ref_core.
|
|
196
|
+
similar to [climate_ref_core.metric_values.ScalarMetricValue][].
|
|
162
197
|
|
|
163
198
|
Parameters
|
|
164
199
|
----------
|
|
@@ -187,9 +222,99 @@ class MetricValue(CreatedUpdatedMixin, Base):
|
|
|
187
222
|
if k not in cls._cv_dimensions:
|
|
188
223
|
raise KeyError(f"Unknown dimension column '{k}'")
|
|
189
224
|
|
|
190
|
-
return
|
|
225
|
+
return ScalarMetricValue(
|
|
191
226
|
execution_id=execution_id,
|
|
192
227
|
value=value,
|
|
193
228
|
attributes=attributes,
|
|
194
229
|
**dimensions,
|
|
195
230
|
)
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class SeriesMetricValue(MetricValue):
    """
    A series of values with associated dimensions

    This is a subclass of MetricValue that is used to represent a 1-d series:
    a list of values together with an index of the same length.
    """

    __mapper_args__: ClassVar[Mapping[str, Any]] = {  # type: ignore
        "polymorphic_identity": MetricValueType.SERIES,
    }

    # The series payload: the values, the index they are aligned with and the name of that index
    values: Mapped[list[float | int]] = mapped_column(nullable=True)
    index: Mapped[list[float | int | str]] = mapped_column(nullable=True)
    index_name: Mapped[str] = mapped_column(nullable=True)

    @classmethod
    def build(  # noqa: PLR0913
        cls,
        *,
        execution_id: int,
        values: list[float | int],
        index: list[float | int | str],
        index_name: str,
        dimensions: dict[str, str],
        attributes: dict[str, Any] | None,
    ) -> "MetricValue":
        """
        Build a database object from a series

        The dimension names are checked first, then the lengths of ``values`` and ``index``.

        Parameters
        ----------
        execution_id
            Execution that created the diagnostic value
        values
            1-d array of values
        index
            1-d array of index values
        index_name
            Name of the index. Used for presentation purposes
        dimensions
            Dimensions that describe the diagnostic execution result
        attributes
            Optional additional attributes to describe the value,
            but are not in the controlled vocabulary.

        Raises
        ------
        KeyError
            If an unknown dimension was supplied.

            Dimensions must exist in the controlled vocabulary.
        ValueError
            If the length of values and index do not match.

        Returns
        -------
            Newly created SeriesMetricValue
        """
        known_dimensions = cls._cv_dimensions
        for name in dimensions:
            if name not in known_dimensions:
                raise KeyError(f"Unknown dimension column '{name}'")

        n_values, n_index = len(values), len(index)
        if n_index != n_values:
            raise ValueError(f"Index length ({n_index}) must match values length ({n_values})")

        return SeriesMetricValue(
            execution_id=execution_id,
            values=values,
            index=index,
            index_name=index_name,
            attributes=attributes,
            **dimensions,
        )
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
@event.listens_for(SeriesMetricValue, "before_insert")
@event.listens_for(SeriesMetricValue, "before_update")
def validate_series_lengths(mapper: Any, connection: Any, target: SeriesMetricValue) -> None:
    """
    Reject a series whose values and index differ in length

    Registered for both insert and update so that the database stays consistent.
    Nothing is checked when either ``values`` or ``index`` is ``None``.

    Raises
    ------
    ValueError
        If both ``values`` and ``index`` are set and their lengths differ
    """
    series_values = target.values
    if series_values is None:
        return

    series_index = target.index
    if series_index is None:
        return

    if len(series_values) != len(series_index):
        raise ValueError(
            f"Index length ({len(series_index)}) must match values length ({len(series_values)})"
        )
|
climate_ref/provider_registry.py
CHANGED