dsgrid-toolkit 0.3.3__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backend.py +93 -0
- dsgrid/__init__.py +22 -0
- dsgrid/api/__init__.py +0 -0
- dsgrid/api/api_manager.py +179 -0
- dsgrid/api/app.py +419 -0
- dsgrid/api/models.py +60 -0
- dsgrid/api/response_models.py +116 -0
- dsgrid/apps/__init__.py +0 -0
- dsgrid/apps/project_viewer/app.py +216 -0
- dsgrid/apps/registration_gui.py +444 -0
- dsgrid/chronify.py +32 -0
- dsgrid/cli/__init__.py +0 -0
- dsgrid/cli/common.py +120 -0
- dsgrid/cli/config.py +176 -0
- dsgrid/cli/download.py +13 -0
- dsgrid/cli/dsgrid.py +157 -0
- dsgrid/cli/dsgrid_admin.py +92 -0
- dsgrid/cli/install_notebooks.py +62 -0
- dsgrid/cli/query.py +729 -0
- dsgrid/cli/registry.py +1862 -0
- dsgrid/cloud/__init__.py +0 -0
- dsgrid/cloud/cloud_storage_interface.py +140 -0
- dsgrid/cloud/factory.py +31 -0
- dsgrid/cloud/fake_storage_interface.py +37 -0
- dsgrid/cloud/s3_storage_interface.py +156 -0
- dsgrid/common.py +36 -0
- dsgrid/config/__init__.py +0 -0
- dsgrid/config/annual_time_dimension_config.py +194 -0
- dsgrid/config/common.py +142 -0
- dsgrid/config/config_base.py +148 -0
- dsgrid/config/dataset_config.py +907 -0
- dsgrid/config/dataset_schema_handler_factory.py +46 -0
- dsgrid/config/date_time_dimension_config.py +136 -0
- dsgrid/config/dimension_config.py +54 -0
- dsgrid/config/dimension_config_factory.py +65 -0
- dsgrid/config/dimension_mapping_base.py +350 -0
- dsgrid/config/dimension_mappings_config.py +48 -0
- dsgrid/config/dimensions.py +1025 -0
- dsgrid/config/dimensions_config.py +71 -0
- dsgrid/config/file_schema.py +190 -0
- dsgrid/config/index_time_dimension_config.py +80 -0
- dsgrid/config/input_dataset_requirements.py +31 -0
- dsgrid/config/mapping_tables.py +209 -0
- dsgrid/config/noop_time_dimension_config.py +42 -0
- dsgrid/config/project_config.py +1462 -0
- dsgrid/config/registration_models.py +188 -0
- dsgrid/config/representative_period_time_dimension_config.py +194 -0
- dsgrid/config/simple_models.py +49 -0
- dsgrid/config/supplemental_dimension.py +29 -0
- dsgrid/config/time_dimension_base_config.py +192 -0
- dsgrid/data_models.py +155 -0
- dsgrid/dataset/__init__.py +0 -0
- dsgrid/dataset/dataset.py +123 -0
- dsgrid/dataset/dataset_expression_handler.py +86 -0
- dsgrid/dataset/dataset_mapping_manager.py +121 -0
- dsgrid/dataset/dataset_schema_handler_base.py +945 -0
- dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
- dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
- dsgrid/dataset/growth_rates.py +162 -0
- dsgrid/dataset/models.py +51 -0
- dsgrid/dataset/table_format_handler_base.py +257 -0
- dsgrid/dataset/table_format_handler_factory.py +17 -0
- dsgrid/dataset/unpivoted_table.py +121 -0
- dsgrid/dimension/__init__.py +0 -0
- dsgrid/dimension/base_models.py +230 -0
- dsgrid/dimension/dimension_filters.py +308 -0
- dsgrid/dimension/standard.py +252 -0
- dsgrid/dimension/time.py +352 -0
- dsgrid/dimension/time_utils.py +103 -0
- dsgrid/dsgrid_rc.py +88 -0
- dsgrid/exceptions.py +105 -0
- dsgrid/filesystem/__init__.py +0 -0
- dsgrid/filesystem/cloud_filesystem.py +32 -0
- dsgrid/filesystem/factory.py +32 -0
- dsgrid/filesystem/filesystem_interface.py +136 -0
- dsgrid/filesystem/local_filesystem.py +74 -0
- dsgrid/filesystem/s3_filesystem.py +118 -0
- dsgrid/loggers.py +132 -0
- dsgrid/minimal_patterns.cp313-win_amd64.pyd +0 -0
- dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
- dsgrid/notebooks/registration.ipynb +48 -0
- dsgrid/notebooks/start_notebook.sh +11 -0
- dsgrid/project.py +451 -0
- dsgrid/query/__init__.py +0 -0
- dsgrid/query/dataset_mapping_plan.py +142 -0
- dsgrid/query/derived_dataset.py +388 -0
- dsgrid/query/models.py +728 -0
- dsgrid/query/query_context.py +287 -0
- dsgrid/query/query_submitter.py +994 -0
- dsgrid/query/report_factory.py +19 -0
- dsgrid/query/report_peak_load.py +70 -0
- dsgrid/query/reports_base.py +20 -0
- dsgrid/registry/__init__.py +0 -0
- dsgrid/registry/bulk_register.py +165 -0
- dsgrid/registry/common.py +287 -0
- dsgrid/registry/config_update_checker_base.py +63 -0
- dsgrid/registry/data_store_factory.py +34 -0
- dsgrid/registry/data_store_interface.py +74 -0
- dsgrid/registry/dataset_config_generator.py +158 -0
- dsgrid/registry/dataset_registry_manager.py +950 -0
- dsgrid/registry/dataset_update_checker.py +16 -0
- dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
- dsgrid/registry/dimension_mapping_update_checker.py +16 -0
- dsgrid/registry/dimension_registry_manager.py +413 -0
- dsgrid/registry/dimension_update_checker.py +16 -0
- dsgrid/registry/duckdb_data_store.py +207 -0
- dsgrid/registry/filesystem_data_store.py +150 -0
- dsgrid/registry/filter_registry_manager.py +123 -0
- dsgrid/registry/project_config_generator.py +57 -0
- dsgrid/registry/project_registry_manager.py +1623 -0
- dsgrid/registry/project_update_checker.py +48 -0
- dsgrid/registry/registration_context.py +223 -0
- dsgrid/registry/registry_auto_updater.py +316 -0
- dsgrid/registry/registry_database.py +667 -0
- dsgrid/registry/registry_interface.py +446 -0
- dsgrid/registry/registry_manager.py +558 -0
- dsgrid/registry/registry_manager_base.py +367 -0
- dsgrid/registry/versioning.py +92 -0
- dsgrid/rust_ext/__init__.py +14 -0
- dsgrid/rust_ext/find_minimal_patterns.py +129 -0
- dsgrid/spark/__init__.py +0 -0
- dsgrid/spark/functions.py +589 -0
- dsgrid/spark/types.py +110 -0
- dsgrid/tests/__init__.py +0 -0
- dsgrid/tests/common.py +140 -0
- dsgrid/tests/make_us_data_registry.py +265 -0
- dsgrid/tests/register_derived_datasets.py +103 -0
- dsgrid/tests/utils.py +25 -0
- dsgrid/time/__init__.py +0 -0
- dsgrid/time/time_conversions.py +80 -0
- dsgrid/time/types.py +67 -0
- dsgrid/units/__init__.py +0 -0
- dsgrid/units/constants.py +113 -0
- dsgrid/units/convert.py +71 -0
- dsgrid/units/energy.py +145 -0
- dsgrid/units/power.py +87 -0
- dsgrid/utils/__init__.py +0 -0
- dsgrid/utils/dataset.py +830 -0
- dsgrid/utils/files.py +179 -0
- dsgrid/utils/filters.py +125 -0
- dsgrid/utils/id_remappings.py +100 -0
- dsgrid/utils/py_expression_eval/LICENSE +19 -0
- dsgrid/utils/py_expression_eval/README.md +8 -0
- dsgrid/utils/py_expression_eval/__init__.py +847 -0
- dsgrid/utils/py_expression_eval/tests.py +283 -0
- dsgrid/utils/run_command.py +70 -0
- dsgrid/utils/scratch_dir_context.py +65 -0
- dsgrid/utils/spark.py +918 -0
- dsgrid/utils/spark_partition.py +98 -0
- dsgrid/utils/timing.py +239 -0
- dsgrid/utils/utilities.py +221 -0
- dsgrid/utils/versioning.py +36 -0
- dsgrid_toolkit-0.3.3.dist-info/METADATA +193 -0
- dsgrid_toolkit-0.3.3.dist-info/RECORD +157 -0
- dsgrid_toolkit-0.3.3.dist-info/WHEEL +4 -0
- dsgrid_toolkit-0.3.3.dist-info/entry_points.txt +4 -0
- dsgrid_toolkit-0.3.3.dist-info/licenses/LICENSE +29 -0
|
@@ -0,0 +1,1025 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
import csv
|
|
3
|
+
import importlib
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
from datetime import datetime, timedelta
|
|
7
|
+
from typing import Any, Union, Literal
|
|
8
|
+
import copy
|
|
9
|
+
|
|
10
|
+
from pydantic import field_serializer, field_validator, model_validator, Field, ValidationInfo
|
|
11
|
+
from pydantic.functional_validators import BeforeValidator
|
|
12
|
+
from typing_extensions import Annotated
|
|
13
|
+
|
|
14
|
+
from dsgrid.data_models import DSGBaseDatabaseModel, DSGBaseModel
|
|
15
|
+
from dsgrid.dimension.base_models import DimensionType, DimensionCategory
|
|
16
|
+
from dsgrid.dimension.time import (
|
|
17
|
+
TimeIntervalType,
|
|
18
|
+
MeasurementType,
|
|
19
|
+
TimeDimensionType,
|
|
20
|
+
RepresentativePeriodFormat,
|
|
21
|
+
TimeZoneFormat,
|
|
22
|
+
)
|
|
23
|
+
from dsgrid.time.types import DatetimeTimestampType
|
|
24
|
+
from dsgrid.registry.common import REGEX_VALID_REGISTRY_NAME
|
|
25
|
+
from dsgrid.utils.files import compute_file_hash
|
|
26
|
+
from dsgrid.utils.utilities import convert_record_dicts_to_classes
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class DimensionBaseModel(DSGBaseDatabaseModel):
    """Common attributes for all dimensions"""

    name: str = Field(
        title="name",
        description="Dimension name",
    )
    dimension_type: DimensionType = Field(
        title="dimension_type",
        alias="type",
        description="Type of the dimension",
        json_schema_extra={
            "options": DimensionType.format_for_docs(),
        },
    )
    dimension_id: str | None = Field(
        default=None,
        title="dimension_id",
        description="Unique identifier, generated by dsgrid",
        json_schema_extra={
            "dsg_internal": True,
            "updateable": False,
        },
    )
    module: str = Field(
        title="module",
        description="Python module with the dimension class",
        default="dsgrid.dimension.standard",
    )
    class_name: str = Field(
        title="class_name",
        description="Dimension record model class name. "
        "The dimension class defines the expected and allowable fields (and their data types)"
        " for the dimension records file."
        "All dimension records must have a 'id' and 'name' field."
        "Some dimension classes support additional fields that can be used for mapping,"
        " querying, display, etc."
        "dsgrid in online-mode only supports dimension classes defined in the"
        " :mod:`dsgrid.dimension.standard` module. If dsgrid does not currently support a"
        " dimension class that you require, please contact the dsgrid-coordination team to"
        " request a new class feature",
        alias="class",
    )
    cls: Any = Field(
        default=None,
        title="cls",
        description="Dimension record model class",
        alias="dimension_class",
        json_schema_extra={
            "dsgrid_internal": True,
        },
    )
    description: str | None = Field(
        default=None,
        title="description",
        description="A description of the dimension records that is helpful, memorable, and "
        "identifiable",
    )
    id: int | None = Field(
        default=None,
        description="Registry database ID",
        json_schema_extra={
            "dsgrid_internal": True,
        },
    )

    @field_validator("name")
    @classmethod
    def check_name(cls, name: str) -> str:
        """Reject names that do not satisfy the registry naming rules."""
        if REGEX_VALID_REGISTRY_NAME.search(name) is None:
            msg = f"dimension name={name} does not meet the requirements"
            raise ValueError(msg)
        return name

    @field_validator("module")
    @classmethod
    def check_module(cls, module) -> "DimensionBaseModel":
        """Only modules inside the dsgrid package may supply dimension classes."""
        if not module.startswith("dsgrid"):
            msg = "Only dsgrid modules are supported as a dimension module."
            raise ValueError(msg)
        return module

    @field_validator("class_name")
    @classmethod
    def get_dimension_class_name(cls, class_name, info: ValidationInfo):
        """Set class_name based on inputs."""
        # info.data only contains fields validated earlier; if "module" failed
        # validation, skip this check rather than raise a confusing error.
        if "module" not in info.data:
            return class_name

        mod = importlib.import_module(info.data["module"])
        if not hasattr(mod, class_name):
            # NOTE(review): hasattr(mod, None) would raise TypeError before this
            # branch is reached, so the class_name-is-None path looks unreachable
            # under pydantic's str typing — confirm intended behavior.
            if class_name is None:
                msg = (
                    f'There is no class "{class_name}" in module: {mod}.'
                    "\nIf you are using a unique dimension name, you must "
                    "specify the dimension class."
                )
            else:
                msg = f"dimension class {class_name} not in {mod}"
            raise ValueError(msg)

        return class_name

    @field_validator("cls")
    @classmethod
    def get_dimension_class(cls, dim_class, info: ValidationInfo):
        """Resolve the record model class from module + class_name.

        Users must not supply cls directly; it is derived here.
        """
        if "module" not in info.data or "class_name" not in info.data:
            return dim_class

        if dim_class is not None:
            msg = f"cls={dim_class} should not be set"
            raise ValueError(msg)

        return getattr(
            importlib.import_module(info.data["module"]),
            info.data["class_name"],
        )

    @property
    def label(self) -> str:
        """Return a label for the dimension to be used in user messages."""
        return f"{self.dimension_type} {self.name}"
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class DimensionModel(DimensionBaseModel):
    """Defines a non-time dimension"""

    filename: str | None = Field(
        title="filename",
        alias="file",
        default=None,
        description="Filename containing dimension records. Only assigned for user input and "
        "output purposes. The registry database stores records in the dimension JSON document.",
    )
    file_hash: str | None = Field(
        title="file_hash",
        description="Hash of the contents of the file",
        json_schema_extra={
            "dsgrid_internal": True,
        },
        default=None,
    )
    records: list = Field(
        title="records",
        description="Dimension records that can either be loaded from filename at "
        "runtime or provided directly. Example of records provided directly:\n"
        "records: [\n"
        "  {id: 'scenario_1', name: 'Scenario 1'},\n"
        "  {id: 'scenario_2', name: 'Scenario 2'},\n"
        "],",
        default=[],
    )

    @field_validator("filename")
    @classmethod
    def check_file(cls, filename: str) -> str:
        """Validate that dimension file exists and has no errors"""
        if filename is not None:
            # Check the S3 case first: an s3:// path is never a local file, so
            # testing os.path.isfile first would mask this message with a
            # misleading "does not exist" error.
            if filename.startswith("s3://"):
                msg = "records must exist in the local filesystem, not on S3"
                raise ValueError(msg)
            if not os.path.isfile(filename):
                # Include the offending path so the user can diagnose the problem.
                msg = f"file {filename} does not exist"
                raise ValueError(msg)
            if not filename.endswith(".csv"):
                msg = f"only CSV is supported: {filename}"
                raise ValueError(msg)

        return filename

    @field_validator("file_hash")
    @classmethod
    def compute_file_hash(cls, file_hash: str, info: ValidationInfo) -> str:
        """Compute the hash of the records file when one was not provided."""
        if info.data.get("filename") is None:
            return file_hash

        if file_hash is None:
            file_hash = compute_file_hash(info.data["filename"])
        return file_hash

    @field_validator("records")
    @classmethod
    def add_records(
        cls, records: list[dict[str, Any]], info: ValidationInfo
    ) -> list[dict[str, Any]]:
        """Add records from the file."""
        dim_class = info.data.get("cls")
        if "filename" not in info.data or dim_class is None:
            # Earlier validation failed or no class was resolved; nothing to do.
            return records

        if records:
            # Records supplied inline; convert dicts to record-model instances.
            if isinstance(records[0], dict):
                records = convert_record_dicts_to_classes(
                    records, dim_class, check_duplicates=["id"]
                )
            return records

        # utf-8-sig strips a BOM if present (common in exported CSV files).
        with open(info.data["filename"], encoding="utf-8-sig") as f_in:
            records = convert_record_dicts_to_classes(
                csv.DictReader(f_in), dim_class, check_duplicates=["id"]
            )
        return records

    @field_serializer("cls", "filename")
    def serialize_cls(self, val: str, _) -> None:
        # These fields are runtime-only conveniences; never persist them.
        return None
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
class TimeFormatDateTimeTZModel(DSGBaseModel):
    """Format of timestamps in a dataset is timezone-aware datetime."""

    # Discriminator value for the DateTimeFormat tagged union.
    dtype: Literal["TIMESTAMP_TZ"] = "TIMESTAMP_TZ"
    time_column: str = Field(
        title="time_column",
        description="Name of the timestamp column in the dataset.",
        # Defaults to the first field name of DatetimeTimestampType.
        default=next(iter(DatetimeTimestampType._fields)),
    )

    def get_time_columns(self) -> list[str]:
        """Return the names of the time columns in the dataset."""
        return [self.time_column]
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
class TimeFormatDateTimeNTZModel(DSGBaseModel):
    """Format of timestamps in a dataset is timezone-naive datetime,
    requiring localization to time zones."""

    # Discriminator value for the DateTimeFormat tagged union.
    dtype: Literal["TIMESTAMP_NTZ"] = "TIMESTAMP_NTZ"
    time_column: str = Field(
        title="time_column",
        description="Name of the timestamp column in the dataset.",
        # Defaults to the first field name of DatetimeTimestampType.
        default=next(iter(DatetimeTimestampType._fields)),
    )

    def get_time_columns(self) -> list[str]:
        """Return the names of the time columns in the dataset."""
        return [self.time_column]
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
class TimeFormatInPartsModel(DSGBaseModel):
    """Format of timestamps in a dataset is in parts, e.g., month-day-hour format,
    requiring conversion to datetime."""

    # Discriminator value for the DateTimeFormat tagged union.
    dtype: Literal["time_format_in_parts"] = "time_format_in_parts"
    # TODO: we may allow more columns to be None
    year_column: str = Field(
        title="year_column",
        description="Name of the year column in the dataset.",
    )
    month_column: str = Field(
        title="month_column",
        description="Name of the month column in the dataset. Value is the month in a year (1 - 12)",
    )
    day_column: str = Field(
        title="day_column",
        description="Name of the day column in the dataset. Value is the day in a month (1 - 31).",
    )
    hour_column: str | None = Field(
        title="hour_column",
        description="Name of the hour column in the dataset. Value is the hour in a day (0 - 23). "
        "If None, the hour will be set to 0 for all rows.",
        default=None,
    )
    time_zone: str | None = Field(
        default=None,
        title="time_zone",
        description="IANA time zone of the timestamps. Use None for time zone-naive timestamps.",
    )

    def get_time_columns(self) -> list[str]:
        """Return the time-part column names, excluding the optional hour column when unset."""
        cols = [self.year_column, self.month_column, self.day_column, self.hour_column]
        return [col for col in cols if col is not None]
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
# Tagged union of the supported dataset time formats. Pydantic selects the
# concrete model by the "dtype" discriminator field of each member.
DateTimeFormat = Annotated[
    TimeFormatDateTimeTZModel | TimeFormatDateTimeNTZModel | TimeFormatInPartsModel,
    Field(discriminator="dtype"),
]
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
class TimeRangeModel(DSGBaseModel):
    """Defines a continuous range of time."""

    # This uses str instead of datetime because this object doesn't have the ability
    # to serialize/deserialize by itself.
    # We use the DatetimeRange object during processing.
    start: str = Field(
        title="start",
        description="First timestamp in the data",
    )
    end: str = Field(
        title="end",
        description="Last timestamp in the data (inclusive)",
    )
    str_format: str = Field(
        title="str_format",
        default="%Y-%m-%d %H:%M:%S",
        description="Timestamp string format (for parsing the time ranges). "
        "The string format is used to parse the timestamps provided in the time ranges."
        "Cheatsheet reference: `<https://strftime.org/>`_.",
    )
    frequency: timedelta = Field(
        title="frequency",
        default=timedelta(hours=1),
        description="Resolution of the timestamps",
    )
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
class AnnualRangeModel(DSGBaseModel):
    """Defines a continuous range of annual time."""

    # start/end are strings parsed with str_format during processing.
    start: str = Field(
        title="start",
        description="First year in the data",
    )
    end: str = Field(
        title="end",
        description="Last year in the data (inclusive)",
    )
    str_format: str = Field(
        title="str_format",
        default="%Y",
        description="Timestamp string format. "
        "The string format is used to parse the timestamps provided in the time ranges. "
        "Cheatsheet reference: `<https://strftime.org/>`_.",
    )
    frequency: int = Field(
        title="frequency",
        default=1,
        description="Resolution of the annual time in number of years",
    )
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
class MonthRangeModel(DSGBaseModel):
    """Defines a continuous range of months."""

    # This uses str instead of datetime because this object doesn't have the ability
    # to serialize/deserialize by itself.
    # We use the DatetimeRange object during processing.
    start: int = Field(
        title="start",
        description="First month in the data (January is 1, December is 12)",
    )
    end: int = Field(
        title="end",
        description="Last month in the data (inclusive)",
    )
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
class IndexRangeModel(DSGBaseModel):
    """Defines a continuous range of indices."""

    start: int = Field(
        title="start",
        description="First of indices",
    )
    end: int = Field(
        title="end",
        description="Last of indices (inclusive)",
    )
    # Anchors the integer index range to real time: index `start` maps to
    # this timestamp and each subsequent index advances by `frequency`.
    starting_timestamp: str = Field(
        title="starting timestamp",
        description="Timestamp the start index corresponds to.",
    )
    str_format: str = Field(
        title="str_format",
        default="%Y-%m-%d %H:%M:%S",
        description="Timestamp string format. "
        "The string format is used to parse the starting timestamp provided. "
        "Cheatsheet reference: `<https://strftime.org/>`_.",
    )
    frequency: timedelta = Field(
        title="frequency",
        default=timedelta(hours=1),
        description="Resolution of the timestamps for which the index range represents.",
    )
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
class TimeDimensionBaseModel(DimensionBaseModel, abc.ABC):
    """Defines a base model common to all time dimensions."""

    time_type: TimeDimensionType = Field(
        title="time_type",
        default=TimeDimensionType.DATETIME,
        description="Type of time dimension",
        json_schema_extra={
            "options": TimeDimensionType.format_for_docs(),
        },
    )

    @field_serializer("cls")
    def serialize_cls(self, val, _):
        # The resolved class is runtime-only; never serialize it.
        return None

    @abc.abstractmethod
    def is_time_zone_required_in_geography(self) -> bool:
        """Returns True if the geography dimension records must contain a time_zone column."""
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
class AlignedTimeSingleTimeZone(DSGBaseModel):
    """For each geography, data has the same set of timestamps in absolute time.
    Timestamps in the data must be tz-aware.

    E.g., data in CA and NY both start in 2018-01-01 00:00 EST.
    """

    # Discriminator field for the time_zone_format union.
    format_type: Literal[
        TimeZoneFormat.ALIGNED_IN_ABSOLUTE_TIME
    ] = TimeZoneFormat.ALIGNED_IN_ABSOLUTE_TIME
    time_zone: str = Field(
        title="time_zone",
        description="IANA time zone of data",
    )

    @model_validator(mode="before")
    @classmethod
    def handle_legacy_fields(cls, values):
        """Translate legacy field names/values to the current schema before validation."""
        # Legacy configs used format_type="aligned".
        if values.get("format_type") == "aligned":
            logger.warning(
                "Renaming legacy format_type 'aligned' to 'aligned_in_absolute_time' within the datetime config time_zone_format parameter."
            )
            values["format_type"] = TimeZoneFormat.ALIGNED_IN_ABSOLUTE_TIME.value

        # Legacy configs used "timezone" instead of "time_zone".
        if "timezone" in values:
            logger.warning(
                "Renaming legacy timezone field to time_zone within the aligned_in_absolute_time single time zone time_zone_format."
            )
            values["time_zone"] = values.pop("timezone")
        return values
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
class LocalTimeMultipleTimeZones(DSGBaseModel):
    """For each geography, data has the same set of timestamps when interpreted as local clock time by adjusting
    for the time zone of each geography.
    Timestamps in the data must be tz-aware.

    E.g., data in CA may start in 2018-01-01 00:00 PST while data in NY may start in 2018-01-01 00:00 EST.
    They are aligned in clock time but not in absolute time.

    """

    # Discriminator field for the time_zone_format union.
    format_type: Literal[
        TimeZoneFormat.ALIGNED_IN_CLOCK_TIME
    ] = TimeZoneFormat.ALIGNED_IN_CLOCK_TIME
    time_zones: list[str] = Field(
        title="time_zones",
        description="List of unique IANA time zones in the dataset",
    )
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
class DateTimeDimensionModel(TimeDimensionBaseModel):
|
|
481
|
+
"""Defines a time dimension where timestamps translate to datetime objects."""
|
|
482
|
+
|
|
483
|
+
column_format: DateTimeFormat = Field(
|
|
484
|
+
default=TimeFormatDateTimeTZModel(),
|
|
485
|
+
title="time_format",
|
|
486
|
+
description="Specifies the format of the timestamps in the dataset.",
|
|
487
|
+
)
|
|
488
|
+
time_zone_format: Union[AlignedTimeSingleTimeZone, LocalTimeMultipleTimeZones] = Field(
|
|
489
|
+
title="time_zone_format",
|
|
490
|
+
discriminator="format_type",
|
|
491
|
+
description="Specifies whether timestamps are aligned in absolute time or in local time when adjusted for time zone.",
|
|
492
|
+
)
|
|
493
|
+
|
|
494
|
+
measurement_type: MeasurementType = Field(
|
|
495
|
+
title="measurement_type",
|
|
496
|
+
default=MeasurementType.TOTAL,
|
|
497
|
+
description="""
|
|
498
|
+
The type of measurement represented by a value associated with a timestamp:
|
|
499
|
+
mean, min, max, measured, total
|
|
500
|
+
""",
|
|
501
|
+
json_schema_extra={
|
|
502
|
+
"options": MeasurementType.format_for_docs(),
|
|
503
|
+
},
|
|
504
|
+
)
|
|
505
|
+
|
|
506
|
+
ranges: list[TimeRangeModel] = Field(
|
|
507
|
+
title="time_ranges",
|
|
508
|
+
description="Defines the continuous ranges of datetime in the data, inclusive of start and end time.",
|
|
509
|
+
)
|
|
510
|
+
time_interval_type: TimeIntervalType = Field(
|
|
511
|
+
title="time_interval",
|
|
512
|
+
description="The range of time that the value associated with a timestamp represents, e.g., period-beginning",
|
|
513
|
+
json_schema_extra={
|
|
514
|
+
"options": TimeIntervalType.format_descriptions_for_docs(),
|
|
515
|
+
},
|
|
516
|
+
)
|
|
517
|
+
time_column: str = Field(
|
|
518
|
+
title="time_column",
|
|
519
|
+
description="Name of time column in the dataframe. It should be updated during the query process to reflect "
|
|
520
|
+
"any changes to the dataframe time column.",
|
|
521
|
+
default=next(iter(DatetimeTimestampType._fields)),
|
|
522
|
+
)
|
|
523
|
+
localize_to_time_zone: bool = Field(
|
|
524
|
+
title="localize_to_time_zone",
|
|
525
|
+
default=True,
|
|
526
|
+
description="Whether to localize timestamps to time zone(s). If True, timestamps in the dataframe must be tz-naive.",
|
|
527
|
+
)
|
|
528
|
+
|
|
529
|
+
@model_validator(mode="before")
|
|
530
|
+
@classmethod
|
|
531
|
+
def handle_legacy_fields(cls, values):
|
|
532
|
+
if "leap_day_adjustment" in values:
|
|
533
|
+
if values["leap_day_adjustment"] != "none":
|
|
534
|
+
msg = f"Unknown data_schema format: {values=}"
|
|
535
|
+
raise ValueError(msg)
|
|
536
|
+
logger.warning(
|
|
537
|
+
"Dropping deprecated leap_day_adjustment field from the datetime config."
|
|
538
|
+
)
|
|
539
|
+
values.pop("leap_day_adjustment")
|
|
540
|
+
|
|
541
|
+
if "datetime_format" in values:
|
|
542
|
+
logger.warning(
|
|
543
|
+
"Moving legacy datetime_format field to new time_zone_format struct within the datetime config."
|
|
544
|
+
)
|
|
545
|
+
datetime_format = values.pop("datetime_format")
|
|
546
|
+
values["time_zone_format"] = datetime_format
|
|
547
|
+
|
|
548
|
+
if "timezone" in values:
|
|
549
|
+
logger.warning(
|
|
550
|
+
"Renaming legacy timezone field to time_zone and moving it to new time_zone_format struct within the datetime config."
|
|
551
|
+
)
|
|
552
|
+
time_zone = values.pop("timezone")
|
|
553
|
+
if "time_zone_format" in values:
|
|
554
|
+
if isinstance(values["time_zone_format"], dict):
|
|
555
|
+
assert (
|
|
556
|
+
values["time_zone_format"].get("format_type")
|
|
557
|
+
== TimeZoneFormat.ALIGNED_IN_ABSOLUTE_TIME.value
|
|
558
|
+
)
|
|
559
|
+
values["time_zone_format"]["time_zone"] = time_zone
|
|
560
|
+
elif isinstance(values["time_zone_format"], AlignedTimeSingleTimeZone):
|
|
561
|
+
assert (
|
|
562
|
+
values["time_zone_format"].format_type
|
|
563
|
+
== TimeZoneFormat.ALIGNED_IN_ABSOLUTE_TIME
|
|
564
|
+
)
|
|
565
|
+
values["time_zone_format"].time_zone = time_zone
|
|
566
|
+
elif isinstance(values["time_zone_format"], LocalTimeMultipleTimeZones):
|
|
567
|
+
msg = "Cannot set single time_zone for LocalTimeMultipleTimeZones time_zone_format."
|
|
568
|
+
raise ValueError(msg)
|
|
569
|
+
else:
|
|
570
|
+
msg = f"Unexpected time_zone_format type: {values['time_zone_format']}"
|
|
571
|
+
raise ValueError(msg)
|
|
572
|
+
else:
|
|
573
|
+
values["time_zone_format"] = {
|
|
574
|
+
"format_type": TimeZoneFormat.ALIGNED_IN_ABSOLUTE_TIME.value,
|
|
575
|
+
"time_zone": time_zone,
|
|
576
|
+
}
|
|
577
|
+
|
|
578
|
+
if "time_zone_format" in values:
|
|
579
|
+
if isinstance(values["time_zone_format"], dict):
|
|
580
|
+
if values["time_zone_format"].get("format_type") == "aligned":
|
|
581
|
+
logger.warning(
|
|
582
|
+
"Renaming legacy format_type 'aligned' to 'aligned_in_absolute_time' within the datetime config."
|
|
583
|
+
)
|
|
584
|
+
values["time_zone_format"][
|
|
585
|
+
"format_type"
|
|
586
|
+
] = TimeZoneFormat.ALIGNED_IN_ABSOLUTE_TIME.value
|
|
587
|
+
elif isinstance(values["time_zone_format"], AlignedTimeSingleTimeZone):
|
|
588
|
+
# already correct
|
|
589
|
+
pass
|
|
590
|
+
elif isinstance(values["time_zone_format"], LocalTimeMultipleTimeZones):
|
|
591
|
+
# already correct
|
|
592
|
+
pass
|
|
593
|
+
else:
|
|
594
|
+
msg = f"Unexpected time_zone_format type: {values['time_zone_format']}"
|
|
595
|
+
raise ValueError(msg)
|
|
596
|
+
|
|
597
|
+
if "str_format" in values:
|
|
598
|
+
logger.warning(
|
|
599
|
+
"Moving legacy str_format field to ranges struct within the datetime config."
|
|
600
|
+
)
|
|
601
|
+
str_format = values.pop("str_format")
|
|
602
|
+
for trange in values.get("ranges", []):
|
|
603
|
+
if isinstance(trange, TimeRangeModel):
|
|
604
|
+
trange.str_format = str_format
|
|
605
|
+
elif isinstance(trange, dict):
|
|
606
|
+
trange["str_format"] = str_format
|
|
607
|
+
else:
|
|
608
|
+
msg = f"Unexpected ranges type: {type(trange)}"
|
|
609
|
+
raise ValueError(msg)
|
|
610
|
+
|
|
611
|
+
if "frequency" in values:
|
|
612
|
+
logger.warning(
|
|
613
|
+
"Moving legacy frequency field to ranges struct within the datetime config."
|
|
614
|
+
)
|
|
615
|
+
frequency = values.pop("frequency")
|
|
616
|
+
for trange in values.get("ranges", []):
|
|
617
|
+
if isinstance(trange, TimeRangeModel):
|
|
618
|
+
trange.frequency = frequency
|
|
619
|
+
elif isinstance(trange, dict):
|
|
620
|
+
trange["frequency"] = frequency
|
|
621
|
+
else:
|
|
622
|
+
msg = f"Unexpected ranges type: {type(trange)}"
|
|
623
|
+
raise ValueError(msg)
|
|
624
|
+
return values
|
|
625
|
+
|
|
626
|
+
# @model_validator(mode="after")
|
|
627
|
+
# def check_frequency(self) -> "DateTimeDimensionModel":
|
|
628
|
+
# if self.frequency in [timedelta(days=365), timedelta(days=366)]:
|
|
629
|
+
# raise ValueError(
|
|
630
|
+
# f"frequency={self.frequency}, datetime config does not allow 365 or 366 days frequency, "
|
|
631
|
+
# "use class=AnnualTime, time_type=annual to specify a year series."
|
|
632
|
+
# )
|
|
633
|
+
# return self
|
|
634
|
+
|
|
635
|
+
@field_validator("ranges")
@classmethod
def check_times(cls, ranges: list[TimeRangeModel]) -> list[TimeRangeModel]:
    """Validate the datetime ranges via the shared module-level checker.

    Delegates to _check_time_ranges, which verifies that start/end parse with
    str_format, are tz-naive, are ordered, and are consistent with frequency.
    """
    return _check_time_ranges(ranges)
|
|
639
|
+
|
|
640
|
+
def is_time_zone_required_in_geography(self) -> bool:
    """Return True if geography records must supply a time zone for this dimension.

    Only the aligned-in-clock-time format needs a per-geography time zone;
    returning the comparison directly replaces the verbose if/return-True/
    return-False form.
    """
    return self.time_zone_format.format_type == TimeZoneFormat.ALIGNED_IN_CLOCK_TIME
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
class AnnualTimeDimensionModel(TimeDimensionBaseModel):
    """Defines an annual time dimension where timestamps are years.

    Each value associated with a year represents the MEASUREMENT_TYPE over the entire year.
    i.e., MEASUREMENT_TYPE = total means the value is the total over the year, not over the range frequency.
    """

    # Discriminator for the dimension-union dispatch (see handle_dimension_union).
    time_type: TimeDimensionType = Field(default=TimeDimensionType.ANNUAL)
    measurement_type: MeasurementType = Field(
        title="measurement_type",
        default=MeasurementType.TOTAL,
        description="""
        The type of measurement represented by a value associated with an annual time:
        e.g., total
        """,
        json_schema_extra={
            "options": MeasurementType.format_for_docs(),
        },
    )

    # NOTE(review): default=[] is safe here only because pydantic copies field
    # defaults per instance — confirm this stays a pydantic model.
    ranges: list[AnnualRangeModel] = Field(
        default=[],
        title="ranges",
        description="Defines the contiguous ranges of annual time in the data, inclusive of start and end time.",
    )

    include_leap_day: bool = Field(
        title="include_leap_day",
        default=False,
        description="Whether annual time includes leap day.",
    )

    @model_validator(mode="before")
    @classmethod
    def handle_legacy_fields(cls, values):
        """Move a legacy top-level str_format into each range entry.

        Range entries may be already-built AnnualRangeModel instances or raw
        dicts; anything else is rejected.
        """
        if "str_format" in values:
            logger.warning(
                "Moving legacy str_format field to ranges struct within the annual time config."
            )
            str_format = values.pop("str_format")
            for trange in values.get("ranges", []):
                if isinstance(trange, AnnualRangeModel):
                    trange.str_format = str_format
                elif isinstance(trange, dict):
                    trange["str_format"] = str_format
                else:
                    msg = f"Unexpected ranges type: {type(trange)}"
                    raise ValueError(msg)

        return values

    @field_validator("ranges")
    @classmethod
    def check_times(cls, ranges: list[AnnualRangeModel]) -> list[AnnualRangeModel]:
        """Validate the annual ranges via the shared module-level checker."""
        return _check_annual_ranges(ranges)

    @field_validator("measurement_type")
    @classmethod
    def check_measurement_type(cls, measurement_type: MeasurementType) -> MeasurementType:
        """Reject any measurement type other than total."""
        # This restriction exists because any other measurement type would require a frequency,
        # and that isn't part of the model definition.
        if measurement_type != MeasurementType.TOTAL:
            msg = f"Annual time currently only supports MeasurementType total: {measurement_type}"
            raise ValueError(msg)
        return measurement_type

    def is_time_zone_required_in_geography(self) -> bool:
        """Annual values never need a per-geography time zone."""
        return False
|
|
713
|
+
|
|
714
|
+
|
|
715
|
+
class RepresentativePeriodTimeDimensionModel(TimeDimensionBaseModel):
    """Defines a representative time dimension."""

    # Discriminator for the dimension-union dispatch (see handle_dimension_union).
    time_type: TimeDimensionType = Field(default=TimeDimensionType.REPRESENTATIVE_PERIOD)
    measurement_type: MeasurementType = Field(
        title="measurement_type",
        default=MeasurementType.TOTAL,
        description="""
        The type of measurement represented by a value associated with a timestamp:
        e.g., mean, total
        """,
        json_schema_extra={
            "options": MeasurementType.format_for_docs(),
        },
    )
    format: RepresentativePeriodFormat = Field(
        title="format",
        description="Format of the timestamps in the load data",
    )
    # NOTE(review): description says "datetime" but the entries are
    # MonthRangeModel — looks like a copy-paste from a datetime model; confirm.
    ranges: list[MonthRangeModel] = Field(
        title="ranges",
        description="Defines the continuous ranges of datetime in the data, inclusive of start and end time.",
    )
    time_interval_type: TimeIntervalType = Field(
        title="time_interval",
        description="The range of time that the value associated with a timestamp represents",
    )

    def is_time_zone_required_in_geography(self) -> bool:
        """Geography records must supply a time zone for this dimension."""
        return True
|
|
745
|
+
|
|
746
|
+
|
|
747
|
+
class DatetimeExternalTimeZoneDimensionModel(TimeDimensionBaseModel):
    """Defines a time dimension where timestamps are tz-naive and require localizing to a time zone
    using a time zone column."""

    # Discriminated union: pydantic selects the concrete format model by its
    # format_type field.
    time_zone_format: Union[AlignedTimeSingleTimeZone, LocalTimeMultipleTimeZones] = Field(
        title="time_zone_format",
        discriminator="format_type",
        description="Specifies whether timestamps are aligned in absolute time or in local time when adjusted for time zone.",
    )
    time_type: TimeDimensionType = Field(default=TimeDimensionType.DATETIME_EXTERNAL_TZ)
    measurement_type: MeasurementType = Field(
        title="measurement_type",
        default=MeasurementType.TOTAL,
        description="""
        The type of measurement represented by a value associated with a timestamp:
        e.g., mean, total
        """,
        json_schema_extra={
            "options": MeasurementType.format_for_docs(),
        },
    )
    ranges: list[TimeRangeModel] = Field(
        title="time_ranges",
        description="""
        Defines the continuous ranges of time in the data, inclusive of start and end time.
        If the timestamps are tz-naive, they will be localized to the time zones provided in the geography dimension records.
        """,
    )
    time_interval_type: TimeIntervalType = Field(
        title="time_interval",
        description="The range of time that the value associated with a timestamp represents, e.g., period-beginning",
        json_schema_extra={
            "options": TimeIntervalType.format_descriptions_for_docs(),
        },
    )

    @field_validator("ranges")
    @classmethod
    def check_times(cls, ranges: list[TimeRangeModel]) -> list[TimeRangeModel]:
        """Validate the time ranges via the shared module-level checker."""
        return _check_time_ranges(ranges)

    def is_time_zone_required_in_geography(self) -> bool:
        """Geography records must supply the time zone used to localize timestamps."""
        return True
|
|
790
|
+
|
|
791
|
+
|
|
792
|
+
class IndexTimeDimensionModel(TimeDimensionBaseModel):
    """Defines a time dimension where timestamps are indices and requires converting to datetime."""

    # Discriminator for the dimension-union dispatch (see handle_dimension_union).
    time_type: TimeDimensionType = Field(default=TimeDimensionType.INDEX)
    measurement_type: MeasurementType = Field(
        title="measurement_type",
        default=MeasurementType.TOTAL,
        description="""
        The type of measurement represented by a value associated with a timestamp:
        e.g., mean, total
        """,
        json_schema_extra={
            "options": MeasurementType.format_for_docs(),
        },
    )
    ranges: list[IndexRangeModel] = Field(
        title="ranges",
        description="Defines the continuous ranges of indices of the data, inclusive of start and end index.",
    )
    time_interval_type: TimeIntervalType = Field(
        title="time_interval",
        description="The range of time that the value associated with a timestamp represents, e.g., period-beginning",
        json_schema_extra={
            "options": TimeIntervalType.format_descriptions_for_docs(),
        },
    )

    @staticmethod
    def _set_range_field(trange, name, value):
        """Set a legacy field on a range entry that may be a dict or an IndexRangeModel.

        Mirrors the dict/model handling in DateTimeDimensionModel.handle_legacy_fields;
        the previous implementation only supported dict entries.
        """
        if isinstance(trange, IndexRangeModel):
            setattr(trange, name, value)
        elif isinstance(trange, dict):
            trange[name] = value
        else:
            msg = f"Unexpected ranges type: {type(trange)}"
            raise ValueError(msg)

    @model_validator(mode="before")
    @classmethod
    def handle_legacy_fields(cls, values):
        """Move legacy top-level fields into the per-range structs.

        Raises ValueError (not AssertionError) on malformed legacy input so the
        check survives ``python -O`` and surfaces as a normal validation error.
        """
        if "starting_timestamps" in values:
            logger.warning(
                "Moving legacy starting_timestamps field to ranges struct within the index time config."
            )
            starting_timestamps = values.pop("starting_timestamps")
            ranges = values.get("ranges", [])
            if len(starting_timestamps) != len(ranges):
                msg = (
                    f"Legacy starting_timestamps length ({len(starting_timestamps)}) "
                    f"must match ranges length ({len(ranges)})."
                )
                raise ValueError(msg)
            for trange, st in zip(ranges, starting_timestamps):
                cls._set_range_field(trange, "starting_timestamp", st)

        if "str_format" in values:
            logger.warning(
                "Moving legacy str_format field to ranges struct within the index time config."
            )
            str_format = values.pop("str_format")
            for trange in values.get("ranges", []):
                cls._set_range_field(trange, "str_format", str_format)

        if "frequency" in values:
            logger.warning(
                "Moving legacy frequency field to ranges struct within the index time config."
            )
            frequency = values.pop("frequency")
            for trange in values.get("ranges", []):
                cls._set_range_field(trange, "frequency", frequency)

        return values

    @field_validator("ranges")
    @classmethod
    def check_indices(cls, ranges: list[IndexRangeModel]) -> list[IndexRangeModel]:
        """Validate the index ranges via the shared module-level checker."""
        return _check_index_ranges(ranges)

    def is_time_zone_required_in_geography(self) -> bool:
        """Geography records must supply a time zone for this dimension."""
        return True
|
|
856
|
+
|
|
857
|
+
|
|
858
|
+
class NoOpTimeDimensionModel(TimeDimensionBaseModel):
    """Defines a NoOp time dimension."""

    # Discriminator for the dimension-union dispatch (see handle_dimension_union).
    time_type: TimeDimensionType = TimeDimensionType.NOOP

    def is_time_zone_required_in_geography(self) -> bool:
        """A no-op time dimension never needs a per-geography time zone."""
        return False
|
|
865
|
+
|
|
866
|
+
|
|
867
|
+
class DimensionReferenceModel(DSGBaseModel):
    """Reference to a dimension stored in the registry"""

    # Accepted under the input key "type" (alias); exposed as dimension_type.
    dimension_type: DimensionType = Field(
        title="dimension_type",
        alias="type",
        description="Type of the dimension",
        json_schema_extra={
            "options": DimensionType.format_for_docs(),
        },
    )
    dimension_id: str = Field(
        title="dimension_id",
        description="Unique ID of the dimension in the registry. "
        "The dimension ID is generated by dsgrid when a dimension is registered. "
        "Only alphanumerics and dashes are supported.",
    )
    version: str = Field(
        title="version",
        # TODO: add notes about warnings for outdated versions DSGRID-189 & DSGRID-148
        description="Version of the dimension. "
        "The version string must be in semver format (e.g., '1.0.0') and it must be "
        " a valid/existing version in the registry.",
    )
|
|
891
|
+
|
|
892
|
+
|
|
893
|
+
def handle_dimension_union(values):
    """Coerce each raw dimension entry into its concrete dimension model class.

    Entries that are already DimensionBaseModel instances pass through untouched.
    Time dimensions are dispatched on their "time_type" value; everything else
    becomes a DimensionModel. The input is deep-copied so the caller's data is
    never mutated.
    """
    values = copy.deepcopy(values)
    # Dispatch table from time_type value to the model class that parses it.
    # TODO add support for DatetimeExternalTimeZoneDimensionModel
    time_model_by_type = {
        TimeDimensionType.DATETIME.value: DateTimeDimensionModel,
        TimeDimensionType.ANNUAL.value: AnnualTimeDimensionModel,
        TimeDimensionType.REPRESENTATIVE_PERIOD.value: RepresentativePeriodTimeDimensionModel,
        TimeDimensionType.INDEX.value: IndexTimeDimensionModel,
        TimeDimensionType.NOOP.value: NoOpTimeDimensionModel,
    }
    for index, entry in enumerate(values):
        if isinstance(entry, DimensionBaseModel):
            continue

        dim_type = entry.get("type")
        if dim_type is None:
            dim_type = entry["dimension_type"]
        # NOTE: Errors inside DimensionModel or DateTimeDimensionModel will be duplicated by Pydantic
        if dim_type == DimensionType.TIME.value:
            model_class = time_model_by_type.get(entry["time_type"])
            if model_class is None:
                options = [x.value for x in TimeDimensionType]
                msg = f"{entry['time_type']} not supported, valid options: {options}"
                raise ValueError(msg)
            values[index] = model_class(**entry)
        else:
            values[index] = DimensionModel(**entry)
    return values
|
|
922
|
+
|
|
923
|
+
|
|
924
|
+
# Annotated list type that coerces raw dict entries into the appropriate concrete
# dimension model class (via handle_dimension_union) before pydantic validation.
# NOTE(review): DatetimeExternalTimeZoneDimensionModel is listed in the union but
# handle_dimension_union does not yet construct it (see the TODO there) — confirm.
DimensionsListModel = Annotated[
    list[
        Union[
            DimensionModel,
            DateTimeDimensionModel,
            AnnualTimeDimensionModel,
            RepresentativePeriodTimeDimensionModel,
            DatetimeExternalTimeZoneDimensionModel,
            IndexTimeDimensionModel,
            NoOpTimeDimensionModel,
        ]
    ],
    BeforeValidator(handle_dimension_union),
]
|
|
938
|
+
|
|
939
|
+
|
|
940
|
+
def _check_time_ranges(ranges: list[TimeRangeModel]) -> list[TimeRangeModel]:
|
|
941
|
+
for trange in ranges:
|
|
942
|
+
assert isinstance(trange.frequency, timedelta)
|
|
943
|
+
if trange.frequency in [timedelta(days=365), timedelta(days=366)]:
|
|
944
|
+
msg = (
|
|
945
|
+
f"{trange.frequency=}, datetime config does not allow 365 or 366 days frequency, "
|
|
946
|
+
"use class=AnnualTime, time_type=annual to specify a year series."
|
|
947
|
+
)
|
|
948
|
+
raise ValueError(msg)
|
|
949
|
+
|
|
950
|
+
# Make sure start and end time parse.
|
|
951
|
+
start = datetime.strptime(trange.start, trange.str_format)
|
|
952
|
+
end = datetime.strptime(trange.end, trange.str_format)
|
|
953
|
+
# Make sure start and end is tz-naive.
|
|
954
|
+
if start.tzinfo is not None or end.tzinfo is not None:
|
|
955
|
+
msg = (
|
|
956
|
+
f"datetime range {trange} start and end need to be tz-naive. "
|
|
957
|
+
"Pass in the time zone info via the time_zone_format parameter"
|
|
958
|
+
)
|
|
959
|
+
raise ValueError(msg)
|
|
960
|
+
if end < start:
|
|
961
|
+
msg = f"datetime range {trange} end must not be less than start."
|
|
962
|
+
raise ValueError(msg)
|
|
963
|
+
if (end - start) % trange.frequency != timedelta(0):
|
|
964
|
+
msg = f"datetime range {trange} is inconsistent with {trange.frequency}"
|
|
965
|
+
raise ValueError(msg)
|
|
966
|
+
|
|
967
|
+
return ranges
|
|
968
|
+
|
|
969
|
+
|
|
970
|
+
def _check_annual_ranges(ranges: list[AnnualRangeModel]) -> list[AnnualRangeModel]:
|
|
971
|
+
for trange in ranges:
|
|
972
|
+
# Make sure start and end time parse.
|
|
973
|
+
start = datetime.strptime(trange.start, trange.str_format)
|
|
974
|
+
end = datetime.strptime(trange.end, trange.str_format)
|
|
975
|
+
freq = trange.frequency
|
|
976
|
+
if end < start:
|
|
977
|
+
msg = f"annual time range {trange} end must not be less than start."
|
|
978
|
+
raise ValueError(msg)
|
|
979
|
+
|
|
980
|
+
assert isinstance(freq, int)
|
|
981
|
+
if (end.year - start.year) % freq != 0:
|
|
982
|
+
msg = f"annual time range start and end are inconsistent with frequency: \n{trange}"
|
|
983
|
+
raise ValueError(msg)
|
|
984
|
+
return ranges
|
|
985
|
+
|
|
986
|
+
|
|
987
|
+
def _check_index_ranges(ranges: list[IndexRangeModel]):
|
|
988
|
+
for trange in ranges:
|
|
989
|
+
if trange.end < trange.start:
|
|
990
|
+
msg = f"index range {trange} end must not be less than start."
|
|
991
|
+
raise ValueError(msg)
|
|
992
|
+
|
|
993
|
+
return ranges
|
|
994
|
+
|
|
995
|
+
|
|
996
|
+
class DimensionCommonModel(DSGBaseModel):
    """Common attributes for all dimensions"""

    # Display name of the dimension.
    name: str
    # Dimension category, e.g., geography or sector.
    dimension_type: DimensionType
    # Registry-assigned unique ID.
    dimension_id: str
    # Name of the records class implementing this dimension.
    class_name: str
    # Human-readable description.
    description: str
|
|
1004
|
+
|
|
1005
|
+
|
|
1006
|
+
class ProjectDimensionModel(DimensionCommonModel):
    """Common attributes for all dimensions that are assigned to a project"""

    # Role of the dimension within the project (base/supplemental/etc.).
    category: DimensionCategory
|
|
1010
|
+
|
|
1011
|
+
|
|
1012
|
+
def create_dimension_common_model(model) -> DimensionCommonModel:
    """Constructs an instance of DimensionBaseModel from subclasses in order to give the API
    one common model for all dimensions. Avoids the complexity of dealing with
    DimensionBaseModel validators.
    """
    common_fields = DimensionCommonModel.model_fields.keys()
    data = {}
    # Copy only the attributes that both the source model and the common model declare.
    for field_name in type(model).model_fields:
        if field_name in common_fields:
            data[field_name] = getattr(model, field_name)
    return DimensionCommonModel(**data)
|
|
1020
|
+
|
|
1021
|
+
|
|
1022
|
+
def create_project_dimension_model(model, category: DimensionCategory) -> ProjectDimensionModel:
    """Build a ProjectDimensionModel from a dimension model plus its project category."""
    common = create_dimension_common_model(model)
    payload = dict(common.model_dump(), category=category.value)
    return ProjectDimensionModel(**payload)
|