dsgrid-toolkit 0.3.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. build_backend.py +93 -0
  2. dsgrid/__init__.py +22 -0
  3. dsgrid/api/__init__.py +0 -0
  4. dsgrid/api/api_manager.py +179 -0
  5. dsgrid/api/app.py +419 -0
  6. dsgrid/api/models.py +60 -0
  7. dsgrid/api/response_models.py +116 -0
  8. dsgrid/apps/__init__.py +0 -0
  9. dsgrid/apps/project_viewer/app.py +216 -0
  10. dsgrid/apps/registration_gui.py +444 -0
  11. dsgrid/chronify.py +32 -0
  12. dsgrid/cli/__init__.py +0 -0
  13. dsgrid/cli/common.py +120 -0
  14. dsgrid/cli/config.py +176 -0
  15. dsgrid/cli/download.py +13 -0
  16. dsgrid/cli/dsgrid.py +157 -0
  17. dsgrid/cli/dsgrid_admin.py +92 -0
  18. dsgrid/cli/install_notebooks.py +62 -0
  19. dsgrid/cli/query.py +729 -0
  20. dsgrid/cli/registry.py +1862 -0
  21. dsgrid/cloud/__init__.py +0 -0
  22. dsgrid/cloud/cloud_storage_interface.py +140 -0
  23. dsgrid/cloud/factory.py +31 -0
  24. dsgrid/cloud/fake_storage_interface.py +37 -0
  25. dsgrid/cloud/s3_storage_interface.py +156 -0
  26. dsgrid/common.py +36 -0
  27. dsgrid/config/__init__.py +0 -0
  28. dsgrid/config/annual_time_dimension_config.py +194 -0
  29. dsgrid/config/common.py +142 -0
  30. dsgrid/config/config_base.py +148 -0
  31. dsgrid/config/dataset_config.py +907 -0
  32. dsgrid/config/dataset_schema_handler_factory.py +46 -0
  33. dsgrid/config/date_time_dimension_config.py +136 -0
  34. dsgrid/config/dimension_config.py +54 -0
  35. dsgrid/config/dimension_config_factory.py +65 -0
  36. dsgrid/config/dimension_mapping_base.py +350 -0
  37. dsgrid/config/dimension_mappings_config.py +48 -0
  38. dsgrid/config/dimensions.py +1025 -0
  39. dsgrid/config/dimensions_config.py +71 -0
  40. dsgrid/config/file_schema.py +190 -0
  41. dsgrid/config/index_time_dimension_config.py +80 -0
  42. dsgrid/config/input_dataset_requirements.py +31 -0
  43. dsgrid/config/mapping_tables.py +209 -0
  44. dsgrid/config/noop_time_dimension_config.py +42 -0
  45. dsgrid/config/project_config.py +1462 -0
  46. dsgrid/config/registration_models.py +188 -0
  47. dsgrid/config/representative_period_time_dimension_config.py +194 -0
  48. dsgrid/config/simple_models.py +49 -0
  49. dsgrid/config/supplemental_dimension.py +29 -0
  50. dsgrid/config/time_dimension_base_config.py +192 -0
  51. dsgrid/data_models.py +155 -0
  52. dsgrid/dataset/__init__.py +0 -0
  53. dsgrid/dataset/dataset.py +123 -0
  54. dsgrid/dataset/dataset_expression_handler.py +86 -0
  55. dsgrid/dataset/dataset_mapping_manager.py +121 -0
  56. dsgrid/dataset/dataset_schema_handler_base.py +945 -0
  57. dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
  58. dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
  59. dsgrid/dataset/growth_rates.py +162 -0
  60. dsgrid/dataset/models.py +51 -0
  61. dsgrid/dataset/table_format_handler_base.py +257 -0
  62. dsgrid/dataset/table_format_handler_factory.py +17 -0
  63. dsgrid/dataset/unpivoted_table.py +121 -0
  64. dsgrid/dimension/__init__.py +0 -0
  65. dsgrid/dimension/base_models.py +230 -0
  66. dsgrid/dimension/dimension_filters.py +308 -0
  67. dsgrid/dimension/standard.py +252 -0
  68. dsgrid/dimension/time.py +352 -0
  69. dsgrid/dimension/time_utils.py +103 -0
  70. dsgrid/dsgrid_rc.py +88 -0
  71. dsgrid/exceptions.py +105 -0
  72. dsgrid/filesystem/__init__.py +0 -0
  73. dsgrid/filesystem/cloud_filesystem.py +32 -0
  74. dsgrid/filesystem/factory.py +32 -0
  75. dsgrid/filesystem/filesystem_interface.py +136 -0
  76. dsgrid/filesystem/local_filesystem.py +74 -0
  77. dsgrid/filesystem/s3_filesystem.py +118 -0
  78. dsgrid/loggers.py +132 -0
  79. dsgrid/minimal_patterns.cp313-win_amd64.pyd +0 -0
  80. dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
  81. dsgrid/notebooks/registration.ipynb +48 -0
  82. dsgrid/notebooks/start_notebook.sh +11 -0
  83. dsgrid/project.py +451 -0
  84. dsgrid/query/__init__.py +0 -0
  85. dsgrid/query/dataset_mapping_plan.py +142 -0
  86. dsgrid/query/derived_dataset.py +388 -0
  87. dsgrid/query/models.py +728 -0
  88. dsgrid/query/query_context.py +287 -0
  89. dsgrid/query/query_submitter.py +994 -0
  90. dsgrid/query/report_factory.py +19 -0
  91. dsgrid/query/report_peak_load.py +70 -0
  92. dsgrid/query/reports_base.py +20 -0
  93. dsgrid/registry/__init__.py +0 -0
  94. dsgrid/registry/bulk_register.py +165 -0
  95. dsgrid/registry/common.py +287 -0
  96. dsgrid/registry/config_update_checker_base.py +63 -0
  97. dsgrid/registry/data_store_factory.py +34 -0
  98. dsgrid/registry/data_store_interface.py +74 -0
  99. dsgrid/registry/dataset_config_generator.py +158 -0
  100. dsgrid/registry/dataset_registry_manager.py +950 -0
  101. dsgrid/registry/dataset_update_checker.py +16 -0
  102. dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
  103. dsgrid/registry/dimension_mapping_update_checker.py +16 -0
  104. dsgrid/registry/dimension_registry_manager.py +413 -0
  105. dsgrid/registry/dimension_update_checker.py +16 -0
  106. dsgrid/registry/duckdb_data_store.py +207 -0
  107. dsgrid/registry/filesystem_data_store.py +150 -0
  108. dsgrid/registry/filter_registry_manager.py +123 -0
  109. dsgrid/registry/project_config_generator.py +57 -0
  110. dsgrid/registry/project_registry_manager.py +1623 -0
  111. dsgrid/registry/project_update_checker.py +48 -0
  112. dsgrid/registry/registration_context.py +223 -0
  113. dsgrid/registry/registry_auto_updater.py +316 -0
  114. dsgrid/registry/registry_database.py +667 -0
  115. dsgrid/registry/registry_interface.py +446 -0
  116. dsgrid/registry/registry_manager.py +558 -0
  117. dsgrid/registry/registry_manager_base.py +367 -0
  118. dsgrid/registry/versioning.py +92 -0
  119. dsgrid/rust_ext/__init__.py +14 -0
  120. dsgrid/rust_ext/find_minimal_patterns.py +129 -0
  121. dsgrid/spark/__init__.py +0 -0
  122. dsgrid/spark/functions.py +589 -0
  123. dsgrid/spark/types.py +110 -0
  124. dsgrid/tests/__init__.py +0 -0
  125. dsgrid/tests/common.py +140 -0
  126. dsgrid/tests/make_us_data_registry.py +265 -0
  127. dsgrid/tests/register_derived_datasets.py +103 -0
  128. dsgrid/tests/utils.py +25 -0
  129. dsgrid/time/__init__.py +0 -0
  130. dsgrid/time/time_conversions.py +80 -0
  131. dsgrid/time/types.py +67 -0
  132. dsgrid/units/__init__.py +0 -0
  133. dsgrid/units/constants.py +113 -0
  134. dsgrid/units/convert.py +71 -0
  135. dsgrid/units/energy.py +145 -0
  136. dsgrid/units/power.py +87 -0
  137. dsgrid/utils/__init__.py +0 -0
  138. dsgrid/utils/dataset.py +830 -0
  139. dsgrid/utils/files.py +179 -0
  140. dsgrid/utils/filters.py +125 -0
  141. dsgrid/utils/id_remappings.py +100 -0
  142. dsgrid/utils/py_expression_eval/LICENSE +19 -0
  143. dsgrid/utils/py_expression_eval/README.md +8 -0
  144. dsgrid/utils/py_expression_eval/__init__.py +847 -0
  145. dsgrid/utils/py_expression_eval/tests.py +283 -0
  146. dsgrid/utils/run_command.py +70 -0
  147. dsgrid/utils/scratch_dir_context.py +65 -0
  148. dsgrid/utils/spark.py +918 -0
  149. dsgrid/utils/spark_partition.py +98 -0
  150. dsgrid/utils/timing.py +239 -0
  151. dsgrid/utils/utilities.py +221 -0
  152. dsgrid/utils/versioning.py +36 -0
  153. dsgrid_toolkit-0.3.3.dist-info/METADATA +193 -0
  154. dsgrid_toolkit-0.3.3.dist-info/RECORD +157 -0
  155. dsgrid_toolkit-0.3.3.dist-info/WHEEL +4 -0
  156. dsgrid_toolkit-0.3.3.dist-info/entry_points.txt +4 -0
  157. dsgrid_toolkit-0.3.3.dist-info/licenses/LICENSE +29 -0
@@ -0,0 +1,1025 @@
1
+ import abc
2
+ import csv
3
+ import importlib
4
+ import logging
5
+ import os
6
+ from datetime import datetime, timedelta
7
+ from typing import Any, Union, Literal
8
+ import copy
9
+
10
+ from pydantic import field_serializer, field_validator, model_validator, Field, ValidationInfo
11
+ from pydantic.functional_validators import BeforeValidator
12
+ from typing_extensions import Annotated
13
+
14
+ from dsgrid.data_models import DSGBaseDatabaseModel, DSGBaseModel
15
+ from dsgrid.dimension.base_models import DimensionType, DimensionCategory
16
+ from dsgrid.dimension.time import (
17
+ TimeIntervalType,
18
+ MeasurementType,
19
+ TimeDimensionType,
20
+ RepresentativePeriodFormat,
21
+ TimeZoneFormat,
22
+ )
23
+ from dsgrid.time.types import DatetimeTimestampType
24
+ from dsgrid.registry.common import REGEX_VALID_REGISTRY_NAME
25
+ from dsgrid.utils.files import compute_file_hash
26
+ from dsgrid.utils.utilities import convert_record_dicts_to_classes
27
+
28
+
29
# Module-level logger named after this module, per stdlib logging convention.
logger = logging.getLogger(__name__)
30
+
31
+
32
class DimensionBaseModel(DSGBaseDatabaseModel):
    """Common attributes for all dimensions"""

    name: str = Field(
        title="name",
        description="Dimension name",
    )
    dimension_type: DimensionType = Field(
        title="dimension_type",
        alias="type",
        description="Type of the dimension",
        json_schema_extra={
            "options": DimensionType.format_for_docs(),
        },
    )
    dimension_id: str | None = Field(
        default=None,
        title="dimension_id",
        description="Unique identifier, generated by dsgrid",
        json_schema_extra={
            # NOTE(review): other fields use the key "dsgrid_internal"; this one
            # uses "dsg_internal". Left unchanged because external tooling may
            # read this exact key — confirm whether it should be unified.
            "dsg_internal": True,
            "updateable": False,
        },
    )
    module: str = Field(
        title="module",
        description="Python module with the dimension class",
        default="dsgrid.dimension.standard",
    )
    class_name: str = Field(
        title="class_name",
        description="Dimension record model class name. "
        "The dimension class defines the expected and allowable fields (and their data types)"
        " for the dimension records file."
        "All dimension records must have a 'id' and 'name' field."
        "Some dimension classes support additional fields that can be used for mapping,"
        " querying, display, etc."
        "dsgrid in online-mode only supports dimension classes defined in the"
        " :mod:`dsgrid.dimension.standard` module. If dsgrid does not currently support a"
        " dimension class that you require, please contact the dsgrid-coordination team to"
        " request a new class feature",
        alias="class",
    )
    # Resolved class object; populated by the get_dimension_class validator,
    # never supplied by the user.
    cls: Any = Field(
        default=None,
        title="cls",
        description="Dimension record model class",
        alias="dimension_class",
        json_schema_extra={
            "dsgrid_internal": True,
        },
    )
    description: str | None = Field(
        default=None,
        title="description",
        description="A description of the dimension records that is helpful, memorable, and "
        "identifiable",
    )
    id: int | None = Field(
        default=None,
        description="Registry database ID",
        json_schema_extra={
            "dsgrid_internal": True,
        },
    )

    @field_validator("name")
    @classmethod
    def check_name(cls, name: str) -> str:
        """Ensure the dimension name satisfies the registry naming rules.

        Raises ValueError when the name does not match REGEX_VALID_REGISTRY_NAME.
        """
        if REGEX_VALID_REGISTRY_NAME.search(name) is None:
            msg = f"dimension name={name} does not meet the requirements"
            raise ValueError(msg)
        return name

    @field_validator("module")
    @classmethod
    def check_module(cls, module: str) -> str:
        """Restrict dimension modules to dsgrid-owned packages.

        Fixed: the return annotation previously claimed DimensionBaseModel,
        but a field validator returns the validated field value (the module
        string).
        """
        if not module.startswith("dsgrid"):
            msg = "Only dsgrid modules are supported as a dimension module."
            raise ValueError(msg)
        return module

    @field_validator("class_name")
    @classmethod
    def get_dimension_class_name(cls, class_name, info: ValidationInfo):
        """Set class_name based on inputs."""
        if "module" not in info.data:
            # The module field failed validation; skip this dependent check.
            return class_name

        mod = importlib.import_module(info.data["module"])
        # Check for None before calling hasattr: hasattr(mod, None) raises
        # TypeError, which would bypass the intended ValueError below.
        if class_name is None or not hasattr(mod, class_name):
            if class_name is None:
                msg = (
                    f'There is no class "{class_name}" in module: {mod}.'
                    "\nIf you are using a unique dimension name, you must "
                    "specify the dimension class."
                )
            else:
                msg = f"dimension class {class_name} not in {mod}"
            raise ValueError(msg)

        return class_name

    @field_validator("cls")
    @classmethod
    def get_dimension_class(cls, dim_class, info: ValidationInfo):
        """Resolve the record model class object from module and class_name."""
        if "module" not in info.data or "class_name" not in info.data:
            # An earlier validator failed; nothing to resolve.
            return dim_class

        if dim_class is not None:
            # Users must not supply the class object directly.
            msg = f"cls={dim_class} should not be set"
            raise ValueError(msg)

        return getattr(
            importlib.import_module(info.data["module"]),
            info.data["class_name"],
        )

    @property
    def label(self) -> str:
        """Return a label for the dimension to be used in user messages."""
        return f"{self.dimension_type} {self.name}"
154
+
155
+
156
class DimensionModel(DimensionBaseModel):
    """Defines a non-time dimension"""

    filename: str | None = Field(
        title="filename",
        alias="file",
        default=None,
        description="Filename containing dimension records. Only assigned for user input and "
        "output purposes. The registry database stores records in the dimension JSON document.",
    )
    file_hash: str | None = Field(
        title="file_hash",
        description="Hash of the contents of the file",
        json_schema_extra={
            "dsgrid_internal": True,
        },
        default=None,
    )
    records: list = Field(
        title="records",
        description="Dimension records that can either be loaded from filename at "
        "runtime or provided directly. Example of records provided directly:\n"
        "records: [\n"
        " {id: 'scenario_1', name: 'Scenario 1'},\n"
        " {id: 'scenario_2', name: 'Scenario 2'},\n"
        "],",
        default=[],
    )

    @field_validator("filename")
    @classmethod
    def check_file(cls, filename: str | None) -> str | None:
        """Validate that dimension file exists and has no errors"""
        if filename is not None:
            # Check the S3 prefix before the local-file check; otherwise an
            # s3:// path would always fail the isfile() test first and the
            # S3-specific message would be unreachable.
            if filename.startswith("s3://"):
                msg = "records must exist in the local filesystem, not on S3"
                raise ValueError(msg)
            if not os.path.isfile(filename):
                # Fixed: message previously contained the literal "(unknown)"
                # instead of interpolating the filename.
                msg = f"file {filename} does not exist"
                raise ValueError(msg)
            if not filename.endswith(".csv"):
                # Fixed: same lost interpolation as above.
                msg = f"only CSV is supported: {filename}"
                raise ValueError(msg)

        return filename

    @field_validator("file_hash")
    @classmethod
    def compute_file_hash(cls, file_hash: str | None, info: ValidationInfo) -> str | None:
        """Compute the hash of the records file when one is not supplied."""
        if info.data.get("filename") is None:
            return file_hash

        if file_hash is None:
            # Resolves to the module-level compute_file_hash function; the
            # validator's own name is class-scoped and not visible here.
            file_hash = compute_file_hash(info.data["filename"])
        return file_hash

    @field_validator("records")
    @classmethod
    def add_records(
        cls, records: list[dict[str, Any]], info: ValidationInfo
    ) -> list[dict[str, Any]]:
        """Add records from the file."""
        dim_class = info.data.get("cls")
        # Guard on the filename *value*, not just key presence: filename
        # defaults to None, so it is present in info.data even when no file
        # was given; previously an empty records list with no filename fell
        # through to open(None) and raised TypeError.
        if info.data.get("filename") is None or dim_class is None:
            return records

        if records:
            if isinstance(records[0], dict):
                records = convert_record_dicts_to_classes(
                    records, dim_class, check_duplicates=["id"]
                )
            return records

        # utf-8-sig strips a BOM if the CSV was exported from Excel.
        with open(info.data["filename"], encoding="utf-8-sig") as f_in:
            records = convert_record_dicts_to_classes(
                csv.DictReader(f_in), dim_class, check_duplicates=["id"]
            )
        return records

    @field_serializer("cls", "filename")
    def serialize_cls(self, val: str, _) -> None:
        # Neither the resolved class object nor the local file path is
        # persisted; records are stored in the registry database instead.
        return None
238
+
239
+
240
class TimeFormatDateTimeTZModel(DSGBaseModel):
    """Format of timestamps in a dataset is timezone-aware datetime."""

    # Discriminator value used by the DateTimeFormat union.
    dtype: Literal["TIMESTAMP_TZ"] = "TIMESTAMP_TZ"
    time_column: str = Field(
        title="time_column",
        description="Name of the timestamp column in the dataset.",
        # Default is the first field name of DatetimeTimestampType.
        default=next(iter(DatetimeTimestampType._fields)),
    )

    def get_time_columns(self) -> list[str]:
        """Return the dataset column names that carry time information."""
        return [self.time_column]
252
+
253
+
254
class TimeFormatDateTimeNTZModel(DSGBaseModel):
    """Format of timestamps in a dataset is timezone-naive datetime,
    requiring localization to time zones."""

    # Discriminator value used by the DateTimeFormat union.
    dtype: Literal["TIMESTAMP_NTZ"] = "TIMESTAMP_NTZ"
    time_column: str = Field(
        title="time_column",
        description="Name of the timestamp column in the dataset.",
        # Default is the first field name of DatetimeTimestampType.
        default=next(iter(DatetimeTimestampType._fields)),
    )

    def get_time_columns(self) -> list[str]:
        """Return the dataset column names that carry time information."""
        return [self.time_column]
267
+
268
+
269
class TimeFormatInPartsModel(DSGBaseModel):
    """Format of timestamps in a dataset is in parts, e.g., month-day-hour format,
    requiring conversion to datetime."""

    # Discriminator value used by the DateTimeFormat union.
    dtype: Literal["time_format_in_parts"] = "time_format_in_parts"
    # TODO: additional columns may become optional (None) in the future.
    year_column: str = Field(
        title="year_column",
        description="Name of the year column in the dataset.",
    )
    month_column: str = Field(
        title="month_column",
        description="Name of the month column in the dataset. Value is the month in a year (1 - 12)",
    )
    day_column: str = Field(
        title="day_column",
        description="Name of the day column in the dataset. Value is the day in a month (1 - 31).",
    )
    hour_column: str | None = Field(
        title="hour_column",
        description="Name of the hour column in the dataset. Value is the hour in a day (0 - 23). "
        "If None, the hour will be set to 0 for all rows.",
        default=None,
    )
    time_zone: str | None = Field(
        default=None,
        title="time_zone",
        description="IANA time zone of the timestamps. Use None for time zone-naive timestamps.",
    )

    def get_time_columns(self) -> list[str]:
        """Return the dataset column names that carry time information,
        skipping unset optional parts."""
        candidates = (
            self.year_column,
            self.month_column,
            self.day_column,
            self.hour_column,
        )
        return [column for column in candidates if column is not None]
302
+
303
+
304
# Discriminated union of the supported timestamp formats; pydantic selects the
# concrete model based on the "dtype" field value.
DateTimeFormat = Annotated[
    TimeFormatDateTimeTZModel | TimeFormatDateTimeNTZModel | TimeFormatInPartsModel,
    Field(discriminator="dtype"),
]
308
+
309
+
310
class TimeRangeModel(DSGBaseModel):
    """Defines a continuous range of time."""

    # This uses str instead of datetime because this object doesn't have the ability
    # to serialize/deserialize by itself.
    # We use the DatetimeRange object during processing.
    start: str = Field(
        title="start",
        description="First timestamp in the data",
    )
    end: str = Field(
        title="end",
        description="Last timestamp in the data (inclusive)",
    )
    # strftime/strptime pattern used to parse start and end.
    str_format: str = Field(
        title="str_format",
        default="%Y-%m-%d %H:%M:%S",
        description="Timestamp string format (for parsing the time ranges). "
        "The string format is used to parse the timestamps provided in the time ranges."
        "Cheatsheet reference: `<https://strftime.org/>`_.",
    )
    frequency: timedelta = Field(
        title="frequency",
        default=timedelta(hours=1),
        description="Resolution of the timestamps",
    )
336
+
337
+
338
class AnnualRangeModel(DSGBaseModel):
    """Defines a continuous range of annual time."""

    # start/end are year strings parsed with str_format (default "%Y").
    start: str = Field(
        title="start",
        description="First year in the data",
    )
    end: str = Field(
        title="end",
        description="Last year in the data (inclusive)",
    )
    str_format: str = Field(
        title="str_format",
        default="%Y",
        description="Timestamp string format. "
        "The string format is used to parse the timestamps provided in the time ranges. "
        "Cheatsheet reference: `<https://strftime.org/>`_.",
    )
    # Step between years; 1 means every year in [start, end].
    frequency: int = Field(
        title="frequency",
        default=1,
        description="Resolution of the annual time in number of years",
    )
361
+
362
+
363
class MonthRangeModel(DSGBaseModel):
    """Defines a continuous range of time."""

    # Months are plain integers (1-12); no string parsing is needed, unlike
    # TimeRangeModel/AnnualRangeModel.
    start: int = Field(
        title="start",
        description="First month in the data (January is 1, December is 12)",
    )
    end: int = Field(
        title="end",
        description="Last month in the data (inclusive)",
    )
377
+
378
+
379
class IndexRangeModel(DSGBaseModel):
    """Defines a continuous range of indices."""

    start: int = Field(
        title="start",
        description="First of indices",
    )
    end: int = Field(
        title="end",
        description="Last of indices (inclusive)",
    )
    # The timestamp that index `start` maps to; successive indices advance by
    # `frequency`.
    starting_timestamp: str = Field(
        title="starting timestamp",
        description="Timestamp the start index corresponds to.",
    )
    str_format: str = Field(
        title="str_format",
        default="%Y-%m-%d %H:%M:%S",
        description="Timestamp string format. "
        "The string format is used to parse the starting timestamp provided. "
        "Cheatsheet reference: `<https://strftime.org/>`_.",
    )
    frequency: timedelta = Field(
        title="frequency",
        default=timedelta(hours=1),
        description="Resolution of the timestamps for which the index range represents.",
    )
406
+
407
+
408
class TimeDimensionBaseModel(DimensionBaseModel, abc.ABC):
    """Defines a base model common to all time dimensions."""

    time_type: TimeDimensionType = Field(
        title="time_type",
        default=TimeDimensionType.DATETIME,
        description="Type of time dimension",
        json_schema_extra={
            "options": TimeDimensionType.format_for_docs(),
        },
    )

    @field_serializer("cls")
    def serialize_cls(self, val, _):
        # The resolved dimension class is runtime-only state; never serialize it.
        return None

    @abc.abstractmethod
    def is_time_zone_required_in_geography(self):
        """Returns True if the geography dimension records must contain a time_zone column."""
427
+
428
+
429
class AlignedTimeSingleTimeZone(DSGBaseModel):
    """For each geography, data has the same set of timestamps in absolute time.
    Timestamps in the data must be tz-aware.

    E.g., data in CA and NY both start in 2018-01-01 00:00 EST.
    """

    format_type: Literal[
        TimeZoneFormat.ALIGNED_IN_ABSOLUTE_TIME
    ] = TimeZoneFormat.ALIGNED_IN_ABSOLUTE_TIME
    time_zone: str = Field(
        title="time_zone",
        description="IANA time zone of data",
    )

    @model_validator(mode="before")
    @classmethod
    def handle_legacy_fields(cls, values):
        """Upgrade deprecated input fields to the current schema in place."""
        # Old configs used format_type="aligned" for what is now
        # "aligned_in_absolute_time".
        if values.get("format_type") == "aligned":
            logger.warning(
                "Renaming legacy format_type 'aligned' to 'aligned_in_absolute_time' within the datetime config time_zone_format parameter."
            )
            values["format_type"] = TimeZoneFormat.ALIGNED_IN_ABSOLUTE_TIME.value

        # Old configs used "timezone" instead of "time_zone".
        try:
            legacy_tz = values.pop("timezone")
        except KeyError:
            pass
        else:
            logger.warning(
                "Renaming legacy timezone field to time_zone within the aligned_in_absolute_time single time zone time_zone_format."
            )
            values["time_zone"] = legacy_tz
        return values
459
+
460
+
461
class LocalTimeMultipleTimeZones(DSGBaseModel):
    """For each geography, data has the same set of timestamps when interpreted as local clock time by adjusting
    for the time zone of each geography.
    Timestamps in the data must be tz-aware.

    E.g., data in CA may start in 2018-01-01 00:00 PST while data in NY may start in 2018-01-01 00:00 EST.
    They are aligned in clock time but not in absolute time.

    """

    format_type: Literal[
        TimeZoneFormat.ALIGNED_IN_CLOCK_TIME
    ] = TimeZoneFormat.ALIGNED_IN_CLOCK_TIME
    time_zones: list[str] = Field(
        title="time_zones",
        description="List of unique IANA time zones in the dataset",
    )
478
+
479
+
480
class DateTimeDimensionModel(TimeDimensionBaseModel):
    """Defines a time dimension where timestamps translate to datetime objects."""

    column_format: DateTimeFormat = Field(
        default=TimeFormatDateTimeTZModel(),
        title="time_format",
        description="Specifies the format of the timestamps in the dataset.",
    )
    time_zone_format: Union[AlignedTimeSingleTimeZone, LocalTimeMultipleTimeZones] = Field(
        title="time_zone_format",
        discriminator="format_type",
        description="Specifies whether timestamps are aligned in absolute time or in local time when adjusted for time zone.",
    )

    measurement_type: MeasurementType = Field(
        title="measurement_type",
        default=MeasurementType.TOTAL,
        description="""
        The type of measurement represented by a value associated with a timestamp:
        mean, min, max, measured, total
        """,
        json_schema_extra={
            "options": MeasurementType.format_for_docs(),
        },
    )

    ranges: list[TimeRangeModel] = Field(
        title="time_ranges",
        description="Defines the continuous ranges of datetime in the data, inclusive of start and end time.",
    )
    time_interval_type: TimeIntervalType = Field(
        title="time_interval",
        description="The range of time that the value associated with a timestamp represents, e.g., period-beginning",
        json_schema_extra={
            "options": TimeIntervalType.format_descriptions_for_docs(),
        },
    )
    time_column: str = Field(
        title="time_column",
        description="Name of time column in the dataframe. It should be updated during the query process to reflect "
        "any changes to the dataframe time column.",
        # Default is the first field name of DatetimeTimestampType.
        default=next(iter(DatetimeTimestampType._fields)),
    )
    localize_to_time_zone: bool = Field(
        title="localize_to_time_zone",
        default=True,
        description="Whether to localize timestamps to time zone(s). If True, timestamps in the dataframe must be tz-naive.",
    )

    @model_validator(mode="before")
    @classmethod
    def handle_legacy_fields(cls, values):
        """Migrate deprecated config fields to the current schema.

        Mutates ``values`` in place. The migration order matters: the
        timezone -> time_zone_format step must run after the
        datetime_format -> time_zone_format step, and the format_type
        normalization must run after both.
        """
        # leap_day_adjustment was deprecated; only the no-op value "none" is
        # accepted, anything else is an error.
        if "leap_day_adjustment" in values:
            if values["leap_day_adjustment"] != "none":
                msg = f"Unknown data_schema format: {values=}"
                raise ValueError(msg)
            logger.warning(
                "Dropping deprecated leap_day_adjustment field from the datetime config."
            )
            values.pop("leap_day_adjustment")

        # datetime_format was renamed to time_zone_format.
        if "datetime_format" in values:
            logger.warning(
                "Moving legacy datetime_format field to new time_zone_format struct within the datetime config."
            )
            datetime_format = values.pop("datetime_format")
            values["time_zone_format"] = datetime_format

        # A top-level timezone field implies the single-time-zone
        # (aligned_in_absolute_time) format; merge it into time_zone_format.
        if "timezone" in values:
            logger.warning(
                "Renaming legacy timezone field to time_zone and moving it to new time_zone_format struct within the datetime config."
            )
            time_zone = values.pop("timezone")
            if "time_zone_format" in values:
                # time_zone_format may be raw dict input or an already-built model.
                if isinstance(values["time_zone_format"], dict):
                    assert (
                        values["time_zone_format"].get("format_type")
                        == TimeZoneFormat.ALIGNED_IN_ABSOLUTE_TIME.value
                    )
                    values["time_zone_format"]["time_zone"] = time_zone
                elif isinstance(values["time_zone_format"], AlignedTimeSingleTimeZone):
                    assert (
                        values["time_zone_format"].format_type
                        == TimeZoneFormat.ALIGNED_IN_ABSOLUTE_TIME
                    )
                    values["time_zone_format"].time_zone = time_zone
                elif isinstance(values["time_zone_format"], LocalTimeMultipleTimeZones):
                    msg = "Cannot set single time_zone for LocalTimeMultipleTimeZones time_zone_format."
                    raise ValueError(msg)
                else:
                    msg = f"Unexpected time_zone_format type: {values['time_zone_format']}"
                    raise ValueError(msg)
            else:
                values["time_zone_format"] = {
                    "format_type": TimeZoneFormat.ALIGNED_IN_ABSOLUTE_TIME.value,
                    "time_zone": time_zone,
                }

        # Normalize the legacy format_type value "aligned".
        if "time_zone_format" in values:
            if isinstance(values["time_zone_format"], dict):
                if values["time_zone_format"].get("format_type") == "aligned":
                    logger.warning(
                        "Renaming legacy format_type 'aligned' to 'aligned_in_absolute_time' within the datetime config."
                    )
                    values["time_zone_format"][
                        "format_type"
                    ] = TimeZoneFormat.ALIGNED_IN_ABSOLUTE_TIME.value
            elif isinstance(values["time_zone_format"], AlignedTimeSingleTimeZone):
                # already correct
                pass
            elif isinstance(values["time_zone_format"], LocalTimeMultipleTimeZones):
                # already correct
                pass
            else:
                msg = f"Unexpected time_zone_format type: {values['time_zone_format']}"
                raise ValueError(msg)

        # str_format moved from the top level into each range entry.
        if "str_format" in values:
            logger.warning(
                "Moving legacy str_format field to ranges struct within the datetime config."
            )
            str_format = values.pop("str_format")
            for trange in values.get("ranges", []):
                if isinstance(trange, TimeRangeModel):
                    trange.str_format = str_format
                elif isinstance(trange, dict):
                    trange["str_format"] = str_format
                else:
                    msg = f"Unexpected ranges type: {type(trange)}"
                    raise ValueError(msg)

        # frequency moved from the top level into each range entry.
        if "frequency" in values:
            logger.warning(
                "Moving legacy frequency field to ranges struct within the datetime config."
            )
            frequency = values.pop("frequency")
            for trange in values.get("ranges", []):
                if isinstance(trange, TimeRangeModel):
                    trange.frequency = frequency
                elif isinstance(trange, dict):
                    trange["frequency"] = frequency
                else:
                    msg = f"Unexpected ranges type: {type(trange)}"
                    raise ValueError(msg)
        return values

    # @model_validator(mode="after")
    # def check_frequency(self) -> "DateTimeDimensionModel":
    #     if self.frequency in [timedelta(days=365), timedelta(days=366)]:
    #         raise ValueError(
    #             f"frequency={self.frequency}, datetime config does not allow 365 or 366 days frequency, "
    #             "use class=AnnualTime, time_type=annual to specify a year series."
    #         )
    #     return self

    @field_validator("ranges")
    @classmethod
    def check_times(cls, ranges: list[TimeRangeModel]) -> list[TimeRangeModel]:
        """Validate the range list via the module-level _check_time_ranges helper."""
        return _check_time_ranges(ranges)

    def is_time_zone_required_in_geography(self) -> bool:
        # A time_zone column is only needed when timestamps are aligned in
        # clock time, because each geography then carries its own time zone.
        if self.time_zone_format.format_type == TimeZoneFormat.ALIGNED_IN_CLOCK_TIME:
            return True
        return False
644
+
645
+
646
class AnnualTimeDimensionModel(TimeDimensionBaseModel):
    """Defines an annual time dimension where timestamps are years.
    Each value associated with a year represents the MEASUREMENT_TYPE over the entire year.
    i.e., MEASUREMENT_TYPE = total means the value is the total over the year, not over the range frequency.
    """

    time_type: TimeDimensionType = Field(default=TimeDimensionType.ANNUAL)
    measurement_type: MeasurementType = Field(
        title="measurement_type",
        default=MeasurementType.TOTAL,
        description="""
        The type of measurement represented by a value associated with an annual time:
        e.g., total
        """,
        json_schema_extra={
            "options": MeasurementType.format_for_docs(),
        },
    )

    ranges: list[AnnualRangeModel] = Field(
        default=[],
        title="ranges",
        description="Defines the contiguous ranges of annual time in the data, inclusive of start and end time.",
    )

    include_leap_day: bool = Field(
        title="include_leap_day",
        default=False,
        description="Whether annual time includes leap day.",
    )

    @model_validator(mode="before")
    @classmethod
    def handle_legacy_fields(cls, values):
        """Migrate the deprecated top-level str_format field into each range entry.

        Mutates ``values`` in place.
        """
        if "str_format" in values:
            logger.warning(
                "Moving legacy str_format field to ranges struct within the annual time config."
            )
            str_format = values.pop("str_format")
            for trange in values.get("ranges", []):
                # Range entries may be raw dict input or already-built models.
                if isinstance(trange, AnnualRangeModel):
                    trange.str_format = str_format
                elif isinstance(trange, dict):
                    trange["str_format"] = str_format
                else:
                    msg = f"Unexpected ranges type: {type(trange)}"
                    raise ValueError(msg)

        return values

    @field_validator("ranges")
    @classmethod
    def check_times(cls, ranges: list[AnnualRangeModel]) -> list[AnnualRangeModel]:
        """Validate the range list via the module-level _check_annual_ranges helper."""
        return _check_annual_ranges(ranges)

    @field_validator("measurement_type")
    @classmethod
    def check_measurement_type(cls, measurement_type: MeasurementType) -> MeasurementType:
        """Reject any measurement type other than total."""
        # This restriction exists because any other measurement type would require a frequency,
        # and that isn't part of the model definition.
        if measurement_type != MeasurementType.TOTAL:
            msg = f"Annual time currently only supports MeasurementType total: {measurement_type}"
            raise ValueError(msg)
        return measurement_type

    def is_time_zone_required_in_geography(self) -> bool:
        # Annual values have no sub-daily resolution, so time zones are irrelevant.
        return False
713
+
714
+
715
class RepresentativePeriodTimeDimensionModel(TimeDimensionBaseModel):
    """Defines a representative time dimension."""

    time_type: TimeDimensionType = Field(default=TimeDimensionType.REPRESENTATIVE_PERIOD)
    measurement_type: MeasurementType = Field(
        title="measurement_type",
        default=MeasurementType.TOTAL,
        description="""
        The type of measurement represented by a value associated with a timestamp:
        e.g., mean, total
        """,
        json_schema_extra={
            "options": MeasurementType.format_for_docs(),
        },
    )
    format: RepresentativePeriodFormat = Field(
        title="format",
        description="Format of the timestamps in the load data",
    )
    ranges: list[MonthRangeModel] = Field(
        title="ranges",
        description="Defines the continuous ranges of datetime in the data, inclusive of start and end time.",
    )
    time_interval_type: TimeIntervalType = Field(
        title="time_interval",
        description="The range of time that the value associated with a timestamp represents",
    )

    def is_time_zone_required_in_geography(self) -> bool:
        # NOTE(review): representative periods appear to be interpreted as
        # local times needing geography-supplied time zones — confirm with callers.
        return True
745
+
746
+
747
class DatetimeExternalTimeZoneDimensionModel(TimeDimensionBaseModel):
    """Defines a time dimension where timestamps are tz-naive and require localizing to a time zone
    using a time zone column."""

    time_zone_format: Union[AlignedTimeSingleTimeZone, LocalTimeMultipleTimeZones] = Field(
        title="time_zone_format",
        # Discriminated union: format_type selects the concrete format model.
        discriminator="format_type",
        description="Specifies whether timestamps are aligned in absolute time or in local time when adjusted for time zone.",
    )
    time_type: TimeDimensionType = Field(default=TimeDimensionType.DATETIME_EXTERNAL_TZ)
    measurement_type: MeasurementType = Field(
        title="measurement_type",
        default=MeasurementType.TOTAL,
        description="""
        The type of measurement represented by a value associated with a timestamp:
        e.g., mean, total
        """,
        json_schema_extra={
            "options": MeasurementType.format_for_docs(),
        },
    )
    ranges: list[TimeRangeModel] = Field(
        title="time_ranges",
        description="""
        Defines the continuous ranges of time in the data, inclusive of start and end time.
        If the timestamps are tz-naive, they will be localized to the time zones provided in the geography dimension records.
        """,
    )
    time_interval_type: TimeIntervalType = Field(
        title="time_interval",
        description="The range of time that the value associated with a timestamp represents, e.g., period-beginning",
        json_schema_extra={
            "options": TimeIntervalType.format_descriptions_for_docs(),
        },
    )

    @field_validator("ranges")
    @classmethod
    def check_times(cls, ranges: list[TimeRangeModel]) -> list[TimeRangeModel]:
        # Verify each range parses, is tz-naive, ordered, and frequency-aligned.
        return _check_time_ranges(ranges)

    def is_time_zone_required_in_geography(self) -> bool:
        # Timestamps here are tz-naive by definition; the geography dimension
        # records supply the time zone used for localization.
        return True
790
+
791
+
792
class IndexTimeDimensionModel(TimeDimensionBaseModel):
    """Defines a time dimension where timestamps are indices and requires converting to datetime."""

    time_type: TimeDimensionType = Field(default=TimeDimensionType.INDEX)
    measurement_type: MeasurementType = Field(
        title="measurement_type",
        default=MeasurementType.TOTAL,
        description="""
        The type of measurement represented by a value associated with a timestamp:
        e.g., mean, total
        """,
        json_schema_extra={
            "options": MeasurementType.format_for_docs(),
        },
    )
    ranges: list[IndexRangeModel] = Field(
        title="ranges",
        description="Defines the continuous ranges of indices of the data, inclusive of start and end index.",
    )
    time_interval_type: TimeIntervalType = Field(
        title="time_interval",
        description="The range of time that the value associated with a timestamp represents, e.g., period-beginning",
        json_schema_extra={
            "options": TimeIntervalType.format_descriptions_for_docs(),
        },
    )

    @model_validator(mode="before")
    @classmethod
    def handle_legacy_fields(cls, values):
        """Migrate deprecated top-level fields into the per-range structs.

        Older configs declared starting_timestamps, str_format, and frequency
        at the top level; each is moved into the ranges entries so legacy
        files still validate.
        """
        if "starting_timestamps" in values:
            logger.warning(
                "Moving legacy starting_timestamps field to ranges struct within the index time config."
            )
            timestamps = values.get("starting_timestamps", [])
            ranges = values.get("ranges", [])
            # Explicit ValueError instead of an assert so the check is not
            # stripped under python -O and surfaces as a validation error.
            if len(timestamps) != len(ranges):
                msg = (
                    f"starting_timestamps length ({len(timestamps)}) must match "
                    f"ranges length ({len(ranges)}) in the index time config."
                )
                raise ValueError(msg)
            for trange, st in zip(ranges, timestamps):
                trange["starting_timestamp"] = st
            values.pop("starting_timestamps")

        if "str_format" in values:
            logger.warning(
                "Moving legacy str_format field to ranges struct within the index time config."
            )
            str_format = values.pop("str_format")
            for trange in values.get("ranges", []):
                trange["str_format"] = str_format

        if "frequency" in values:
            logger.warning(
                "Moving legacy frequency field to ranges struct within the index time config."
            )
            frequency = values.pop("frequency")
            for trange in values.get("ranges", []):
                trange["frequency"] = frequency

        return values

    @field_validator("ranges")
    @classmethod
    def check_indices(cls, ranges: list[IndexRangeModel]) -> list[IndexRangeModel]:
        # Ensure each index range has end >= start.
        return _check_index_ranges(ranges)

    def is_time_zone_required_in_geography(self) -> bool:
        # Indices are converted to datetimes; geography records supply the
        # time zone needed for that conversion.
        return True
856
+
857
+
858
class NoOpTimeDimensionModel(TimeDimensionBaseModel):
    """Defines a NoOp time dimension."""

    # Declared via Field(default=...) for consistency with the other time
    # dimension models in this module; behavior is identical.
    time_type: TimeDimensionType = Field(default=TimeDimensionType.NOOP)

    def is_time_zone_required_in_geography(self) -> bool:
        """NoOp time has nothing to localize, so no time zone is needed."""
        return False
865
+
866
+
867
class DimensionReferenceModel(DSGBaseModel):
    """Reference to a dimension stored in the registry"""

    dimension_type: DimensionType = Field(
        title="dimension_type",
        # Input documents may use the shorter key "type" for this field.
        alias="type",
        description="Type of the dimension",
        json_schema_extra={
            "options": DimensionType.format_for_docs(),
        },
    )
    dimension_id: str = Field(
        title="dimension_id",
        description="Unique ID of the dimension in the registry. "
        "The dimension ID is generated by dsgrid when a dimension is registered. "
        "Only alphanumerics and dashes are supported.",
    )
    version: str = Field(
        title="version",
        # TODO: add notes about warnings for outdated versions DSGRID-189 & DSGRID-148
        description="Version of the dimension. "
        "The version string must be in semver format (e.g., '1.0.0') and it must be "
        " a valid/existing version in the registry.",
    )
891
+
892
+
893
def handle_dimension_union(values):
    """Convert a list of raw dimension dicts into concrete dimension models.

    Items that are already DimensionBaseModel instances pass through untouched.
    Time dimensions are dispatched on their "time_type" field; all other
    dimension types become DimensionModel.

    Raises
    ------
    ValueError
        If a time dimension has a missing or unsupported time_type.
    """
    # Dispatch table: time_type value -> concrete model class.
    # TODO add support for DatetimeExternalTimeZoneDimensionModel
    time_models = {
        TimeDimensionType.DATETIME.value: DateTimeDimensionModel,
        TimeDimensionType.ANNUAL.value: AnnualTimeDimensionModel,
        TimeDimensionType.REPRESENTATIVE_PERIOD.value: RepresentativePeriodTimeDimensionModel,
        TimeDimensionType.INDEX.value: IndexTimeDimensionModel,
        TimeDimensionType.NOOP.value: NoOpTimeDimensionModel,
    }
    # Deep-copy so the caller's input list/dicts are not mutated in place.
    values = copy.deepcopy(values)
    for i, value in enumerate(values):
        if isinstance(value, DimensionBaseModel):
            continue

        dim_type = value.get("type")
        if dim_type is None:
            dim_type = value["dimension_type"]
        # NOTE: Errors inside DimensionModel or DateTimeDimensionModel will be duplicated by Pydantic
        if dim_type == DimensionType.TIME.value:
            time_type = value.get("time_type")
            model_class = time_models.get(time_type)
            if model_class is None:
                # ValueError (not KeyError) so Pydantic reports a validation
                # error, including when time_type is missing entirely.
                options = [x.value for x in TimeDimensionType]
                msg = f"{time_type} not supported, valid options: {options}"
                raise ValueError(msg)
            values[i] = model_class(**value)
        else:
            values[i] = DimensionModel(**value)
    return values
922
+
923
+
924
# Annotated list type for dimension configs: the BeforeValidator runs
# handle_dimension_union to coerce raw dicts into the matching concrete
# dimension model class before Pydantic validates against the Union.
DimensionsListModel = Annotated[
    list[
        Union[
            DimensionModel,
            DateTimeDimensionModel,
            AnnualTimeDimensionModel,
            RepresentativePeriodTimeDimensionModel,
            DatetimeExternalTimeZoneDimensionModel,
            IndexTimeDimensionModel,
            NoOpTimeDimensionModel,
        ]
    ],
    BeforeValidator(handle_dimension_union),
]
938
+
939
+
940
+ def _check_time_ranges(ranges: list[TimeRangeModel]) -> list[TimeRangeModel]:
941
+ for trange in ranges:
942
+ assert isinstance(trange.frequency, timedelta)
943
+ if trange.frequency in [timedelta(days=365), timedelta(days=366)]:
944
+ msg = (
945
+ f"{trange.frequency=}, datetime config does not allow 365 or 366 days frequency, "
946
+ "use class=AnnualTime, time_type=annual to specify a year series."
947
+ )
948
+ raise ValueError(msg)
949
+
950
+ # Make sure start and end time parse.
951
+ start = datetime.strptime(trange.start, trange.str_format)
952
+ end = datetime.strptime(trange.end, trange.str_format)
953
+ # Make sure start and end is tz-naive.
954
+ if start.tzinfo is not None or end.tzinfo is not None:
955
+ msg = (
956
+ f"datetime range {trange} start and end need to be tz-naive. "
957
+ "Pass in the time zone info via the time_zone_format parameter"
958
+ )
959
+ raise ValueError(msg)
960
+ if end < start:
961
+ msg = f"datetime range {trange} end must not be less than start."
962
+ raise ValueError(msg)
963
+ if (end - start) % trange.frequency != timedelta(0):
964
+ msg = f"datetime range {trange} is inconsistent with {trange.frequency}"
965
+ raise ValueError(msg)
966
+
967
+ return ranges
968
+
969
+
970
+ def _check_annual_ranges(ranges: list[AnnualRangeModel]) -> list[AnnualRangeModel]:
971
+ for trange in ranges:
972
+ # Make sure start and end time parse.
973
+ start = datetime.strptime(trange.start, trange.str_format)
974
+ end = datetime.strptime(trange.end, trange.str_format)
975
+ freq = trange.frequency
976
+ if end < start:
977
+ msg = f"annual time range {trange} end must not be less than start."
978
+ raise ValueError(msg)
979
+
980
+ assert isinstance(freq, int)
981
+ if (end.year - start.year) % freq != 0:
982
+ msg = f"annual time range start and end are inconsistent with frequency: \n{trange}"
983
+ raise ValueError(msg)
984
+ return ranges
985
+
986
+
987
+ def _check_index_ranges(ranges: list[IndexRangeModel]):
988
+ for trange in ranges:
989
+ if trange.end < trange.start:
990
+ msg = f"index range {trange} end must not be less than start."
991
+ raise ValueError(msg)
992
+
993
+ return ranges
994
+
995
+
996
class DimensionCommonModel(DSGBaseModel):
    """Common attributes for all dimensions"""

    # Display name of the dimension.
    name: str
    # Which dimension this is (e.g., geography, time) — see DimensionType.
    dimension_type: DimensionType
    # Registry-assigned unique identifier (generated at registration time).
    dimension_id: str
    # Name of the implementing class — presumably the dimension model's class;
    # confirm against DimensionBaseModel.
    class_name: str
    # Human-readable description of the dimension.
    description: str
1004
+
1005
+
1006
class ProjectDimensionModel(DimensionCommonModel):
    """Common attributes for all dimensions that are assigned to a project"""

    # Role of the dimension within the project; set from a DimensionCategory
    # value by create_project_dimension_model.
    category: DimensionCategory
1010
+
1011
+
1012
def create_dimension_common_model(model) -> DimensionCommonModel:
    """Build a DimensionCommonModel from any dimension model subclass.

    Gives the API one common model for all dimensions while sidestepping the
    complexity of DimensionBaseModel validators.
    """
    common_fields = DimensionCommonModel.model_fields.keys()
    attrs = {}
    for field_name in type(model).model_fields:
        if field_name in common_fields:
            attrs[field_name] = getattr(model, field_name)
    return DimensionCommonModel(**attrs)
1020
+
1021
+
1022
def create_project_dimension_model(model, category: DimensionCategory) -> ProjectDimensionModel:
    """Build a ProjectDimensionModel by tagging the common fields with a category."""
    payload = create_dimension_common_model(model).model_dump()
    payload["category"] = category.value
    return ProjectDimensionModel(**payload)