dsgrid-toolkit 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dsgrid-toolkit might be problematic. Click here for more details.
- dsgrid/__init__.py +22 -0
- dsgrid/api/__init__.py +0 -0
- dsgrid/api/api_manager.py +179 -0
- dsgrid/api/app.py +420 -0
- dsgrid/api/models.py +60 -0
- dsgrid/api/response_models.py +116 -0
- dsgrid/apps/__init__.py +0 -0
- dsgrid/apps/project_viewer/app.py +216 -0
- dsgrid/apps/registration_gui.py +444 -0
- dsgrid/chronify.py +22 -0
- dsgrid/cli/__init__.py +0 -0
- dsgrid/cli/common.py +120 -0
- dsgrid/cli/config.py +177 -0
- dsgrid/cli/download.py +13 -0
- dsgrid/cli/dsgrid.py +142 -0
- dsgrid/cli/dsgrid_admin.py +349 -0
- dsgrid/cli/install_notebooks.py +62 -0
- dsgrid/cli/query.py +711 -0
- dsgrid/cli/registry.py +1773 -0
- dsgrid/cloud/__init__.py +0 -0
- dsgrid/cloud/cloud_storage_interface.py +140 -0
- dsgrid/cloud/factory.py +31 -0
- dsgrid/cloud/fake_storage_interface.py +37 -0
- dsgrid/cloud/s3_storage_interface.py +156 -0
- dsgrid/common.py +35 -0
- dsgrid/config/__init__.py +0 -0
- dsgrid/config/annual_time_dimension_config.py +187 -0
- dsgrid/config/common.py +131 -0
- dsgrid/config/config_base.py +148 -0
- dsgrid/config/dataset_config.py +684 -0
- dsgrid/config/dataset_schema_handler_factory.py +41 -0
- dsgrid/config/date_time_dimension_config.py +108 -0
- dsgrid/config/dimension_config.py +54 -0
- dsgrid/config/dimension_config_factory.py +65 -0
- dsgrid/config/dimension_mapping_base.py +349 -0
- dsgrid/config/dimension_mappings_config.py +48 -0
- dsgrid/config/dimensions.py +775 -0
- dsgrid/config/dimensions_config.py +71 -0
- dsgrid/config/index_time_dimension_config.py +76 -0
- dsgrid/config/input_dataset_requirements.py +31 -0
- dsgrid/config/mapping_tables.py +209 -0
- dsgrid/config/noop_time_dimension_config.py +42 -0
- dsgrid/config/project_config.py +1457 -0
- dsgrid/config/registration_models.py +199 -0
- dsgrid/config/representative_period_time_dimension_config.py +194 -0
- dsgrid/config/simple_models.py +49 -0
- dsgrid/config/supplemental_dimension.py +29 -0
- dsgrid/config/time_dimension_base_config.py +200 -0
- dsgrid/data_models.py +155 -0
- dsgrid/dataset/__init__.py +0 -0
- dsgrid/dataset/dataset.py +123 -0
- dsgrid/dataset/dataset_expression_handler.py +86 -0
- dsgrid/dataset/dataset_mapping_manager.py +121 -0
- dsgrid/dataset/dataset_schema_handler_base.py +899 -0
- dsgrid/dataset/dataset_schema_handler_one_table.py +196 -0
- dsgrid/dataset/dataset_schema_handler_standard.py +303 -0
- dsgrid/dataset/growth_rates.py +162 -0
- dsgrid/dataset/models.py +44 -0
- dsgrid/dataset/table_format_handler_base.py +257 -0
- dsgrid/dataset/table_format_handler_factory.py +17 -0
- dsgrid/dataset/unpivoted_table.py +121 -0
- dsgrid/dimension/__init__.py +0 -0
- dsgrid/dimension/base_models.py +218 -0
- dsgrid/dimension/dimension_filters.py +308 -0
- dsgrid/dimension/standard.py +213 -0
- dsgrid/dimension/time.py +531 -0
- dsgrid/dimension/time_utils.py +88 -0
- dsgrid/dsgrid_rc.py +88 -0
- dsgrid/exceptions.py +105 -0
- dsgrid/filesystem/__init__.py +0 -0
- dsgrid/filesystem/cloud_filesystem.py +32 -0
- dsgrid/filesystem/factory.py +32 -0
- dsgrid/filesystem/filesystem_interface.py +136 -0
- dsgrid/filesystem/local_filesystem.py +74 -0
- dsgrid/filesystem/s3_filesystem.py +118 -0
- dsgrid/loggers.py +132 -0
- dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +950 -0
- dsgrid/notebooks/registration.ipynb +48 -0
- dsgrid/notebooks/start_notebook.sh +11 -0
- dsgrid/project.py +451 -0
- dsgrid/query/__init__.py +0 -0
- dsgrid/query/dataset_mapping_plan.py +142 -0
- dsgrid/query/derived_dataset.py +384 -0
- dsgrid/query/models.py +726 -0
- dsgrid/query/query_context.py +287 -0
- dsgrid/query/query_submitter.py +847 -0
- dsgrid/query/report_factory.py +19 -0
- dsgrid/query/report_peak_load.py +70 -0
- dsgrid/query/reports_base.py +20 -0
- dsgrid/registry/__init__.py +0 -0
- dsgrid/registry/bulk_register.py +161 -0
- dsgrid/registry/common.py +287 -0
- dsgrid/registry/config_update_checker_base.py +63 -0
- dsgrid/registry/data_store_factory.py +34 -0
- dsgrid/registry/data_store_interface.py +69 -0
- dsgrid/registry/dataset_config_generator.py +156 -0
- dsgrid/registry/dataset_registry_manager.py +734 -0
- dsgrid/registry/dataset_update_checker.py +16 -0
- dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
- dsgrid/registry/dimension_mapping_update_checker.py +16 -0
- dsgrid/registry/dimension_registry_manager.py +413 -0
- dsgrid/registry/dimension_update_checker.py +16 -0
- dsgrid/registry/duckdb_data_store.py +185 -0
- dsgrid/registry/filesystem_data_store.py +141 -0
- dsgrid/registry/filter_registry_manager.py +123 -0
- dsgrid/registry/project_config_generator.py +57 -0
- dsgrid/registry/project_registry_manager.py +1616 -0
- dsgrid/registry/project_update_checker.py +48 -0
- dsgrid/registry/registration_context.py +223 -0
- dsgrid/registry/registry_auto_updater.py +316 -0
- dsgrid/registry/registry_database.py +662 -0
- dsgrid/registry/registry_interface.py +446 -0
- dsgrid/registry/registry_manager.py +544 -0
- dsgrid/registry/registry_manager_base.py +367 -0
- dsgrid/registry/versioning.py +92 -0
- dsgrid/spark/__init__.py +0 -0
- dsgrid/spark/functions.py +545 -0
- dsgrid/spark/types.py +50 -0
- dsgrid/tests/__init__.py +0 -0
- dsgrid/tests/common.py +139 -0
- dsgrid/tests/make_us_data_registry.py +204 -0
- dsgrid/tests/register_derived_datasets.py +103 -0
- dsgrid/tests/utils.py +25 -0
- dsgrid/time/__init__.py +0 -0
- dsgrid/time/time_conversions.py +80 -0
- dsgrid/time/types.py +67 -0
- dsgrid/units/__init__.py +0 -0
- dsgrid/units/constants.py +113 -0
- dsgrid/units/convert.py +71 -0
- dsgrid/units/energy.py +145 -0
- dsgrid/units/power.py +87 -0
- dsgrid/utils/__init__.py +0 -0
- dsgrid/utils/dataset.py +612 -0
- dsgrid/utils/files.py +179 -0
- dsgrid/utils/filters.py +125 -0
- dsgrid/utils/id_remappings.py +100 -0
- dsgrid/utils/py_expression_eval/LICENSE +19 -0
- dsgrid/utils/py_expression_eval/README.md +8 -0
- dsgrid/utils/py_expression_eval/__init__.py +847 -0
- dsgrid/utils/py_expression_eval/tests.py +283 -0
- dsgrid/utils/run_command.py +70 -0
- dsgrid/utils/scratch_dir_context.py +64 -0
- dsgrid/utils/spark.py +918 -0
- dsgrid/utils/spark_partition.py +98 -0
- dsgrid/utils/timing.py +239 -0
- dsgrid/utils/utilities.py +184 -0
- dsgrid/utils/versioning.py +36 -0
- dsgrid_toolkit-0.2.0.dist-info/METADATA +216 -0
- dsgrid_toolkit-0.2.0.dist-info/RECORD +152 -0
- dsgrid_toolkit-0.2.0.dist-info/WHEEL +4 -0
- dsgrid_toolkit-0.2.0.dist-info/entry_points.txt +4 -0
- dsgrid_toolkit-0.2.0.dist-info/licenses/LICENSE +29 -0
dsgrid/dimension/time.py
ADDED
|
@@ -0,0 +1,531 @@
|
|
|
1
|
+
"""Dimensions related to time"""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timedelta
|
|
4
|
+
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
|
|
5
|
+
import logging
|
|
6
|
+
from pydantic import Field
|
|
7
|
+
from enum import Enum
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
from dsgrid.data_models import DSGEnum, EnumValue, DSGBaseModel
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class TimeDimensionType(DSGEnum):
    """Defines the supported time formats in the load data."""

    # Timestamp-based time columns.
    DATETIME = "datetime"
    # One value per year.
    ANNUAL = "annual"
    # Representative periods (e.g., one week per month); see RepresentativePeriodFormat.
    REPRESENTATIVE_PERIOD = "representative_period"
    # Integer indexes that map to represented timestamps.
    INDEX = "index"
    # No time dimension.
    NOOP = "noop"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class DatetimeFormat(str, Enum):
    """Defines the time format of the datetime config model"""

    # NOTE: plain str+Enum (not DSGEnum) so values serialize directly as strings.
    ALIGNED = "aligned"
    LOCAL = "local"
    LOCAL_AS_STRINGS = "local_as_strings"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class RepresentativePeriodFormat(DSGEnum):
    """Defines the supported formats for representative period data."""

    # All instances of this Enum must declare frequency.
    # This Enum may be replaced by a generic implementation in order to support a large
    # number of permutations (seasons, weekend day vs week day, sub-hour time, etc).

    ONE_WEEK_PER_MONTH_BY_HOUR = EnumValue(
        value="one_week_per_month_by_hour",
        frequency=timedelta(hours=1),
        description="load_data columns use 'month', 'day_of_week', 'hour' to specify time",
    )
    ONE_WEEKDAY_DAY_AND_ONE_WEEKEND_DAY_PER_MONTH_BY_HOUR = EnumValue(
        value="one_weekday_day_and_one_weekend_day_per_month_by_hour",
        frequency=timedelta(hours=1),
        description="load_data columns use 'month', 'hour', 'is_weekday' to specify time",
    )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class LeapDayAdjustmentType(DSGEnum):
    """Leap day adjustment enum types.

    Each member describes which single calendar day (if any) is dropped,
    along with its data, to reduce a leap year to 365 days.
    """

    DROP_DEC31 = EnumValue(
        value="drop_dec31",
        description="To adjust for leap years, December 31st timestamps and data get dropped.",
    )
    DROP_FEB29 = EnumValue(
        value="drop_feb29",
        # Bug fix: corrected the misspelling "Feburary" in the description.
        description="February 29th timestamps and data are dropped. Currently not yet supported by dsgrid.",
    )
    DROP_JAN1 = EnumValue(
        value="drop_jan1",
        description="To adjust for leap years, January 1st timestamps and data get dropped.",
    )
    NONE = EnumValue(value="none", description="No leap day adjustment made.")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class DaylightSavingSpringForwardType(DSGEnum):
    """Daylight saving spring forward adjustment enum types"""

    DROP = EnumValue(
        value="drop",
        description="Drop timestamp(s) and associated data for the spring forward hour (2AM in March)",
    )
    NONE = EnumValue(value="none", description="No daylight saving adjustment for data.")
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class DaylightSavingFallBackType(DSGEnum):
    """Daylight saving fall back adjustment enum types"""

    INTERPOLATE = EnumValue(
        value="interpolate",
        description="Fill data by interpolating between the left and right edges of the dataframe.",
    )
    DUPLICATE = EnumValue(
        value="duplicate",
        description="Fill data by duplicating the fall-back hour (1AM in November)",
    )
    NONE = EnumValue(value="none", description="No daylight saving adjustment for data.")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class TimeIntervalType(DSGEnum):
    """Time interval enum types"""

    # TODO: R2PD uses a different set; do we want to align?
    # https://github.com/Smart-DS/R2PD/blob/master/R2PD/tshelpers.py#L15

    PERIOD_ENDING = EnumValue(
        value="period_ending",
        description="A time interval that is period ending is coded by the end time. E.g., 2pm (with"
        " freq=1h) represents a period of time between 1-2pm.",
    )
    PERIOD_BEGINNING = EnumValue(
        value="period_beginning",
        description="A time interval that is period beginning is coded by the beginning time. E.g.,"
        " 2pm (with freq=01:00:00) represents a period of time between 2-3pm. This is the dsgrid"
        " default.",
    )
    INSTANTANEOUS = EnumValue(
        value="instantaneous",
        description="The time record value represents measured, instantaneous time",
    )
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class MeasurementType(DSGEnum):
    """Time value measurement enum types"""

    MEAN = EnumValue(
        value="mean",
        description="Data values represent the average value in a time range",
    )
    MIN = EnumValue(
        value="min",
        description="Data values represent the minimum value in a time range",
    )
    MAX = EnumValue(
        value="max",
        description="Data values represent the maximum value in a time range",
    )
    MEASURED = EnumValue(
        value="measured",
        description="Data values represent the measured value at that reported time",
    )
    TOTAL = EnumValue(
        value="total",
        description="Data values represent the sum of values in a time range",
    )
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class TimeZone(DSGEnum):
    """Time zone enum types

    - tz: zoneinfo.available_timezones()
    - tz_name: spark uses Java timezones: https://jenkov.com/tutorials/java-date-time/java-util-timezone.html
    """

    UTC = EnumValue(
        value="UTC",
        description="Coordinated Universal Time",
        tz=ZoneInfo("UTC"),
        tz_name="UTC",
    )
    HST = EnumValue(
        value="HawaiiAleutianStandard",
        description="Hawaii Standard Time (UTC=-10). No daylight saving shifts.",
        # NOTE: tz and tz_name differ here (US/Hawaii vs Etc/GMT+10); both are
        # fixed UTC-10 per the description.
        tz=ZoneInfo("US/Hawaii"),
        tz_name="Etc/GMT+10",
    )
    AST = EnumValue(
        value="AlaskaStandard",
        description="Alaskan Standard Time (UTC=-9). No daylight saving shifts.",
        tz=ZoneInfo("Etc/GMT+9"),
        tz_name="Etc/GMT+9",
    )
    APT = EnumValue(
        value="AlaskaPrevailing",
        description="Alaska Prevailing Time. Commonly called Alaska Local Time. "
        "Includes daylight saving.",
        tz=ZoneInfo("US/Alaska"),
        tz_name="US/Alaska",
    )
    PST = EnumValue(
        value="PacificStandard",
        description="Pacific Standard Time (UTC=-8). No daylight saving shifts.",
        tz=ZoneInfo("Etc/GMT+8"),
        tz_name="Etc/GMT+8",
    )
    PPT = EnumValue(
        value="PacificPrevailing",
        description="Pacific Prevailing Time. Commonly called Pacific Local Time. "
        "Includes daylight saving.",
        tz=ZoneInfo("US/Pacific"),
        tz_name="US/Pacific",
    )
    MST = EnumValue(
        value="MountainStandard",
        description="Mountain Standard Time (UTC=-7). No daylight saving shifts.",
        tz=ZoneInfo("Etc/GMT+7"),
        tz_name="Etc/GMT+7",
    )
    MPT = EnumValue(
        value="MountainPrevailing",
        description="Mountain Prevailing Time. Commonly called Mountain Local Time. "
        "Includes daylight saving.",
        tz=ZoneInfo("US/Mountain"),
        tz_name="US/Mountain",
    )
    CST = EnumValue(
        value="CentralStandard",
        description="Central Standard Time (UTC=-6). No daylight saving shifts.",
        tz=ZoneInfo("Etc/GMT+6"),
        tz_name="Etc/GMT+6",
    )
    CPT = EnumValue(
        value="CentralPrevailing",
        description="Central Prevailing Time. Commonly called Central Local Time. "
        "Includes daylight saving.",
        tz=ZoneInfo("US/Central"),
        tz_name="US/Central",
    )
    EST = EnumValue(
        value="EasternStandard",
        description="Eastern Standard Time (UTC=-5). No daylight saving shifts.",
        tz=ZoneInfo("Etc/GMT+5"),
        tz_name="Etc/GMT+5",
    )
    EPT = EnumValue(
        value="EasternPrevailing",
        description="Eastern Prevailing Time. Commonly called Eastern Local Time. "
        "Includes daylight saving.",
        tz=ZoneInfo("US/Eastern"),
        tz_name="US/Eastern",
    )
    ARIZONA = EnumValue(
        value="USArizona",
        description="US/Arizona = Mountain Standard Time (UTC=-7). No daylight saving shifts. "
        "For Arizona state except Navajo County",
        tz=ZoneInfo("US/Arizona"),
        tz_name="US/Arizona",
    )

    def get_standard_time(self):
        """get equivalent standard time"""
        # UTC and HST have no daylight-saving counterpart; they map to themselves.
        if self == TimeZone.UTC:
            return TimeZone.UTC
        if self == TimeZone.HST:
            return TimeZone.HST
        # Each standard/prevailing pair collapses to the standard member.
        if self in [TimeZone.AST, TimeZone.APT]:
            return TimeZone.AST
        if self in [TimeZone.PST, TimeZone.PPT]:
            return TimeZone.PST
        if self in [TimeZone.MST, TimeZone.MPT]:
            return TimeZone.MST
        if self in [TimeZone.CST, TimeZone.CPT]:
            return TimeZone.CST
        if self in [TimeZone.EST, TimeZone.EPT]:
            return TimeZone.EST
        # Arizona does not observe daylight saving (see description above).
        if self == TimeZone.ARIZONA:
            return TimeZone.ARIZONA
        msg = f"BUG: case not covered: {self}"
        raise NotImplementedError(msg)

    def get_prevailing_time(self):
        """get equivalent prevailing time"""
        # UTC and HST have no daylight-saving counterpart; they map to themselves.
        if self == TimeZone.UTC:
            return TimeZone.UTC
        if self == TimeZone.HST:
            return TimeZone.HST
        # Each standard/prevailing pair collapses to the prevailing member.
        if self in [TimeZone.AST, TimeZone.APT]:
            return TimeZone.APT
        if self in [TimeZone.PST, TimeZone.PPT]:
            return TimeZone.PPT
        if self in [TimeZone.MST, TimeZone.MPT]:
            return TimeZone.MPT
        if self in [TimeZone.CST, TimeZone.CPT]:
            return TimeZone.CPT
        if self in [TimeZone.EST, TimeZone.EPT]:
            return TimeZone.EPT
        # Arizona does not observe daylight saving (see description above).
        if self == TimeZone.ARIZONA:
            return TimeZone.ARIZONA
        msg = f"BUG: case not covered: {self}"
        raise NotImplementedError(msg)

    def is_standard(self):
        # NOTE: ARIZONA is deliberately listed in both is_standard and
        # is_prevailing; it has no daylight-saving shift, so standard and
        # prevailing time coincide there.
        lst = [
            TimeZone.UTC,
            TimeZone.HST,
            TimeZone.AST,
            TimeZone.PST,
            TimeZone.MST,
            TimeZone.CST,
            TimeZone.EST,
            TimeZone.ARIZONA,
        ]
        if self in lst:
            return True
        return False

    def is_prevailing(self):
        # See the note in is_standard about ARIZONA appearing in both lists.
        lst = [
            TimeZone.APT,
            TimeZone.PPT,
            TimeZone.MPT,
            TimeZone.CPT,
            TimeZone.EPT,
            TimeZone.ARIZONA,
        ]
        if self in lst:
            return True
        return False
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
# Map each TimeZone member's Java/Spark-style tz_name to its ZoneInfo object.
_TIME_ZONE_NAME_TO_ZONE_INFO = {x.tz_name: x.tz for x in TimeZone}

# Import-time integrity check: tz_name values must be unique across TimeZone
# members, otherwise dict entries would silently collide.
assert len(_TIME_ZONE_NAME_TO_ZONE_INFO) == len(TimeZone)
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def get_zone_info_from_tz_name(tz_name: str) -> ZoneInfo:
    """Return the ZoneInfo matching tz_name.

    tz_name must be the tz_name of one of the TimeZone enum members;
    any other value raises KeyError.
    """
    return _TIME_ZONE_NAME_TO_ZONE_INFO[tz_name]
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def get_zone_info_from_spark_session(tz_name: str) -> ZoneInfo:
    """Return the ZoneInfo matching tz_name, which must have been read from the Spark session.

    The name is first interpreted as an IANA key; if ZoneInfo does not
    recognize it, fall back to the dsgrid TimeZone lookup table.
    """
    try:
        zone = ZoneInfo(key=tz_name)
    except ZoneInfoNotFoundError:
        # The Spark session time zone may have been set to a dsgrid tz_name
        # that is incompatible with ZoneInfo.
        zone = get_zone_info_from_tz_name(tz_name)
    return zone
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
class DaylightSavingAdjustmentModel(DSGBaseModel):
    """Defines how to drop and add data along with timestamps to convert standard time
    load profiles to clock time"""

    spring_forward_hour: DaylightSavingSpringForwardType = Field(
        title="spring_forward_hour",
        description="Data adjustment for spring forward hour (a 2AM in March)",
        default=DaylightSavingSpringForwardType.NONE,
        json_schema_extra={
            "options": DaylightSavingSpringForwardType.format_descriptions_for_docs(),
        },
    )

    fall_back_hour: DaylightSavingFallBackType = Field(
        title="fall_back_hour",
        # Bug fix: the description was copy-pasted from spring_forward_hour
        # ("spring forward hour ... 2AM in November"); the fall-back hour is
        # 1AM in November (see DaylightSavingFallBackType.DUPLICATE).
        description="Data adjustment for fall back hour (a 1AM in November)",
        default=DaylightSavingFallBackType.NONE,
        json_schema_extra={
            "options": DaylightSavingFallBackType.format_descriptions_for_docs(),
        },
    )
|
|
344
|
+
|
|
345
|
+
|
|
346
|
+
class TimeBasedDataAdjustmentModel(DSGBaseModel):
    """Defines how data needs to be adjusted with respect to time.
    For leap day adjustment, up to one full day of timestamps and data are dropped.
    For daylight savings, the dataframe is adjusted alongside the timestamps.
    This is useful when the load profiles are modeled in standard time and
    need to be converted to get clock time load profiles.
    """

    leap_day_adjustment: LeapDayAdjustmentType = Field(
        default=LeapDayAdjustmentType.NONE,
        title="leap_day_adjustment",
        description="Leap day adjustment method applied to time data. The dsgrid default is None, "
        "i.e., no adjustment made to leap years. Adjustments are made to leap years only.",
    )
    daylight_saving_adjustment: DaylightSavingAdjustmentModel = Field(
        title="daylight_saving_adjustment",
        description="Daylight saving adjustment method applied to time data",
        # Use default_factory so every model instance gets its own default
        # object instead of sharing one pre-built instance. The factory's own
        # field defaults are NONE/NONE, identical to the previous explicit
        # default.
        default_factory=DaylightSavingAdjustmentModel,
    )
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
class DatetimeRange:
|
|
371
|
+
def __init__(
|
|
372
|
+
self,
|
|
373
|
+
start,
|
|
374
|
+
end,
|
|
375
|
+
frequency,
|
|
376
|
+
time_based_data_adjustment: TimeBasedDataAdjustmentModel | None = None,
|
|
377
|
+
):
|
|
378
|
+
if time_based_data_adjustment is None:
|
|
379
|
+
time_based_data_adjustment = TimeBasedDataAdjustmentModel()
|
|
380
|
+
self.start = start
|
|
381
|
+
self.end = end
|
|
382
|
+
self.tzinfo = start.tzinfo
|
|
383
|
+
self.frequency = frequency
|
|
384
|
+
self.leap_day_adjustment = time_based_data_adjustment.leap_day_adjustment
|
|
385
|
+
self.dls_springforward_adjustment = (
|
|
386
|
+
time_based_data_adjustment.daylight_saving_adjustment.spring_forward_hour
|
|
387
|
+
)
|
|
388
|
+
self.dls_fallback_adjustment = (
|
|
389
|
+
time_based_data_adjustment.daylight_saving_adjustment.fall_back_hour
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
def __repr__(self):
|
|
393
|
+
return (
|
|
394
|
+
self.__class__.__qualname__
|
|
395
|
+
+ f"(start={self.start}, end={self.end}, frequency={self.frequency}, "
|
|
396
|
+
+ f"leap_day_adjustment={self.leap_day_adjustment}, "
|
|
397
|
+
+ f"dls_springforward_adjustment={self.dls_springforward_adjustment}, "
|
|
398
|
+
+ f"dls_fallback_adjustment={self.dls_fallback_adjustment}."
|
|
399
|
+
)
|
|
400
|
+
|
|
401
|
+
def __str__(self):
|
|
402
|
+
return self.show_range()
|
|
403
|
+
|
|
404
|
+
def show_range(self, n_show=5):
|
|
405
|
+
output = self.list_time_range()
|
|
406
|
+
n_show = min(len(output) // 2, n_show)
|
|
407
|
+
n_head = ", ".join([str(x) for x in output[:n_show]])
|
|
408
|
+
n_tail = ", ".join([str(x) for x in output[-n_show:]])
|
|
409
|
+
return n_head + ",\n ... , \n" + n_tail
|
|
410
|
+
|
|
411
|
+
def _iter_timestamps(self):
|
|
412
|
+
"""Return a generator of datetimes for a time range ('start' and 'end' times are inclusive).
|
|
413
|
+
There could be duplicates.
|
|
414
|
+
|
|
415
|
+
TODO: for future-selves, test functionality of LeapDayAdjustmentType in relation to TimeIntervalType to make sure drop behavior is expected.
|
|
416
|
+
|
|
417
|
+
Yields
|
|
418
|
+
------
|
|
419
|
+
datetime
|
|
420
|
+
|
|
421
|
+
"""
|
|
422
|
+
cur = self.start.to_pydatetime().astimezone(ZoneInfo("UTC"))
|
|
423
|
+
end = self.end.to_pydatetime().astimezone(ZoneInfo("UTC")) + self.frequency
|
|
424
|
+
|
|
425
|
+
while cur < end:
|
|
426
|
+
cur_tz = cur.astimezone(self.tzinfo)
|
|
427
|
+
cur_tz = adjust_timestamp_by_dst_offset(cur_tz, self.frequency)
|
|
428
|
+
month = cur_tz.month
|
|
429
|
+
day = cur_tz.day
|
|
430
|
+
if not (
|
|
431
|
+
self.leap_day_adjustment == LeapDayAdjustmentType.DROP_FEB29
|
|
432
|
+
and month == 2
|
|
433
|
+
and day == 29
|
|
434
|
+
):
|
|
435
|
+
if not (
|
|
436
|
+
self.leap_day_adjustment == LeapDayAdjustmentType.DROP_DEC31
|
|
437
|
+
and month == 12
|
|
438
|
+
and day == 31
|
|
439
|
+
):
|
|
440
|
+
if not (
|
|
441
|
+
self.leap_day_adjustment == LeapDayAdjustmentType.DROP_JAN1
|
|
442
|
+
and month == 1
|
|
443
|
+
and day == 1
|
|
444
|
+
):
|
|
445
|
+
yield cur_tz
|
|
446
|
+
|
|
447
|
+
cur += self.frequency
|
|
448
|
+
|
|
449
|
+
def list_time_range(self):
|
|
450
|
+
"""Return a list of timestamps for a time range.
|
|
451
|
+
|
|
452
|
+
Returns
|
|
453
|
+
-------
|
|
454
|
+
list[datetime]
|
|
455
|
+
"""
|
|
456
|
+
return list(self._iter_timestamps())
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
class AnnualTimeRange(DatetimeRange):
    def _iter_timestamps(self):
        """Yield a tz-aware datetime for January 1st of every year from start through end.

        Iteration stays in the range's own time zone (no UTC conversion),
        which might be okay since the resolution is annual.
        """
        first_year = self.start.to_pydatetime().year
        last_year = self.end.to_pydatetime().year
        for year in range(first_year, last_year + 1):
            yield datetime(year=year, month=1, day=1, tzinfo=self.tzinfo)
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
class IndexTimeRange(DatetimeRange):
    """A DatetimeRange that yields integer indexes instead of timestamps."""

    def __init__(
        self,
        start,
        end,
        frequency,
        start_index,
        time_based_data_adjustment: TimeBasedDataAdjustmentModel | None = None,
    ):
        super().__init__(
            start, end, frequency, time_based_data_adjustment=time_based_data_adjustment
        )
        # Index assigned to the first timestamp in the range.
        self.start_index = start_index

    def _iter_timestamps(self):
        """Yield the index of every non-dropped timestamp in the range.

        The index advances for every step of the range, including steps whose
        timestamps are dropped by the leap-day adjustment.
        """
        utc = ZoneInfo("UTC")
        cur = self.start.to_pydatetime().astimezone(utc)
        cur_idx = self.start_index
        # Add one frequency so that the end timestamp is inclusive.
        end = self.end.to_pydatetime().astimezone(utc) + self.frequency

        adj = self.leap_day_adjustment
        while cur < end:
            local_ts = adjust_timestamp_by_dst_offset(
                cur.astimezone(self.tzinfo), self.frequency
            )
            month_day = (local_ts.month, local_ts.day)
            # Flattened form of the original nested "if not (...)" checks.
            dropped = (
                (adj == LeapDayAdjustmentType.DROP_FEB29 and month_day == (2, 29))
                or (adj == LeapDayAdjustmentType.DROP_DEC31 and month_day == (12, 31))
                or (adj == LeapDayAdjustmentType.DROP_JAN1 and month_day == (1, 1))
            )
            if not dropped:
                yield cur_idx
            cur += self.frequency
            cur_idx += 1
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
def adjust_timestamp_by_dst_offset(timestamp, frequency):
    """Reduce the timestamps within the daylight saving range by 1 hour.
    Used to ensure that a time series at daily (or lower) frequency returns each day at the
    same timestamp in prevailing time, an expected behavior in most standard libraries.
    (e.g., ensure a time series can return 2018-03-11 00:00, 2018-03-12 00:00...
    instead of 2018-03-11 00:00, 2018-03-12 01:00...)

    """
    # Sub-daily series keep their natural DST behavior.
    if frequency < timedelta(hours=24):
        return timestamp

    dst_offset = timestamp.dst()
    if not dst_offset:
        # Naive timestamps (dst() is None) and zones currently on standard
        # time need no correction.
        return timestamp
    return timestamp - dst_offset
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Functions related to time"""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
from dsgrid.dimension.time import (
|
|
11
|
+
DatetimeRange,
|
|
12
|
+
TimeZone,
|
|
13
|
+
TimeBasedDataAdjustmentModel,
|
|
14
|
+
TimeDimensionType,
|
|
15
|
+
)
|
|
16
|
+
from dsgrid.config.dimensions import TimeRangeModel
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def build_time_ranges(
    time_ranges: "list[TimeRangeModel]",
    str_format: str,
    tz=None,
):
    """Parse model time ranges into a sorted list of (start, end) pandas Timestamps.

    Parameters
    ----------
    time_ranges : list[TimeRangeModel]
        Models with string ``start`` and ``end`` attributes.
        (Annotation fix: the parameter is iterated, so it is a list of models,
        not a single TimeRangeModel.)
    str_format : str
        strptime format used to parse ``start`` and ``end``.
    tz
        Optional time zone applied to the resulting Timestamps.
        NOTE(review): callers such as get_time_ranges pass a tzinfo object here,
        not a TimeZone enum member — the old ``TimeZone | None`` hint was wrong.

    Returns
    -------
    list[tuple[pd.Timestamp, pd.Timestamp]]
        Sorted by start time.
    """
    ranges = []
    for time_range in time_ranges:
        # Strip any parsed tzinfo before localizing with ``tz``; equivalent to
        # the previous field-by-field datetime reconstruction.
        start = datetime.strptime(time_range.start, str_format).replace(tzinfo=None)
        end = datetime.strptime(time_range.end, str_format).replace(tzinfo=None)
        ranges.append((pd.Timestamp(start, tz=tz), pd.Timestamp(end, tz=tz)))

    ranges.sort(key=lambda x: x[0])
    return ranges
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_time_ranges(
    time_dimension_config,  # DateTimeDimensionConfig; left unannotated as in the original
    timezone=None,
    time_based_data_adjustment=None,
):
    """Build DatetimeRange objects from a time dimension config.

    Parameters
    ----------
    time_dimension_config
        Must provide ``model``, ``get_tzinfo()``, and (for INDEX time)
        ``_create_represented_time_ranges()``.
    timezone
        tzinfo to localize the ranges; defaults to the config's tzinfo.
        (Fix: the old ``timezone: TimeZone = None`` annotation was invalid —
        a None default on a non-optional hint.)
    time_based_data_adjustment : TimeBasedDataAdjustmentModel, optional
        Passed through to each DatetimeRange.

    Returns
    -------
    list[DatetimeRange]

    Raises
    ------
    ValueError
        If the config's time type is neither DATETIME nor INDEX.
    """
    dim_model = time_dimension_config.model
    if timezone is None:
        timezone = time_dimension_config.get_tzinfo()

    if dim_model.time_type == TimeDimensionType.DATETIME:
        dt_ranges = dim_model.ranges
    elif dim_model.time_type == TimeDimensionType.INDEX:
        # INDEX time stores represented timestamps; materialize them first.
        dt_ranges = time_dimension_config._create_represented_time_ranges()
    else:
        # Bug fix: corrected the typo "time_typ" in the error message.
        msg = f"Cannot support time_dimension_config model of time_type {dim_model.time_type}."
        raise ValueError(msg)

    return [
        DatetimeRange(
            start=start,
            end=end,
            frequency=dim_model.frequency,
            time_based_data_adjustment=time_based_data_adjustment,
        )
        for start, end in build_time_ranges(dt_ranges, dim_model.str_format, tz=timezone)
    ]
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def is_leap_year(year: int) -> bool:
    """Return True if the year is a leap year."""
    # Gregorian rules: centuries are leap only when divisible by 400.
    if year % 400 == 0:
        return True
    if year % 100 == 0:
        return False
    return year % 4 == 0
|
dsgrid/dsgrid_rc.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""Manages the dsgrid runtime configuration file"""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
from warnings import warn
|
|
9
|
+
|
|
10
|
+
import json5
|
|
11
|
+
from pydantic import model_validator
|
|
12
|
+
|
|
13
|
+
from dsgrid.common import BackendEngine, DEFAULT_DB_PASSWORD, DEFAULT_SCRATCH_DIR
|
|
14
|
+
from dsgrid.data_models import DSGBaseModel
|
|
15
|
+
|
|
16
|
+
RC_FILENAME = ".dsgrid.json5"
|
|
17
|
+
DEFAULT_BACKEND = BackendEngine.DUCKDB
|
|
18
|
+
DEFAULT_THRIFT_SERVER_URL = "hive://localhost:10000/default"
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class DsgridRuntimeConfig(DSGBaseModel):
    """Defines the runtime config that can be stored in users' home directories."""

    database_url: str | None = None
    database_user: str = "root"
    database_password: str = DEFAULT_DB_PASSWORD
    offline: bool = True
    backend_engine: BackendEngine = DEFAULT_BACKEND
    thrift_server_url: str = DEFAULT_THRIFT_SERVER_URL
    use_hive_metastore: bool = False
    console_level: str = "info"
    file_level: str = "info"
    timings: bool = False
    reraise_exceptions: bool = False
    scratch_dir: None | Path = None

    @model_validator(mode="before")
    @classmethod
    def environment_overrides(cls, values: dict[str, Any]) -> dict[str, Any]:
        """Let selected environment variables override values from the config file."""
        for env, field in (
            ("DSGRID_BACKEND_ENGINE", "backend_engine"),
            ("THRIFT_SERVER_URL", "thrift_server_url"),
        ):
            if env in os.environ:
                values[field] = os.environ[env]
        return values

    @model_validator(mode="before")
    @classmethod
    def remove_legacy_fields(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Drop deprecated fields with a warning instead of failing validation."""
        for field in ("database_name",):
            if data.pop(field, None) is not None:
                warn(
                    f"The dsgrid runtime config field {field} is deprecated. Please remove it. "
                    "This will cause an error in a future release.",
                )
        return data

    @classmethod
    def load(cls) -> "DsgridRuntimeConfig":
        """Load the dsgrid runtime config if it exists or one with default values."""
        rc_file = cls.path()
        if rc_file.exists():
            # utf-8-sig tolerates a BOM written by some Windows editors.
            data = json5.loads(rc_file.read_text(encoding="utf-8-sig"))
            return cls(**data)
        return cls()

    def dump(self) -> None:
        """Dump the config to the user's home directory."""
        path = self.path()
        data = self.model_dump()
        # Never persist credentials to the rc file.
        data.pop("database_user")
        data.pop("database_password")
        # Bug fix: write with an explicit encoding instead of the platform
        # default, matching the utf-8 family used by load().
        with open(path, "w", encoding="utf-8") as f_out:
            json5.dump(data, f_out, indent=2)
        print(f"Wrote dsgrid config to {path}", file=sys.stderr)

    @staticmethod
    def path() -> Path:
        """Return the path to the config file."""
        return Path.home() / RC_FILENAME

    def get_scratch_dir(self) -> Path:
        """Return the scratch_dir to use."""
        return self.scratch_dir or Path(DEFAULT_SCRATCH_DIR)
|