dsgrid-toolkit 0.3.3__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backend.py +93 -0
- dsgrid/__init__.py +22 -0
- dsgrid/api/__init__.py +0 -0
- dsgrid/api/api_manager.py +179 -0
- dsgrid/api/app.py +419 -0
- dsgrid/api/models.py +60 -0
- dsgrid/api/response_models.py +116 -0
- dsgrid/apps/__init__.py +0 -0
- dsgrid/apps/project_viewer/app.py +216 -0
- dsgrid/apps/registration_gui.py +444 -0
- dsgrid/chronify.py +32 -0
- dsgrid/cli/__init__.py +0 -0
- dsgrid/cli/common.py +120 -0
- dsgrid/cli/config.py +176 -0
- dsgrid/cli/download.py +13 -0
- dsgrid/cli/dsgrid.py +157 -0
- dsgrid/cli/dsgrid_admin.py +92 -0
- dsgrid/cli/install_notebooks.py +62 -0
- dsgrid/cli/query.py +729 -0
- dsgrid/cli/registry.py +1862 -0
- dsgrid/cloud/__init__.py +0 -0
- dsgrid/cloud/cloud_storage_interface.py +140 -0
- dsgrid/cloud/factory.py +31 -0
- dsgrid/cloud/fake_storage_interface.py +37 -0
- dsgrid/cloud/s3_storage_interface.py +156 -0
- dsgrid/common.py +36 -0
- dsgrid/config/__init__.py +0 -0
- dsgrid/config/annual_time_dimension_config.py +194 -0
- dsgrid/config/common.py +142 -0
- dsgrid/config/config_base.py +148 -0
- dsgrid/config/dataset_config.py +907 -0
- dsgrid/config/dataset_schema_handler_factory.py +46 -0
- dsgrid/config/date_time_dimension_config.py +136 -0
- dsgrid/config/dimension_config.py +54 -0
- dsgrid/config/dimension_config_factory.py +65 -0
- dsgrid/config/dimension_mapping_base.py +350 -0
- dsgrid/config/dimension_mappings_config.py +48 -0
- dsgrid/config/dimensions.py +1025 -0
- dsgrid/config/dimensions_config.py +71 -0
- dsgrid/config/file_schema.py +190 -0
- dsgrid/config/index_time_dimension_config.py +80 -0
- dsgrid/config/input_dataset_requirements.py +31 -0
- dsgrid/config/mapping_tables.py +209 -0
- dsgrid/config/noop_time_dimension_config.py +42 -0
- dsgrid/config/project_config.py +1462 -0
- dsgrid/config/registration_models.py +188 -0
- dsgrid/config/representative_period_time_dimension_config.py +194 -0
- dsgrid/config/simple_models.py +49 -0
- dsgrid/config/supplemental_dimension.py +29 -0
- dsgrid/config/time_dimension_base_config.py +192 -0
- dsgrid/data_models.py +155 -0
- dsgrid/dataset/__init__.py +0 -0
- dsgrid/dataset/dataset.py +123 -0
- dsgrid/dataset/dataset_expression_handler.py +86 -0
- dsgrid/dataset/dataset_mapping_manager.py +121 -0
- dsgrid/dataset/dataset_schema_handler_base.py +945 -0
- dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
- dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
- dsgrid/dataset/growth_rates.py +162 -0
- dsgrid/dataset/models.py +51 -0
- dsgrid/dataset/table_format_handler_base.py +257 -0
- dsgrid/dataset/table_format_handler_factory.py +17 -0
- dsgrid/dataset/unpivoted_table.py +121 -0
- dsgrid/dimension/__init__.py +0 -0
- dsgrid/dimension/base_models.py +230 -0
- dsgrid/dimension/dimension_filters.py +308 -0
- dsgrid/dimension/standard.py +252 -0
- dsgrid/dimension/time.py +352 -0
- dsgrid/dimension/time_utils.py +103 -0
- dsgrid/dsgrid_rc.py +88 -0
- dsgrid/exceptions.py +105 -0
- dsgrid/filesystem/__init__.py +0 -0
- dsgrid/filesystem/cloud_filesystem.py +32 -0
- dsgrid/filesystem/factory.py +32 -0
- dsgrid/filesystem/filesystem_interface.py +136 -0
- dsgrid/filesystem/local_filesystem.py +74 -0
- dsgrid/filesystem/s3_filesystem.py +118 -0
- dsgrid/loggers.py +132 -0
- dsgrid/minimal_patterns.cp313-win_amd64.pyd +0 -0
- dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
- dsgrid/notebooks/registration.ipynb +48 -0
- dsgrid/notebooks/start_notebook.sh +11 -0
- dsgrid/project.py +451 -0
- dsgrid/query/__init__.py +0 -0
- dsgrid/query/dataset_mapping_plan.py +142 -0
- dsgrid/query/derived_dataset.py +388 -0
- dsgrid/query/models.py +728 -0
- dsgrid/query/query_context.py +287 -0
- dsgrid/query/query_submitter.py +994 -0
- dsgrid/query/report_factory.py +19 -0
- dsgrid/query/report_peak_load.py +70 -0
- dsgrid/query/reports_base.py +20 -0
- dsgrid/registry/__init__.py +0 -0
- dsgrid/registry/bulk_register.py +165 -0
- dsgrid/registry/common.py +287 -0
- dsgrid/registry/config_update_checker_base.py +63 -0
- dsgrid/registry/data_store_factory.py +34 -0
- dsgrid/registry/data_store_interface.py +74 -0
- dsgrid/registry/dataset_config_generator.py +158 -0
- dsgrid/registry/dataset_registry_manager.py +950 -0
- dsgrid/registry/dataset_update_checker.py +16 -0
- dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
- dsgrid/registry/dimension_mapping_update_checker.py +16 -0
- dsgrid/registry/dimension_registry_manager.py +413 -0
- dsgrid/registry/dimension_update_checker.py +16 -0
- dsgrid/registry/duckdb_data_store.py +207 -0
- dsgrid/registry/filesystem_data_store.py +150 -0
- dsgrid/registry/filter_registry_manager.py +123 -0
- dsgrid/registry/project_config_generator.py +57 -0
- dsgrid/registry/project_registry_manager.py +1623 -0
- dsgrid/registry/project_update_checker.py +48 -0
- dsgrid/registry/registration_context.py +223 -0
- dsgrid/registry/registry_auto_updater.py +316 -0
- dsgrid/registry/registry_database.py +667 -0
- dsgrid/registry/registry_interface.py +446 -0
- dsgrid/registry/registry_manager.py +558 -0
- dsgrid/registry/registry_manager_base.py +367 -0
- dsgrid/registry/versioning.py +92 -0
- dsgrid/rust_ext/__init__.py +14 -0
- dsgrid/rust_ext/find_minimal_patterns.py +129 -0
- dsgrid/spark/__init__.py +0 -0
- dsgrid/spark/functions.py +589 -0
- dsgrid/spark/types.py +110 -0
- dsgrid/tests/__init__.py +0 -0
- dsgrid/tests/common.py +140 -0
- dsgrid/tests/make_us_data_registry.py +265 -0
- dsgrid/tests/register_derived_datasets.py +103 -0
- dsgrid/tests/utils.py +25 -0
- dsgrid/time/__init__.py +0 -0
- dsgrid/time/time_conversions.py +80 -0
- dsgrid/time/types.py +67 -0
- dsgrid/units/__init__.py +0 -0
- dsgrid/units/constants.py +113 -0
- dsgrid/units/convert.py +71 -0
- dsgrid/units/energy.py +145 -0
- dsgrid/units/power.py +87 -0
- dsgrid/utils/__init__.py +0 -0
- dsgrid/utils/dataset.py +830 -0
- dsgrid/utils/files.py +179 -0
- dsgrid/utils/filters.py +125 -0
- dsgrid/utils/id_remappings.py +100 -0
- dsgrid/utils/py_expression_eval/LICENSE +19 -0
- dsgrid/utils/py_expression_eval/README.md +8 -0
- dsgrid/utils/py_expression_eval/__init__.py +847 -0
- dsgrid/utils/py_expression_eval/tests.py +283 -0
- dsgrid/utils/run_command.py +70 -0
- dsgrid/utils/scratch_dir_context.py +65 -0
- dsgrid/utils/spark.py +918 -0
- dsgrid/utils/spark_partition.py +98 -0
- dsgrid/utils/timing.py +239 -0
- dsgrid/utils/utilities.py +221 -0
- dsgrid/utils/versioning.py +36 -0
- dsgrid_toolkit-0.3.3.dist-info/METADATA +193 -0
- dsgrid_toolkit-0.3.3.dist-info/RECORD +157 -0
- dsgrid_toolkit-0.3.3.dist-info/WHEEL +4 -0
- dsgrid_toolkit-0.3.3.dist-info/entry_points.txt +4 -0
- dsgrid_toolkit-0.3.3.dist-info/licenses/LICENSE +29 -0
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
import logging
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Any, Union, Literal
|
|
5
|
+
|
|
6
|
+
from pydantic import field_validator, model_validator, Field
|
|
7
|
+
|
|
8
|
+
from dsgrid.data_models import DSGBaseModel
|
|
9
|
+
from dsgrid.dimension.base_models import DimensionType
|
|
10
|
+
from dsgrid.exceptions import DSGInvalidField, DSGInvalidParameter
|
|
11
|
+
from dsgrid.spark.types import DataFrame, F
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class DimensionFilterType(str, Enum):
    """Kinds of dimension filters that a query may specify.

    Each member's value is the string that the matching filter model in this
    module declares as the default of its ``filter_type`` field.
    """

    # Filters expressed as text where-clauses.
    EXPRESSION = "expression"
    EXPRESSION_RAW = "expression_raw"
    # Filters expressed through methods on a column object.
    COLUMN_OPERATOR = "column_operator"
    BETWEEN_COLUMN_OPERATOR = "between_column_operator"
    # Filters based on subset / supplemental dimensions.
    SUBSET = "subset"
    SUPPLEMENTAL_COLUMN_OPERATOR = "supplemental_column_operator"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DimensionFilterBaseModel(DSGBaseModel, abc.ABC):
    """Base model for all filters.

    Concrete subclasses implement :meth:`apply_filter`. The helper methods
    ``_make_value_str`` and ``_make_values_str`` render Python values as text
    suitable for embedding in a filter expression.
    """

    dimension_type: DimensionType
    column: str = Field(
        title="column", description="Column of dimension records to use", default="id"
    )

    @abc.abstractmethod
    def apply_filter(self, df, column=None):
        """Apply the filter to a DataFrame"""

    @model_validator(mode="before")
    @classmethod
    def remove_filter_type(cls, values):
        """Discard any incoming ``filter_type`` entry before field validation.

        Parameters
        ----------
        values : Any
            Raw input handed to the model. Only dict input is inspected; any
            other input is returned untouched so that normal validation can
            report the problem.

        Returns
        -------
        Any
            The input without a ``filter_type`` key.
        """
        if isinstance(values, dict) and "filter_type" in values:
            # Build a new dict instead of popping in place: the caller may reuse
            # the same dict (e.g., validate it a second time), and mutating it
            # would silently strip the key from the caller's data.
            values = {key: val for key, val in values.items() if key != "filter_type"}
        return values

    def _make_value_str(self, value):
        """Return ``value`` rendered as text for a filter expression.

        Strings are wrapped in single quotes verbatim (embedded quotes are not
        escaped); ints and floats are converted with ``str``.

        Raises
        ------
        DSGInvalidField
            Raised if ``value`` is not a str, int, or float.
        """
        if isinstance(value, str):
            return f"'{value}'"
        if isinstance(value, (int, float)):
            return str(value)
        msg = f"Unsupported type: {type(value)}"
        raise DSGInvalidField(msg)

    def _make_values_str(self, values):
        """Return the rendered form of each item in ``values`` joined by ``", "``."""
        return ", ".join(self._make_value_str(x) for x in values)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class DimensionFilterSingleQueryNameBaseModel(DimensionFilterBaseModel, abc.ABC):
    """Base model for filters that are tied to exactly one dimension name."""

    # Name of the single dimension that the filter refers to.
    dimension_name: str
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class DimensionFilterMultipleQueryNameBaseModel(DimensionFilterBaseModel, abc.ABC):
    """Base model for filters that are tied to several dimension names."""

    # Names of the dimensions that the filter refers to.
    dimension_names: list[str]
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class _DimensionFilterWithWhereClauseModel(DimensionFilterSingleQueryNameBaseModel, abc.ABC):
    """Base for filters that are applied by passing a text where-clause to ``df.filter``."""

    def apply_filter(self, df, column=None):
        """Return ``df`` filtered by the clause produced by :meth:`where_clause`."""
        clause = self.where_clause(column=column)
        return df.filter(clause)

    @abc.abstractmethod
    def where_clause(self, column=None):
        """Build the text of the where clause for a filter statement.

        Parameters
        ----------
        column : None or str
            Column to filter on. The implementations in this module fall back
            to ``self.column`` when this is None or empty.

        Returns
        -------
        str

        """
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class DimensionFilterExpressionModel(_DimensionFilterWithWhereClauseModel):
    """Filters a table where a dimension column matches an expression.

    Example:
        DimensionFilterExpressionModel(
            dimension_type=DimensionType.GEOGRAPHY,
            dimension_name="county",
            operator="==",
            value="06037",
        ).where_clause(column="county")
    produces the clause
        "(county == '06037')"

    String values are wrapped in single quotes; numeric values are inserted
    as-is. When no column is passed, ``self.column`` is used.
    """

    operator: str
    value: Union[str, int, float]
    filter_type: Literal[DimensionFilterType.EXPRESSION] = DimensionFilterType.EXPRESSION

    def where_clause(self, column=None):
        """Return ``(<column> <operator> <value>)`` with the value quoted if it is a string."""
        target = column or self.column
        rendered = self._make_value_str(self.value)
        return f"({target} {self.operator} {rendered})"
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class DimensionFilterExpressionRawModel(_DimensionFilterWithWhereClauseModel):
    """Filters a table where a dimension column matches an expression.
    The supplied value is inserted into the clause with no modification.

    Example:
        DimensionFilterExpressionRawModel(
            dimension_type=DimensionType.GEOGRAPHY,
            dimension_name="county",
            value="== '06037'",
        ).where_clause(column="county")
    produces the clause
        "(county == '06037')"

    Unlike DimensionFilterExpressionModel, this class never adds quotes; the
    caller must supply the operator and any quoting inside ``value``.
    When no column is passed, ``self.column`` is used.
    """

    value: Union[str, int, float]
    filter_type: Literal[DimensionFilterType.EXPRESSION_RAW] = DimensionFilterType.EXPRESSION_RAW

    def where_clause(self, column=None):
        """Return ``(<column> <value>)`` using the raw value text."""
        target = column or self.column
        return f"({target} {self.value})"
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# Names of the column methods that the column-operator filters may invoke.
DIMENSION_COLUMN_FILTER_OPERATORS = {
    "contains",
    "endswith",
    "isNotNull",
    "isNull",
    "isin",
    "like",
    "rlike",
    "startswith",
}


def check_operator(operator):
    """Validate that ``operator`` is one of DIMENSION_COLUMN_FILTER_OPERATORS.

    Parameters
    ----------
    operator : str
        Name of the column method requested by a filter.

    Returns
    -------
    str
        The same operator, unchanged.

    Raises
    ------
    ValueError
        Raised if the operator is not in the allowed set.
    """
    if operator in DIMENSION_COLUMN_FILTER_OPERATORS:
        return operator
    msg = f"operator={operator} is not supported. Allowed={DIMENSION_COLUMN_FILTER_OPERATORS}"
    raise ValueError(msg)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class DimensionFilterColumnOperatorModel(DimensionFilterSingleQueryNameBaseModel):
    """Filters a table where a dimension column matches a Spark SQL operator.

    Examples:
    import pyspark.sql.functions as F
    df.filter(F.col("geography").like("abc%"))
    df.filter(~F.col("sector").startswith("com"))
    df.filter(F.col("sector").isNull())
    """

    operator: str = Field(
        title="operator", description="Method on pyspark.sql.functions.col to invoke"
    )
    # NOTE(review): the description mentions a "between" operator, but "between"
    # is not accepted by the operator validator below; the text is kept as-is
    # because it is part of the published schema.
    value: Any = Field(
        default=None,
        title="value",
        description="Value to filter on. Use a two-element list for the between operator.",
    )
    negate: bool = Field(
        title="negate",
        description="Change the filter to match the negation of the value.",
        default=False,
    )
    filter_type: Literal[DimensionFilterType.COLUMN_OPERATOR] = DimensionFilterType.COLUMN_OPERATOR

    @field_validator("operator")
    @classmethod
    def check_operator(cls, operator):
        """Reject operators that are not in the module-level allowed set."""
        return check_operator(operator)

    def apply_filter(self, df, column=None):
        """Filter ``df`` by invoking the configured operator on a column.

        Parameters
        ----------
        df : DataFrame
            Table to filter.
        column : None or str
            Column to filter on. Falls back to ``self.column`` when None or empty.

        Returns
        -------
        DataFrame
            Rows matching the condition, or rows not matching it if ``negate``
            is set.
        """
        column = column or self.column
        method = getattr(F.col(column), self.operator)
        if self.operator in ("isNull", "isNotNull"):
            # These column methods accept no argument; passing self.value (even
            # the default None) raises TypeError, so the value is ignored here.
            condition = method()
        else:
            condition = method(self.value)
        if self.negate:
            condition = ~condition
        return df.filter(condition)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
class DimensionFilterBetweenColumnOperatorModel(DimensionFilterSingleQueryNameBaseModel):
    """Filters a table where a dimension column lies between a lower bound and an
    upper bound, inclusive.

    Examples:
    import pyspark.sql.functions as F
    df.filter(F.col("timestamp").between("2012-07-01 00:00:00", "2012-08-01 00:00:00"))
    """

    lower_bound: Any = Field(
        title="lower_bound", description="Lower bound, inclusive", default=None
    )
    upper_bound: Any = Field(
        title="upper_bound", description="Upper bound, inclusive", default=None
    )
    negate: bool = Field(
        default=False,
        title="negate",
        description="Change the filter to match the negation of the value.",
    )
    filter_type: Literal[
        DimensionFilterType.BETWEEN_COLUMN_OPERATOR
    ] = DimensionFilterType.BETWEEN_COLUMN_OPERATOR

    def apply_filter(self, df, column=None):
        """Return the rows of ``df`` whose column value is within the bounds.

        If ``negate`` is set, return the rows outside the bounds instead.
        ``column`` falls back to ``self.column`` when None or empty.
        """
        target = column or self.column
        in_range = F.col(target).between(self.lower_bound, self.upper_bound)
        condition = ~in_range if self.negate else in_range
        return df.filter(condition)
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
class SubsetDimensionFilterModel(DimensionFilterMultipleQueryNameBaseModel):
    """Filters base dimension records that match a subset dimension."""

    dimension_names: list[str]
    filter_type: Literal[DimensionFilterType.SUBSET] = DimensionFilterType.SUBSET

    @field_validator("dimension_names")
    @classmethod
    def check_dimension_names(cls, dimension_names):
        """Require at least one dimension name."""
        if dimension_names:
            return dimension_names
        msg = "dimension_names cannot be empty"
        raise ValueError(msg)

    def apply_filter(self, df, column=None):
        """Always raises; use get_filtered_records_dataframe for this filter type."""
        msg = f"apply_filter must not be called on {self.__class__.__name__}"
        raise NotImplementedError(msg)

    def get_filtered_records_dataframe(self, dimension_accessor) -> DataFrame:
        """Return a dataframe containing the filter records.

        Parameters
        ----------
        dimension_accessor : callable
            Called with each name in ``dimension_names``; the returned object
            must provide ``get_records_dataframe()`` and ``model.dimension_type``.

        Returns
        -------
        DataFrame
            Union of the records of every named dimension.

        Raises
        ------
        DSGInvalidParameter
            Raised if the dimensions differ in dimension type or if their
            records tables differ in columns.
        """
        combined = None
        expected_type = None
        for name in self.dimension_names:
            dim = dimension_accessor(name)
            records = dim.get_records_dataframe()
            if combined is None:
                # The first dimension establishes the type and columns to match.
                combined, expected_type = records, dim.model.dimension_type
                continue

            if dim.model.dimension_type != expected_type:
                msg = (
                    f"Mismatch in dimension types for {self}: "
                    f"{expected_type} != {dim.model.dimension_type}"
                )
                raise DSGInvalidParameter(msg)
            if records.columns != combined.columns:
                msg = (
                    f"Mismatch in records columns for {self}: "
                    f"{combined.columns} != {records.columns}"
                )
                raise DSGInvalidParameter(msg)
            combined = combined.union(records)

        # The validator rejects an empty dimension_names list, so at least one
        # iteration must have run.
        assert combined is not None
        return combined
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
class SupplementalDimensionFilterColumnOperatorModel(DimensionFilterSingleQueryNameBaseModel):
    """Filters base dimension records that have a valid mapping to a supplemental dimension.

    With the defaults (``operator="like"``, ``value="%"``) the condition is
    ``F.col(column).like("%")``.
    """

    value: Any = Field(title="value", description="Value to filter on", default="%")
    operator: str = Field(
        title="operator",
        description="Method on pyspark.sql.functions.col to invoke",
        default="like",
    )
    negate: bool = Field(
        title="negate",
        description="Filter out valid mappings to this supplemental dimension.",
        default=False,
    )
    filter_type: Literal[
        DimensionFilterType.SUPPLEMENTAL_COLUMN_OPERATOR
    ] = DimensionFilterType.SUPPLEMENTAL_COLUMN_OPERATOR

    @field_validator("operator")
    @classmethod
    def check_operator(cls, operator):
        """Reject operators that are not in the module-level allowed set."""
        return check_operator(operator)

    def apply_filter(self, df, column=None):
        """Filter ``df`` by invoking the configured operator on a column.

        Parameters
        ----------
        df : DataFrame
            Table to filter.
        column : None or str
            Column to filter on. Falls back to ``self.column`` when None or empty.

        Returns
        -------
        DataFrame
            Rows matching the condition, or rows not matching it if ``negate``
            is set.
        """
        column = column or self.column
        method = getattr(F.col(column), self.operator)
        if self.operator in ("isNull", "isNotNull"):
            # These column methods accept no argument; passing self.value (which
            # defaults to "%") raises TypeError, so the value is ignored here.
            condition = method()
        else:
            condition = method(self.value)
        if self.negate:
            condition = ~condition
        return df.filter(condition)
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""Standard dimension classes for dsgrid"""
|
|
2
|
+
|
|
3
|
+
from enum import StrEnum
|
|
4
|
+
|
|
5
|
+
from pydantic import Field
|
|
6
|
+
|
|
7
|
+
from dsgrid.config.dimensions import (
|
|
8
|
+
DateTimeDimensionModel,
|
|
9
|
+
AnnualTimeDimensionModel,
|
|
10
|
+
NoOpTimeDimensionModel,
|
|
11
|
+
)
|
|
12
|
+
from dsgrid.dimension.base_models import (
|
|
13
|
+
MetricDimensionBaseModel,
|
|
14
|
+
GeographyDimensionBaseModel,
|
|
15
|
+
ModelYearDimensionBaseModel,
|
|
16
|
+
ScenarioDimensionBaseModel,
|
|
17
|
+
SectorDimensionBaseModel,
|
|
18
|
+
SubsectorDimensionBaseModel,
|
|
19
|
+
WeatherYearDimensionBaseModel,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# ---------------------------
|
|
24
|
+
# GEOGRAPHIC DIMENSIONS
|
|
25
|
+
# ---------------------------
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Geography(GeographyDimensionBaseModel):
    """Generic geography with optional time_zone.

    Declares no fields of its own; presumably the time_zone attribute comes
    from GeographyDimensionBaseModel (not visible here).
    """
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# TODO: Deprecate. Replace instances with Geography
|
|
33
|
+
class CensusDivision(GeographyDimensionBaseModel):
    """Census Division attributes.

    Declares no fields beyond those of the geography base model.
    """
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# TODO: Deprecate. Replace instances with Geography
|
|
38
|
+
class CensusRegion(GeographyDimensionBaseModel):
    """Census Region attributes.

    Declares no fields beyond those of the geography base model.
    """
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class State(GeographyDimensionBaseModel):
    """State attributes.

    All three extra fields are optional: ``is_conus`` defaults to None and the
    census fields default to an empty string.
    """

    is_conus: bool | None = None
    census_division: str = ""
    census_region: str = ""
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class County(GeographyDimensionBaseModel):
    """County attributes; every record must name its ``state``."""

    state: str
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# ---------------------------
|
|
57
|
+
# SECTOR DIMENSIONS
|
|
58
|
+
# ---------------------------
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class Sector(SectorDimensionBaseModel):
    """Sector attributes, with an optional ``category`` string (default empty)."""

    category: str = Field(
        default="",
        title="sector",
        description="Sector dimension",
    )
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
# ---------------------------
|
|
72
|
+
# SUBSECTOR DIMENSIONS
|
|
73
|
+
# ---------------------------
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class Subsector(SubsectorDimensionBaseModel):
    """Subsector attributes.

    Both extra fields are optional strings that default to empty.
    """

    sector: str = ""
    abbr: str = ""
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# ---------------------------
|
|
84
|
+
# METRIC DIMENSIONS
|
|
85
|
+
# ---------------------------
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class FunctionalForm(StrEnum):
|
|
89
|
+
"""Functional forms for regression parameters"""
|
|
90
|
+
|
|
91
|
+
# y = a0 + a1 * x + a2 * x^2 + ...
|
|
92
|
+
LINEAR = "linear"
|
|
93
|
+
# ln y = a0 + a1 * x + a2 * x^2 + ...
|
|
94
|
+
# y = exp(a0 + a1 * x + a2 * x^2 + ...)
|
|
95
|
+
EXPONENTIAL = "exponential"
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class EnergyEndUse(MetricDimensionBaseModel):
    """Energy Demand End Use attributes.

    Requires a ``fuel_id`` and a ``unit`` string on every record.
    """

    fuel_id: str
    unit: str
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class EnergyServiceDemand(MetricDimensionBaseModel):
    """Energy Service Demand attributes; requires a ``unit`` string."""

    unit: str
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class EnergyServiceDemandRegression(MetricDimensionBaseModel):
    """Energy Service Demand regression over time or other variables.

    The demand can be per floor area, vehicle, etc. ``regression_type``
    defaults to the linear functional form; ``unit`` is required.
    """

    regression_type: FunctionalForm = Field(
        description="Specifies the functional form of the regression model",
        default=FunctionalForm.LINEAR,
    )
    unit: str
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class EnergyEfficiency(MetricDimensionBaseModel):
    """Energy Efficiency of building stock or equipment.

    Requires a ``fuel_id`` and a ``unit`` string on every record.
    """

    fuel_id: str
    unit: str
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class EnergyIntensityRegression(MetricDimensionBaseModel):
    """Energy Intensity (per capita, GDP, etc.) regression over time or other variables.

    ``regression_type`` defaults to the linear functional form; ``unit`` is
    required.
    """

    regression_type: FunctionalForm = Field(
        description="Specifies the functional form of the regression model",
        default=FunctionalForm.LINEAR,
    )
    unit: str
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class EnergyIntensity(MetricDimensionBaseModel):
    """Energy Intensity per capita, GDP, etc.; requires a ``unit`` string."""

    unit: str
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class Population(MetricDimensionBaseModel):
    """Population attributes; requires a ``unit`` string."""

    unit: str
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class Stock(MetricDimensionBaseModel):
    """Stock attributes - e.g., GDP, building stock, equipment.

    Requires a ``unit`` string.
    """

    unit: str
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
class StockRegression(MetricDimensionBaseModel):
    """Stock (can be per capita, GDP, etc.) regression over time or other variables.

    ``regression_type`` defaults to the linear functional form; ``unit`` is
    required.
    """

    regression_type: FunctionalForm = Field(
        description="Specifies the functional form of the regression model",
        default=FunctionalForm.LINEAR,
    )
    unit: str
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class StockShare(MetricDimensionBaseModel):
    """Stock Share attributes - e.g., market share of a technology.

    Values are generally dimensionless, but a unit string can be provided to
    assist with calculations.
    """

    unit: str
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
class FractionalIndex(MetricDimensionBaseModel):
    """Fractional Index attributes - e.g., human development index (HDI).

    Values are generally dimensionless, but a unit string can be provided to
    assist with calculations. ``min_value`` and ``max_value`` are required
    floats.
    """

    unit: str
    min_value: float
    max_value: float
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
class PeggedIndex(MetricDimensionBaseModel):
    """Pegged Index attributes.

    Describes data relative to a base year that is normalized to a value like
    1 or 100. ``base_year`` and ``base_value`` are required.

    Values are generally dimensionless, but a unit string can be provided to
    assist with calculations.
    """

    unit: str
    base_year: int
    base_value: float
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class WeatherVariable(MetricDimensionBaseModel):
    """Weather attributes - e.g., dry bulb temperature, relative humidity.

    Requires a ``unit`` string.
    """

    unit: str
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# ---------------------------
|
|
211
|
+
# TIME DIMENSIONS
|
|
212
|
+
# ---------------------------
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
class Time(DateTimeDimensionModel):
    """Time attributes.

    Declares no fields beyond those of DateTimeDimensionModel.
    """
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
# It is unclear if we need the next few classes. They would need model definitions in
|
|
220
|
+
# order to be used.
|
|
221
|
+
#
|
|
222
|
+
# class DayType(TimeDimensionModel):
|
|
223
|
+
# """Day Type attributes"""
|
|
224
|
+
#
|
|
225
|
+
#
|
|
226
|
+
# class Season(TimeDimensionModel):
|
|
227
|
+
# """Season attributes"""
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
class AnnualTime(AnnualTimeDimensionModel):
    """Annual Time attributes.

    Declares no fields beyond those of AnnualTimeDimensionModel.
    """
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
class NoOpTime(NoOpTimeDimensionModel):
    """NoOp Time attributes.

    Declares no fields beyond those of NoOpTimeDimensionModel.
    """
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
# ---------------------------
|
|
239
|
+
# OTHER DIMENSIONS
|
|
240
|
+
# ---------------------------
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class WeatherYear(WeatherYearDimensionBaseModel):
    """Weather Year attributes.

    Declares no fields beyond those of WeatherYearDimensionBaseModel.
    """
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
class ModelYear(ModelYearDimensionBaseModel):
    """Model Year attributes.

    Declares no fields beyond those of ModelYearDimensionBaseModel.
    """
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
class Scenario(ScenarioDimensionBaseModel):
    """Scenario attributes.

    Declares no fields beyond those of ScenarioDimensionBaseModel.
    """
|