dsgrid-toolkit 0.3.3__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backend.py +93 -0
- dsgrid/__init__.py +22 -0
- dsgrid/api/__init__.py +0 -0
- dsgrid/api/api_manager.py +179 -0
- dsgrid/api/app.py +419 -0
- dsgrid/api/models.py +60 -0
- dsgrid/api/response_models.py +116 -0
- dsgrid/apps/__init__.py +0 -0
- dsgrid/apps/project_viewer/app.py +216 -0
- dsgrid/apps/registration_gui.py +444 -0
- dsgrid/chronify.py +32 -0
- dsgrid/cli/__init__.py +0 -0
- dsgrid/cli/common.py +120 -0
- dsgrid/cli/config.py +176 -0
- dsgrid/cli/download.py +13 -0
- dsgrid/cli/dsgrid.py +157 -0
- dsgrid/cli/dsgrid_admin.py +92 -0
- dsgrid/cli/install_notebooks.py +62 -0
- dsgrid/cli/query.py +729 -0
- dsgrid/cli/registry.py +1862 -0
- dsgrid/cloud/__init__.py +0 -0
- dsgrid/cloud/cloud_storage_interface.py +140 -0
- dsgrid/cloud/factory.py +31 -0
- dsgrid/cloud/fake_storage_interface.py +37 -0
- dsgrid/cloud/s3_storage_interface.py +156 -0
- dsgrid/common.py +36 -0
- dsgrid/config/__init__.py +0 -0
- dsgrid/config/annual_time_dimension_config.py +194 -0
- dsgrid/config/common.py +142 -0
- dsgrid/config/config_base.py +148 -0
- dsgrid/config/dataset_config.py +907 -0
- dsgrid/config/dataset_schema_handler_factory.py +46 -0
- dsgrid/config/date_time_dimension_config.py +136 -0
- dsgrid/config/dimension_config.py +54 -0
- dsgrid/config/dimension_config_factory.py +65 -0
- dsgrid/config/dimension_mapping_base.py +350 -0
- dsgrid/config/dimension_mappings_config.py +48 -0
- dsgrid/config/dimensions.py +1025 -0
- dsgrid/config/dimensions_config.py +71 -0
- dsgrid/config/file_schema.py +190 -0
- dsgrid/config/index_time_dimension_config.py +80 -0
- dsgrid/config/input_dataset_requirements.py +31 -0
- dsgrid/config/mapping_tables.py +209 -0
- dsgrid/config/noop_time_dimension_config.py +42 -0
- dsgrid/config/project_config.py +1462 -0
- dsgrid/config/registration_models.py +188 -0
- dsgrid/config/representative_period_time_dimension_config.py +194 -0
- dsgrid/config/simple_models.py +49 -0
- dsgrid/config/supplemental_dimension.py +29 -0
- dsgrid/config/time_dimension_base_config.py +192 -0
- dsgrid/data_models.py +155 -0
- dsgrid/dataset/__init__.py +0 -0
- dsgrid/dataset/dataset.py +123 -0
- dsgrid/dataset/dataset_expression_handler.py +86 -0
- dsgrid/dataset/dataset_mapping_manager.py +121 -0
- dsgrid/dataset/dataset_schema_handler_base.py +945 -0
- dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
- dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
- dsgrid/dataset/growth_rates.py +162 -0
- dsgrid/dataset/models.py +51 -0
- dsgrid/dataset/table_format_handler_base.py +257 -0
- dsgrid/dataset/table_format_handler_factory.py +17 -0
- dsgrid/dataset/unpivoted_table.py +121 -0
- dsgrid/dimension/__init__.py +0 -0
- dsgrid/dimension/base_models.py +230 -0
- dsgrid/dimension/dimension_filters.py +308 -0
- dsgrid/dimension/standard.py +252 -0
- dsgrid/dimension/time.py +352 -0
- dsgrid/dimension/time_utils.py +103 -0
- dsgrid/dsgrid_rc.py +88 -0
- dsgrid/exceptions.py +105 -0
- dsgrid/filesystem/__init__.py +0 -0
- dsgrid/filesystem/cloud_filesystem.py +32 -0
- dsgrid/filesystem/factory.py +32 -0
- dsgrid/filesystem/filesystem_interface.py +136 -0
- dsgrid/filesystem/local_filesystem.py +74 -0
- dsgrid/filesystem/s3_filesystem.py +118 -0
- dsgrid/loggers.py +132 -0
- dsgrid/minimal_patterns.cp313-win_amd64.pyd +0 -0
- dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
- dsgrid/notebooks/registration.ipynb +48 -0
- dsgrid/notebooks/start_notebook.sh +11 -0
- dsgrid/project.py +451 -0
- dsgrid/query/__init__.py +0 -0
- dsgrid/query/dataset_mapping_plan.py +142 -0
- dsgrid/query/derived_dataset.py +388 -0
- dsgrid/query/models.py +728 -0
- dsgrid/query/query_context.py +287 -0
- dsgrid/query/query_submitter.py +994 -0
- dsgrid/query/report_factory.py +19 -0
- dsgrid/query/report_peak_load.py +70 -0
- dsgrid/query/reports_base.py +20 -0
- dsgrid/registry/__init__.py +0 -0
- dsgrid/registry/bulk_register.py +165 -0
- dsgrid/registry/common.py +287 -0
- dsgrid/registry/config_update_checker_base.py +63 -0
- dsgrid/registry/data_store_factory.py +34 -0
- dsgrid/registry/data_store_interface.py +74 -0
- dsgrid/registry/dataset_config_generator.py +158 -0
- dsgrid/registry/dataset_registry_manager.py +950 -0
- dsgrid/registry/dataset_update_checker.py +16 -0
- dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
- dsgrid/registry/dimension_mapping_update_checker.py +16 -0
- dsgrid/registry/dimension_registry_manager.py +413 -0
- dsgrid/registry/dimension_update_checker.py +16 -0
- dsgrid/registry/duckdb_data_store.py +207 -0
- dsgrid/registry/filesystem_data_store.py +150 -0
- dsgrid/registry/filter_registry_manager.py +123 -0
- dsgrid/registry/project_config_generator.py +57 -0
- dsgrid/registry/project_registry_manager.py +1623 -0
- dsgrid/registry/project_update_checker.py +48 -0
- dsgrid/registry/registration_context.py +223 -0
- dsgrid/registry/registry_auto_updater.py +316 -0
- dsgrid/registry/registry_database.py +667 -0
- dsgrid/registry/registry_interface.py +446 -0
- dsgrid/registry/registry_manager.py +558 -0
- dsgrid/registry/registry_manager_base.py +367 -0
- dsgrid/registry/versioning.py +92 -0
- dsgrid/rust_ext/__init__.py +14 -0
- dsgrid/rust_ext/find_minimal_patterns.py +129 -0
- dsgrid/spark/__init__.py +0 -0
- dsgrid/spark/functions.py +589 -0
- dsgrid/spark/types.py +110 -0
- dsgrid/tests/__init__.py +0 -0
- dsgrid/tests/common.py +140 -0
- dsgrid/tests/make_us_data_registry.py +265 -0
- dsgrid/tests/register_derived_datasets.py +103 -0
- dsgrid/tests/utils.py +25 -0
- dsgrid/time/__init__.py +0 -0
- dsgrid/time/time_conversions.py +80 -0
- dsgrid/time/types.py +67 -0
- dsgrid/units/__init__.py +0 -0
- dsgrid/units/constants.py +113 -0
- dsgrid/units/convert.py +71 -0
- dsgrid/units/energy.py +145 -0
- dsgrid/units/power.py +87 -0
- dsgrid/utils/__init__.py +0 -0
- dsgrid/utils/dataset.py +830 -0
- dsgrid/utils/files.py +179 -0
- dsgrid/utils/filters.py +125 -0
- dsgrid/utils/id_remappings.py +100 -0
- dsgrid/utils/py_expression_eval/LICENSE +19 -0
- dsgrid/utils/py_expression_eval/README.md +8 -0
- dsgrid/utils/py_expression_eval/__init__.py +847 -0
- dsgrid/utils/py_expression_eval/tests.py +283 -0
- dsgrid/utils/run_command.py +70 -0
- dsgrid/utils/scratch_dir_context.py +65 -0
- dsgrid/utils/spark.py +918 -0
- dsgrid/utils/spark_partition.py +98 -0
- dsgrid/utils/timing.py +239 -0
- dsgrid/utils/utilities.py +221 -0
- dsgrid/utils/versioning.py +36 -0
- dsgrid_toolkit-0.3.3.dist-info/METADATA +193 -0
- dsgrid_toolkit-0.3.3.dist-info/RECORD +157 -0
- dsgrid_toolkit-0.3.3.dist-info/WHEEL +4 -0
- dsgrid_toolkit-0.3.3.dist-info/entry_points.txt +4 -0
- dsgrid_toolkit-0.3.3.dist-info/licenses/LICENSE +29 -0
dsgrid/config/common.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
from typing import Any, Iterable
|
|
2
|
+
|
|
3
|
+
from dsgrid.dimension.base_models import DimensionType
|
|
4
|
+
from dsgrid.dimension.time import MeasurementType, TimeDimensionType, TimeIntervalType
|
|
5
|
+
from dsgrid.dimension.standard import (
|
|
6
|
+
EnergyEfficiency,
|
|
7
|
+
EnergyEndUse,
|
|
8
|
+
EnergyIntensity,
|
|
9
|
+
EnergyIntensityRegression,
|
|
10
|
+
EnergyServiceDemand,
|
|
11
|
+
EnergyServiceDemandRegression,
|
|
12
|
+
FractionalIndex,
|
|
13
|
+
PeggedIndex,
|
|
14
|
+
Population,
|
|
15
|
+
Stock,
|
|
16
|
+
StockShare,
|
|
17
|
+
StockRegression,
|
|
18
|
+
WeatherVariable,
|
|
19
|
+
)
|
|
20
|
+
from dsgrid.exceptions import DSGInvalidParameter
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Class names of the metric record classes accepted as ``metric_types`` by
# make_base_dimension_template().
SUPPORTED_METRIC_TYPES = {
    metric_class.__name__
    for metric_class in (
        EnergyEfficiency,
        EnergyEndUse,
        EnergyIntensity,
        EnergyIntensityRegression,
        EnergyServiceDemand,
        EnergyServiceDemandRegression,
        FractionalIndex,
        PeggedIndex,
        Population,
        Stock,
        StockShare,
        StockRegression,
        WeatherVariable,
    )
}

# Class name written into the "class" field of a template entry for each
# dimension type. METRIC has no entry here; metric entries use the requested
# metric type name as their class instead.
DIMENSION_CLASS_MAP: dict[DimensionType, str] = {
    DimensionType.GEOGRAPHY: "Geography",
    DimensionType.MODEL_YEAR: "ModelYear",
    DimensionType.SCENARIO: "Scenario",
    DimensionType.SECTOR: "Sector",
    DimensionType.SUBSECTOR: "Subsector",
    DimensionType.TIME: "Time",
    DimensionType.WEATHER_YEAR: "WeatherYear",
}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def make_base_dimension_template(
    metric_types: Iterable[str],
    exclude_dimension_types: set[DimensionType] | None = None,
    time_type: TimeDimensionType | None = None,
) -> list[dict[str, Any]]:
    """Build a list of template dimension definitions.

    The result contains, in order: one metric entry per item in
    ``metric_types``, one entry per remaining dimension type, and optionally
    one time entry.

    Parameters
    ----------
    metric_types : Iterable[str]
        Metric class names. Each must be a member of SUPPORTED_METRIC_TYPES.
    exclude_dimension_types : set[DimensionType] | None
        Dimension types to leave out of the generic entries. METRIC and TIME
        are always left out of that section because they are generated
        separately. The set passed in is not modified.
    time_type : TimeDimensionType | None
        If not None, append the entry returned by
        make_base_time_dimension_template() for this time type.

    Returns
    -------
    list[dict[str, Any]]

    Raises
    ------
    DSGInvalidParameter
        Raised if a metric type is not in SUPPORTED_METRIC_TYPES.

    """
    # Build a new set rather than calling update() on the argument, which would
    # leak METRIC and TIME into the caller's set.
    exclude: set[DimensionType] = set(exclude_dimension_types or ()) | {
        DimensionType.METRIC,
        DimensionType.TIME,
    }

    dimensions: list[dict[str, Any]] = []
    for metric_type in metric_types:
        if metric_type not in SUPPORTED_METRIC_TYPES:
            msg = f"{metric_type=} is not one of the {SUPPORTED_METRIC_TYPES=}"
            raise DSGInvalidParameter(msg)
        dimensions.append(
            {
                "type": DimensionType.METRIC.value,
                "class": metric_type,
                "name": DimensionType.METRIC.value,
                "description": DimensionType.METRIC.value,
                "file": f"dimensions/{metric_type}.csv",
                "module": "dsgrid.dimension.standard",
            }
        )

    dimensions += [
        {
            "type": x.value,
            "class": DIMENSION_CLASS_MAP[x],
            "name": x.value,
            "description": x.value,
            "file": f"dimensions/{x.value}.csv",
            "module": "dsgrid.dimension.standard",
        }
        for x in DimensionType
        if x not in exclude
    ]

    if time_type is not None:
        dimensions.append(make_base_time_dimension_template(time_type))

    return dimensions
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def make_base_time_dimension_template(time_type: TimeDimensionType) -> dict[str, Any]:
    """Return a template time dimension definition for the given time type.

    Every result carries the common fields (type, time_type,
    time_interval_type, name, description, module). DATETIME, ANNUAL, and INDEX
    additionally get a "class" and hard-coded default "ranges" (plus their
    type-specific fields). Any other time type gets only the common fields.

    Parameters
    ----------
    time_type : TimeDimensionType

    Returns
    -------
    dict[str, Any]

    """
    label = time_type.value
    template: dict[str, Any] = {
        "type": DimensionType.TIME.value,
        "time_type": label,
        "time_interval_type": TimeIntervalType.PERIOD_BEGINNING.value,
        "name": label,
        "description": label,
        "module": "dsgrid.dimension.standard",
    }

    if time_type == TimeDimensionType.DATETIME:
        template.update(
            {
                "class": "Time",
                "time_zone_format": {
                    "format_type": "aligned_in_absolute_time",
                    "time_zone": "Etc/GMT+5",
                },
                "measurement_type": MeasurementType.TOTAL.value,
                "ranges": [
                    {
                        "start": "2018-01-01 00:00:00",
                        "end": "2018-12-31 23:00:00",
                        "frequency": "P0DT1H",
                        "str_format": "%Y-%m-%d %H:%M:%S",
                    },
                ],
            }
        )
    elif time_type == TimeDimensionType.ANNUAL:
        template.update(
            {
                "class": "AnnualTime",
                "include_leap_day": True,
                "ranges": [
                    {
                        "start": "2010",
                        "end": "2024",
                        "frequency": 1,
                        "str_format": "%Y",
                    },
                ],
            }
        )
    elif time_type == TimeDimensionType.INDEX:
        template.update(
            {
                "class": "IndexTime",
                "ranges": [
                    {
                        "start": 0,
                        "end": 8759,
                        "frequency": "P0DT1H",
                        "starting_timestamp": "2018-01-01 00:00:00",
                        "str_format": "%Y-%m-%d %H:%M:%S",
                    },
                ],
            }
        )

    return template
|
|
dsgrid/config/config_base.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
import logging
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Type
|
|
5
|
+
|
|
6
|
+
import json5
|
|
7
|
+
|
|
8
|
+
from dsgrid.exceptions import DSGInvalidOperation
|
|
9
|
+
from dsgrid.spark.types import (
|
|
10
|
+
DataFrame,
|
|
11
|
+
)
|
|
12
|
+
from dsgrid.utils.spark import models_to_dataframe
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ConfigBase(abc.ABC):
    """Abstract base for config classes that wrap a data model."""

    def __init__(self, model):
        self._model = model

    @classmethod
    def load(cls, config_file, *args, **kwargs):
        """Create a config from a file.

        Subclasses that need extra arguments can override this method; all
        arguments are forwarded to ``_load``.

        Parameters
        ----------
        config_file : str
            Config file to load.

        Returns
        -------
        ConfigBase

        """
        return cls._load(config_file, *args, **kwargs)

    @classmethod
    def load_from_model(cls, model):
        """Create a config that wraps an already-constructed model.

        Parameters
        ----------
        model : DSGBaseModel

        Returns
        -------
        ConfigBase

        """
        return cls(model)

    @classmethod
    def _load(cls, config_file):
        # The model class reads the file; this class only wraps the result.
        return cls(cls.model_class().load(config_file))

    @staticmethod
    @abc.abstractmethod
    def config_filename() -> str:
        """Return the name of the file that stores this config.

        Returns
        -------
        str

        """

    @property
    @abc.abstractmethod
    def config_id(self) -> str:
        """Return the ID of this configuration.

        Returns
        -------
        str

        """

    @property
    def model(self):
        """Return the data model wrapped by this config.

        Returns
        -------
        DSGBaseModel

        """
        return self._model

    @staticmethod
    @abc.abstractmethod
    def model_class() -> Type:
        """Return the data model class that backs this config."""

    def serialize(self, path, force=False):
        """Write the model as JSON to ``config_filename()`` inside a directory.

        Parameters
        ----------
        path : str
            Directory that receives the file.
        force : bool
            If True, overwrite an existing file.

        Returns
        -------
        Path
            The file that was written.

        Raises
        ------
        DSGInvalidOperation
            Raised if the file already exists and force is False.

        """
        target = Path(path) / self.config_filename()
        if target.exists() and not force:
            msg = f"{target} exists. Set force=True to overwrite."
            raise DSGInvalidOperation(msg)
        serialized = self.model.model_dump_json(indent=2)
        target.write_text(serialized)
        return target
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class ConfigWithRecordFileBase(ConfigBase, abc.ABC):
    """Intermediate-level base class to provide serialization of record files."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def get_records_dataframe(self) -> DataFrame:
        """Return the model's records as a Spark dataframe.

        NOTE(review): any caching is done inside models_to_dataframe, keyed by
        the table name - confirm against that helper.
        """
        # id() makes the name unique per instance; config_id makes a cached
        # table recognizable. Spark doesn't allow dashes in the table name.
        table_name = f"{self.config_id}__{id(self)}".replace("-", "_")
        df = models_to_dataframe(self.model.records, table_name=table_name)
        logger.debug("Loaded %s records dataframe", self.config_id)
        return df

    @classmethod
    def load(cls, config_file):
        """Load the config from a file.

        Parameters
        ----------
        config_file : str

        Returns
        -------
        ConfigWithRecordFileBase

        """
        config = super().load(config_file)
        return config

    def serialize(self, path, force=False):
        """Write the config file and a ``records.csv`` file to a directory.

        Parameters
        ----------
        path : str | Path
            Directory that receives both files.
        force : bool
            If True, overwrite existing files.

        Returns
        -------
        Path
            The config file that was written.

        Raises
        ------
        DSGInvalidOperation
            Raised if either file already exists and force is False.

        """
        # Accept str as well as Path, matching ConfigBase.serialize; a str
        # previously failed on the "/" operator below.
        path = Path(path)
        dst_config_file = path / self.config_filename()
        records_file = path / "records.csv"
        # Check both targets before writing anything so a refusal leaves the
        # directory untouched.
        for filename in (dst_config_file, records_file):
            if filename.exists() and not force:
                msg = f"{filename} exists. Set force=True to overwrite."
                raise DSGInvalidOperation(msg)

        self.get_records_dataframe().toPandas().to_csv(records_file, index=False)
        model_data = self.model.serialize()
        # Point the config at the records file by bare name, since it is
        # written next to the config file.
        model_data["file"] = records_file.name
        dst_config_file.write_text(json5.dumps(model_data, indent=2))
        return dst_config_file
|