dsgrid-toolkit 0.3.3__cp313-cp313-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- build_backend.py +93 -0
- dsgrid/__init__.py +22 -0
- dsgrid/api/__init__.py +0 -0
- dsgrid/api/api_manager.py +179 -0
- dsgrid/api/app.py +419 -0
- dsgrid/api/models.py +60 -0
- dsgrid/api/response_models.py +116 -0
- dsgrid/apps/__init__.py +0 -0
- dsgrid/apps/project_viewer/app.py +216 -0
- dsgrid/apps/registration_gui.py +444 -0
- dsgrid/chronify.py +32 -0
- dsgrid/cli/__init__.py +0 -0
- dsgrid/cli/common.py +120 -0
- dsgrid/cli/config.py +176 -0
- dsgrid/cli/download.py +13 -0
- dsgrid/cli/dsgrid.py +157 -0
- dsgrid/cli/dsgrid_admin.py +92 -0
- dsgrid/cli/install_notebooks.py +62 -0
- dsgrid/cli/query.py +729 -0
- dsgrid/cli/registry.py +1862 -0
- dsgrid/cloud/__init__.py +0 -0
- dsgrid/cloud/cloud_storage_interface.py +140 -0
- dsgrid/cloud/factory.py +31 -0
- dsgrid/cloud/fake_storage_interface.py +37 -0
- dsgrid/cloud/s3_storage_interface.py +156 -0
- dsgrid/common.py +36 -0
- dsgrid/config/__init__.py +0 -0
- dsgrid/config/annual_time_dimension_config.py +194 -0
- dsgrid/config/common.py +142 -0
- dsgrid/config/config_base.py +148 -0
- dsgrid/config/dataset_config.py +907 -0
- dsgrid/config/dataset_schema_handler_factory.py +46 -0
- dsgrid/config/date_time_dimension_config.py +136 -0
- dsgrid/config/dimension_config.py +54 -0
- dsgrid/config/dimension_config_factory.py +65 -0
- dsgrid/config/dimension_mapping_base.py +350 -0
- dsgrid/config/dimension_mappings_config.py +48 -0
- dsgrid/config/dimensions.py +1025 -0
- dsgrid/config/dimensions_config.py +71 -0
- dsgrid/config/file_schema.py +190 -0
- dsgrid/config/index_time_dimension_config.py +80 -0
- dsgrid/config/input_dataset_requirements.py +31 -0
- dsgrid/config/mapping_tables.py +209 -0
- dsgrid/config/noop_time_dimension_config.py +42 -0
- dsgrid/config/project_config.py +1462 -0
- dsgrid/config/registration_models.py +188 -0
- dsgrid/config/representative_period_time_dimension_config.py +194 -0
- dsgrid/config/simple_models.py +49 -0
- dsgrid/config/supplemental_dimension.py +29 -0
- dsgrid/config/time_dimension_base_config.py +192 -0
- dsgrid/data_models.py +155 -0
- dsgrid/dataset/__init__.py +0 -0
- dsgrid/dataset/dataset.py +123 -0
- dsgrid/dataset/dataset_expression_handler.py +86 -0
- dsgrid/dataset/dataset_mapping_manager.py +121 -0
- dsgrid/dataset/dataset_schema_handler_base.py +945 -0
- dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
- dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
- dsgrid/dataset/growth_rates.py +162 -0
- dsgrid/dataset/models.py +51 -0
- dsgrid/dataset/table_format_handler_base.py +257 -0
- dsgrid/dataset/table_format_handler_factory.py +17 -0
- dsgrid/dataset/unpivoted_table.py +121 -0
- dsgrid/dimension/__init__.py +0 -0
- dsgrid/dimension/base_models.py +230 -0
- dsgrid/dimension/dimension_filters.py +308 -0
- dsgrid/dimension/standard.py +252 -0
- dsgrid/dimension/time.py +352 -0
- dsgrid/dimension/time_utils.py +103 -0
- dsgrid/dsgrid_rc.py +88 -0
- dsgrid/exceptions.py +105 -0
- dsgrid/filesystem/__init__.py +0 -0
- dsgrid/filesystem/cloud_filesystem.py +32 -0
- dsgrid/filesystem/factory.py +32 -0
- dsgrid/filesystem/filesystem_interface.py +136 -0
- dsgrid/filesystem/local_filesystem.py +74 -0
- dsgrid/filesystem/s3_filesystem.py +118 -0
- dsgrid/loggers.py +132 -0
- dsgrid/minimal_patterns.cp313-win_amd64.pyd +0 -0
- dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
- dsgrid/notebooks/registration.ipynb +48 -0
- dsgrid/notebooks/start_notebook.sh +11 -0
- dsgrid/project.py +451 -0
- dsgrid/query/__init__.py +0 -0
- dsgrid/query/dataset_mapping_plan.py +142 -0
- dsgrid/query/derived_dataset.py +388 -0
- dsgrid/query/models.py +728 -0
- dsgrid/query/query_context.py +287 -0
- dsgrid/query/query_submitter.py +994 -0
- dsgrid/query/report_factory.py +19 -0
- dsgrid/query/report_peak_load.py +70 -0
- dsgrid/query/reports_base.py +20 -0
- dsgrid/registry/__init__.py +0 -0
- dsgrid/registry/bulk_register.py +165 -0
- dsgrid/registry/common.py +287 -0
- dsgrid/registry/config_update_checker_base.py +63 -0
- dsgrid/registry/data_store_factory.py +34 -0
- dsgrid/registry/data_store_interface.py +74 -0
- dsgrid/registry/dataset_config_generator.py +158 -0
- dsgrid/registry/dataset_registry_manager.py +950 -0
- dsgrid/registry/dataset_update_checker.py +16 -0
- dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
- dsgrid/registry/dimension_mapping_update_checker.py +16 -0
- dsgrid/registry/dimension_registry_manager.py +413 -0
- dsgrid/registry/dimension_update_checker.py +16 -0
- dsgrid/registry/duckdb_data_store.py +207 -0
- dsgrid/registry/filesystem_data_store.py +150 -0
- dsgrid/registry/filter_registry_manager.py +123 -0
- dsgrid/registry/project_config_generator.py +57 -0
- dsgrid/registry/project_registry_manager.py +1623 -0
- dsgrid/registry/project_update_checker.py +48 -0
- dsgrid/registry/registration_context.py +223 -0
- dsgrid/registry/registry_auto_updater.py +316 -0
- dsgrid/registry/registry_database.py +667 -0
- dsgrid/registry/registry_interface.py +446 -0
- dsgrid/registry/registry_manager.py +558 -0
- dsgrid/registry/registry_manager_base.py +367 -0
- dsgrid/registry/versioning.py +92 -0
- dsgrid/rust_ext/__init__.py +14 -0
- dsgrid/rust_ext/find_minimal_patterns.py +129 -0
- dsgrid/spark/__init__.py +0 -0
- dsgrid/spark/functions.py +589 -0
- dsgrid/spark/types.py +110 -0
- dsgrid/tests/__init__.py +0 -0
- dsgrid/tests/common.py +140 -0
- dsgrid/tests/make_us_data_registry.py +265 -0
- dsgrid/tests/register_derived_datasets.py +103 -0
- dsgrid/tests/utils.py +25 -0
- dsgrid/time/__init__.py +0 -0
- dsgrid/time/time_conversions.py +80 -0
- dsgrid/time/types.py +67 -0
- dsgrid/units/__init__.py +0 -0
- dsgrid/units/constants.py +113 -0
- dsgrid/units/convert.py +71 -0
- dsgrid/units/energy.py +145 -0
- dsgrid/units/power.py +87 -0
- dsgrid/utils/__init__.py +0 -0
- dsgrid/utils/dataset.py +830 -0
- dsgrid/utils/files.py +179 -0
- dsgrid/utils/filters.py +125 -0
- dsgrid/utils/id_remappings.py +100 -0
- dsgrid/utils/py_expression_eval/LICENSE +19 -0
- dsgrid/utils/py_expression_eval/README.md +8 -0
- dsgrid/utils/py_expression_eval/__init__.py +847 -0
- dsgrid/utils/py_expression_eval/tests.py +283 -0
- dsgrid/utils/run_command.py +70 -0
- dsgrid/utils/scratch_dir_context.py +65 -0
- dsgrid/utils/spark.py +918 -0
- dsgrid/utils/spark_partition.py +98 -0
- dsgrid/utils/timing.py +239 -0
- dsgrid/utils/utilities.py +221 -0
- dsgrid/utils/versioning.py +36 -0
- dsgrid_toolkit-0.3.3.dist-info/METADATA +193 -0
- dsgrid_toolkit-0.3.3.dist-info/RECORD +157 -0
- dsgrid_toolkit-0.3.3.dist-info/WHEEL +4 -0
- dsgrid_toolkit-0.3.3.dist-info/entry_points.txt +4 -0
- dsgrid_toolkit-0.3.3.dist-info/licenses/LICENSE +29 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""Constants for unit conversions."""

# Metric prefix multipliers, kept as plain integers.
KILO = 10**3
MEGA = 10**6
GIGA = 10**9
TERA = 10**12

# Prefix-to-prefix scale factors: multiply a value expressed with the first
# prefix by the factor to express it with the second prefix.
KILO_TO_MEGA = KILO / MEGA
KILO_TO_GIGA = KILO / GIGA
KILO_TO_TERA = KILO / TERA

MEGA_TO_KILO = MEGA / KILO
MEGA_TO_GIGA = MEGA / GIGA
MEGA_TO_TERA = MEGA / TERA

GIGA_TO_KILO = GIGA / KILO
GIGA_TO_MEGA = GIGA / MEGA
GIGA_TO_TERA = GIGA / TERA

TERA_TO_KILO = TERA / KILO
TERA_TO_MEGA = TERA / MEGA
TERA_TO_GIGA = TERA / GIGA

# Unit labels for energy.
KWH = "kWh"
MWH = "MWh"
GWH = "GWh"
TWH = "TWh"
THERM = "therm"
MBTU = "MBtu"

# Unit labels for power.
KW = "kW"
MW = "MW"
GW = "GW"
TW = "TW"

# therm -> watt-hour based units; the larger units are derived from the kWh factor.
THERM_TO_KWH = 29.307107017222222
THERM_TO_MWH = THERM_TO_KWH * KILO_TO_MEGA
THERM_TO_GWH = THERM_TO_KWH * KILO_TO_GIGA
THERM_TO_TWH = THERM_TO_KWH * KILO_TO_TERA

# watt-hour based units -> therm, defined as reciprocals of the factors above.
KWH_TO_THERM = 1 / THERM_TO_KWH
MWH_TO_THERM = 1 / THERM_TO_MWH
GWH_TO_THERM = 1 / THERM_TO_GWH
TWH_TO_THERM = 1 / THERM_TO_TWH

# BTU conversion is based on EIA. This website says 1 kWh = 3,412 BTU.
# https://www.eia.gov/energyexplained/units-and-calculators/energy-conversion-calculators.php
# The more precise number below comes from ResStock at
# https://github.com/NREL/resstock/blob/2e0a82a7bfad0f17ff75a3c66c91a5d72265a847/resources/hpxml-measures/HPXMLtoOpenStudio/resources/unit_conversions.rb
MBTU_TO_KWH = 293.0710701722222
MBTU_TO_MWH = MBTU_TO_KWH * KILO_TO_MEGA
MBTU_TO_GWH = MBTU_TO_KWH * KILO_TO_GIGA
MBTU_TO_TWH = MBTU_TO_KWH * KILO_TO_TERA

# watt-hour based units -> MBtu, defined as reciprocals of the factors above.
KWH_TO_MBTU = 1 / MBTU_TO_KWH
MWH_TO_MBTU = 1 / MBTU_TO_MWH
GWH_TO_MBTU = 1 / MBTU_TO_GWH
TWH_TO_MBTU = 1 / MBTU_TO_TWH

MBTU_TO_THERM = 10.0
THERM_TO_MBTU = 1 / MBTU_TO_THERM

# Unit labels grouped by the kind of quantity they measure.
ENERGY_UNITS = (KWH, MWH, GWH, TWH, THERM, MBTU)
POWER_UNITS = (KW, MW, GW, TW)


# Public names exported by this module.
__all__ = (
    # unit groups
    "ENERGY_UNITS", "POWER_UNITS",
    # prefix multipliers
    "KILO", "MEGA", "GIGA", "TERA",
    # prefix-to-prefix factors
    "KILO_TO_MEGA", "KILO_TO_GIGA", "KILO_TO_TERA",
    "MEGA_TO_KILO", "MEGA_TO_GIGA", "MEGA_TO_TERA",
    "GIGA_TO_KILO", "GIGA_TO_MEGA", "GIGA_TO_TERA",
    "TERA_TO_KILO", "TERA_TO_MEGA", "TERA_TO_GIGA",
    # energy unit labels
    "KWH", "MWH", "GWH", "TWH", "THERM", "MBTU",
    # power unit labels
    "KW", "MW", "GW", "TW",
    # therm <-> watt-hour factors
    "THERM_TO_KWH", "THERM_TO_MWH", "THERM_TO_GWH", "THERM_TO_TWH",
    "KWH_TO_THERM", "MWH_TO_THERM", "GWH_TO_THERM", "TWH_TO_THERM",
    # MBtu <-> watt-hour factors
    "MBTU_TO_KWH", "MBTU_TO_MWH", "MBTU_TO_GWH", "MBTU_TO_TWH",
    "KWH_TO_MBTU", "MWH_TO_MBTU", "GWH_TO_MBTU", "TWH_TO_MBTU",
    # MBtu <-> therm factors
    "MBTU_TO_THERM", "THERM_TO_MBTU",
)
|
dsgrid/units/convert.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
import dsgrid.units.energy as energy
|
|
4
|
+
import dsgrid.units.power as power
|
|
5
|
+
from dsgrid.common import VALUE_COLUMN
|
|
6
|
+
from dsgrid.spark.functions import except_all, is_dataframe_empty, join
|
|
7
|
+
from dsgrid.spark.types import DataFrame, F
|
|
8
|
+
from dsgrid.units.constants import ENERGY_UNITS, POWER_UNITS
|
|
9
|
+
from dsgrid.utils.spark import get_unique_values
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def convert_units_unpivoted(
    df: DataFrame,
    metric_column: str,
    from_records: DataFrame,
    from_to_records: DataFrame | None,
    to_unit_records: DataFrame,
) -> DataFrame:
    """Rescale the value column of an unpivoted table into the target units.

    Parameters
    ----------
    df : DataFrame
        Load data table
    metric_column : str
        Name of the column in ``df`` that holds metric record IDs
    from_records : DataFrame
        Metric dimension records (``id`` and ``unit`` columns are read) that
        describe the units the data is currently in
    from_to_records : DataFrame | None
        Mapping records (``from_id`` and ``to_id`` columns are read) from the
        source IDs to the target IDs. When None, no mapping is applied and the
        units are looked up directly by the IDs in ``from_records``.
    to_unit_records : DataFrame
        Metric dimension records (``id`` and ``unit`` columns are read) for the
        target IDs

    Returns
    -------
    DataFrame
        ``df`` itself when no (id, unit) pair on the source side is missing
        from the target records; otherwise a dataframe whose value column has
        been converted and whose helper unit columns have been dropped.

    Raises
    ------
    ValueError
        Raised if the target units are neither all energy units nor all power
        units.
    """
    unit_col = "unit"  # must match EnergyEndUse.unit
    source_units = from_records.select("id", unit_col).withColumnRenamed(unit_col, "from_unit")
    if from_to_records is not None:
        # Re-key the source units by the target ID they map to.
        id_map = from_to_records.select("from_id", "to_id")
        mapped = join(source_units, id_map, "id", "from_id")
        unit_df = mapped.select(F.col("to_id").alias("id"), "from_unit").distinct()
    else:
        unit_df = source_units.select("id", "from_unit")

    # Shape the target records like unit_df so the two can be compared row-wise.
    targets_as_source = to_unit_records.select("id", F.col(unit_col).alias("from_unit"))
    if is_dataframe_empty(except_all(unit_df, targets_as_source)):
        logger.debug("Return early because the units match.")
        return df

    # Attach the from/to unit of every row, keyed by its metric ID.
    df = join(df, unit_df, metric_column, "id").drop("id")
    target_units = to_unit_records.select("id", unit_col).withColumnRenamed(unit_col, "to_unit")
    df = join(df, target_units, metric_column, "id").drop("id")
    logger.debug("Converting units from column %s", metric_column)

    units = get_unique_values(to_unit_records, unit_col)
    converters = (
        (ENERGY_UNITS, energy.from_any_to_any),
        (POWER_UNITS, power.from_any_to_any),
    )
    for supported_units, converter in converters:
        if units.issubset(supported_units):
            break
    else:
        msg = f"Unsupported unit conversion: {units}"
        raise ValueError(msg)

    converted = df.withColumn(VALUE_COLUMN, converter("from_unit", "to_unit", VALUE_COLUMN))
    return converted.drop("from_unit", "to_unit")
|
dsgrid/units/energy.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""Contains functions to perform unit conversion of energy."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from dsgrid.spark.types import DataFrame, F
|
|
6
|
+
from dsgrid.units.constants import (
|
|
7
|
+
GIGA_TO_KILO,
|
|
8
|
+
GIGA_TO_MEGA,
|
|
9
|
+
GIGA_TO_TERA,
|
|
10
|
+
GWH,
|
|
11
|
+
GWH_TO_MBTU,
|
|
12
|
+
GWH_TO_THERM,
|
|
13
|
+
KILO_TO_GIGA,
|
|
14
|
+
KILO_TO_MEGA,
|
|
15
|
+
KILO_TO_TERA,
|
|
16
|
+
KWH,
|
|
17
|
+
KWH_TO_MBTU,
|
|
18
|
+
KWH_TO_THERM,
|
|
19
|
+
MBTU,
|
|
20
|
+
MBTU_TO_GWH,
|
|
21
|
+
MBTU_TO_KWH,
|
|
22
|
+
MBTU_TO_MWH,
|
|
23
|
+
MBTU_TO_THERM,
|
|
24
|
+
MBTU_TO_TWH,
|
|
25
|
+
MEGA_TO_GIGA,
|
|
26
|
+
MEGA_TO_KILO,
|
|
27
|
+
MEGA_TO_TERA,
|
|
28
|
+
MWH,
|
|
29
|
+
MWH_TO_MBTU,
|
|
30
|
+
MWH_TO_THERM,
|
|
31
|
+
TERA_TO_GIGA,
|
|
32
|
+
TERA_TO_KILO,
|
|
33
|
+
TERA_TO_MEGA,
|
|
34
|
+
THERM,
|
|
35
|
+
THERM_TO_GWH,
|
|
36
|
+
THERM_TO_KWH,
|
|
37
|
+
THERM_TO_MBTU,
|
|
38
|
+
THERM_TO_MWH,
|
|
39
|
+
THERM_TO_TWH,
|
|
40
|
+
TWH,
|
|
41
|
+
TWH_TO_MBTU,
|
|
42
|
+
TWH_TO_THERM,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
logger = logging.getLogger(__name__)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def to_kwh(unit_col: str, value_col: str) -> DataFrame:
    """Build an expression that rescales ``value_col`` into kWh.

    The source unit of each row is read from ``unit_col``. Values already in
    kWh, or whose unit is the empty string, pass through unchanged; any other
    unrecognized unit yields null.

    NOTE(review): the result is built with ``F.when``/``otherwise`` and looks
    like a column expression rather than a DataFrame -- confirm the annotation.
    """

    def has_unit(name: str):
        return F.col(unit_col) == name

    def scaled_by(factor: float):
        return F.col(value_col) * factor

    expr = F.when(has_unit(KWH), F.col(value_col))
    expr = expr.when(has_unit(MWH), scaled_by(MEGA_TO_KILO))
    expr = expr.when(has_unit(GWH), scaled_by(GIGA_TO_KILO))
    expr = expr.when(has_unit(TWH), scaled_by(TERA_TO_KILO))
    expr = expr.when(has_unit(THERM), scaled_by(THERM_TO_KWH))
    expr = expr.when(has_unit(MBTU), scaled_by(MBTU_TO_KWH))
    # An empty unit string means there is nothing to convert.
    expr = expr.when(has_unit(""), F.col(value_col))
    return expr.otherwise(None)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def to_mwh(unit_col: str, value_col: str) -> DataFrame:
    """Build an expression that rescales ``value_col`` into MWh.

    The source unit of each row is read from ``unit_col``. Values already in
    MWh, or whose unit is the empty string, pass through unchanged; any other
    unrecognized unit yields null.

    NOTE(review): the result is built with ``F.when``/``otherwise`` and looks
    like a column expression rather than a DataFrame -- confirm the annotation.
    """

    def has_unit(name: str):
        return F.col(unit_col) == name

    def scaled_by(factor: float):
        return F.col(value_col) * factor

    expr = F.when(has_unit(KWH), scaled_by(KILO_TO_MEGA))
    expr = expr.when(has_unit(MWH), F.col(value_col))
    expr = expr.when(has_unit(GWH), scaled_by(GIGA_TO_MEGA))
    expr = expr.when(has_unit(TWH), scaled_by(TERA_TO_MEGA))
    expr = expr.when(has_unit(THERM), scaled_by(THERM_TO_MWH))
    expr = expr.when(has_unit(MBTU), scaled_by(MBTU_TO_MWH))
    # An empty unit string means there is nothing to convert.
    expr = expr.when(has_unit(""), F.col(value_col))
    return expr.otherwise(None)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def to_gwh(unit_col: str, value_col: str) -> DataFrame:
    """Build an expression that rescales ``value_col`` into GWh.

    The source unit of each row is read from ``unit_col``. Values already in
    GWh, or whose unit is the empty string, pass through unchanged; any other
    unrecognized unit yields null.

    NOTE(review): the result is built with ``F.when``/``otherwise`` and looks
    like a column expression rather than a DataFrame -- confirm the annotation.
    """

    def has_unit(name: str):
        return F.col(unit_col) == name

    def scaled_by(factor: float):
        return F.col(value_col) * factor

    expr = F.when(has_unit(KWH), scaled_by(KILO_TO_GIGA))
    expr = expr.when(has_unit(MWH), scaled_by(MEGA_TO_GIGA))
    expr = expr.when(has_unit(GWH), F.col(value_col))
    expr = expr.when(has_unit(TWH), scaled_by(TERA_TO_GIGA))
    expr = expr.when(has_unit(THERM), scaled_by(THERM_TO_GWH))
    expr = expr.when(has_unit(MBTU), scaled_by(MBTU_TO_GWH))
    # An empty unit string means there is nothing to convert.
    expr = expr.when(has_unit(""), F.col(value_col))
    return expr.otherwise(None)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def to_twh(unit_col: str, value_col: str) -> DataFrame:
    """Build an expression that rescales ``value_col`` into TWh.

    The source unit of each row is read from ``unit_col``. Values already in
    TWh, or whose unit is the empty string, pass through unchanged; any other
    unrecognized unit yields null.

    NOTE(review): the result is built with ``F.when``/``otherwise`` and looks
    like a column expression rather than a DataFrame -- confirm the annotation.
    """

    def has_unit(name: str):
        return F.col(unit_col) == name

    def scaled_by(factor: float):
        return F.col(value_col) * factor

    expr = F.when(has_unit(KWH), scaled_by(KILO_TO_TERA))
    expr = expr.when(has_unit(MWH), scaled_by(MEGA_TO_TERA))
    expr = expr.when(has_unit(GWH), scaled_by(GIGA_TO_TERA))
    expr = expr.when(has_unit(TWH), F.col(value_col))
    expr = expr.when(has_unit(THERM), scaled_by(THERM_TO_TWH))
    expr = expr.when(has_unit(MBTU), scaled_by(MBTU_TO_TWH))
    # An empty unit string means there is nothing to convert.
    expr = expr.when(has_unit(""), F.col(value_col))
    return expr.otherwise(None)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def to_therm(unit_col: str, value_col: str) -> DataFrame:
    """Build an expression that rescales ``value_col`` into therms.

    The source unit of each row is read from ``unit_col``. Values already in
    therms, or whose unit is the empty string, pass through unchanged; any
    other unrecognized unit yields null.

    NOTE(review): the result is built with ``F.when``/``otherwise`` and looks
    like a column expression rather than a DataFrame -- confirm the annotation.
    """

    def has_unit(name: str):
        return F.col(unit_col) == name

    def scaled_by(factor: float):
        return F.col(value_col) * factor

    expr = F.when(has_unit(KWH), scaled_by(KWH_TO_THERM))
    expr = expr.when(has_unit(MWH), scaled_by(MWH_TO_THERM))
    expr = expr.when(has_unit(GWH), scaled_by(GWH_TO_THERM))
    expr = expr.when(has_unit(TWH), scaled_by(TWH_TO_THERM))
    expr = expr.when(has_unit(THERM), F.col(value_col))
    expr = expr.when(has_unit(MBTU), scaled_by(MBTU_TO_THERM))
    # An empty unit string means there is nothing to convert.
    expr = expr.when(has_unit(""), F.col(value_col))
    return expr.otherwise(None)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def to_mbtu(unit_col: str, value_col: str) -> DataFrame:
    """Build an expression that rescales ``value_col`` into MBtu.

    The source unit of each row is read from ``unit_col``. Values already in
    MBtu, or whose unit is the empty string, pass through unchanged; any other
    unrecognized unit yields null.

    NOTE(review): the result is built with ``F.when``/``otherwise`` and looks
    like a column expression rather than a DataFrame -- confirm the annotation.
    """

    def has_unit(name: str):
        return F.col(unit_col) == name

    def scaled_by(factor: float):
        return F.col(value_col) * factor

    expr = F.when(has_unit(KWH), scaled_by(KWH_TO_MBTU))
    expr = expr.when(has_unit(MWH), scaled_by(MWH_TO_MBTU))
    expr = expr.when(has_unit(GWH), scaled_by(GWH_TO_MBTU))
    expr = expr.when(has_unit(TWH), scaled_by(TWH_TO_MBTU))
    expr = expr.when(has_unit(THERM), scaled_by(THERM_TO_MBTU))
    expr = expr.when(has_unit(MBTU), F.col(value_col))
    # An empty unit string means there is nothing to convert.
    expr = expr.when(has_unit(""), F.col(value_col))
    return expr.otherwise(None)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def from_any_to_any(from_unit_col: str, to_unit_col: str, value_col: str) -> DataFrame:
    """Build an expression that converts energy values row by row.

    Each row's source unit is read from ``from_unit_col`` and its target unit
    from ``to_unit_col``. The value is left unchanged when the two units are
    equal or when the source unit is the empty string. Otherwise the matching
    ``to_*`` converter for the target unit is applied; an unrecognized target
    unit yields null.

    NOTE(review): the result is built with ``F.when``/``otherwise`` and looks
    like a column expression rather than a DataFrame -- confirm the annotation.
    """
    # Target unit label paired with its converter, in the order they are tested.
    converters = (
        (KWH, to_kwh),
        (MWH, to_mwh),
        (GWH, to_gwh),
        (TWH, to_twh),
        (THERM, to_therm),
        (MBTU, to_mbtu),
    )
    expr = F.when(F.col(from_unit_col) == F.col(to_unit_col), F.col(value_col))
    expr = expr.when(F.col(from_unit_col) == "", F.col(value_col))
    for target_unit, converter in converters:
        expr = expr.when(F.col(to_unit_col) == target_unit, converter(from_unit_col, value_col))
    return expr.otherwise(None)
|
dsgrid/units/power.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Contains functions to perform unit conversion of power."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from dsgrid.spark.types import DataFrame, F
|
|
6
|
+
from dsgrid.units.constants import (
|
|
7
|
+
GIGA_TO_KILO,
|
|
8
|
+
GIGA_TO_MEGA,
|
|
9
|
+
GIGA_TO_TERA,
|
|
10
|
+
GW,
|
|
11
|
+
KILO_TO_GIGA,
|
|
12
|
+
KILO_TO_MEGA,
|
|
13
|
+
KILO_TO_TERA,
|
|
14
|
+
KW,
|
|
15
|
+
MEGA_TO_GIGA,
|
|
16
|
+
MEGA_TO_KILO,
|
|
17
|
+
MEGA_TO_TERA,
|
|
18
|
+
MW,
|
|
19
|
+
TERA_TO_GIGA,
|
|
20
|
+
TERA_TO_KILO,
|
|
21
|
+
TERA_TO_MEGA,
|
|
22
|
+
TW,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def to_kw(unit_col: str, value_col: str) -> DataFrame:
    """Build an expression that rescales ``value_col`` into kW.

    The source unit of each row is read from ``unit_col``. Values already in
    kW, or whose unit is the empty string, pass through unchanged; any other
    unrecognized unit yields null.

    NOTE(review): the result is built with ``F.when``/``otherwise`` and looks
    like a column expression rather than a DataFrame -- confirm the annotation.
    """

    def has_unit(name: str):
        return F.col(unit_col) == name

    def scaled_by(factor: float):
        return F.col(value_col) * factor

    expr = F.when(has_unit(KW), F.col(value_col))
    expr = expr.when(has_unit(MW), scaled_by(MEGA_TO_KILO))
    expr = expr.when(has_unit(GW), scaled_by(GIGA_TO_KILO))
    expr = expr.when(has_unit(TW), scaled_by(TERA_TO_KILO))
    # An empty unit string means there is nothing to convert.
    expr = expr.when(has_unit(""), F.col(value_col))
    return expr.otherwise(None)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def to_mw(unit_col: str, value_col: str) -> DataFrame:
    """Build an expression that rescales ``value_col`` into MW.

    The source unit of each row is read from ``unit_col``. Values already in
    MW, or whose unit is the empty string, pass through unchanged; any other
    unrecognized unit yields null.

    NOTE(review): the result is built with ``F.when``/``otherwise`` and looks
    like a column expression rather than a DataFrame -- confirm the annotation.
    """

    def has_unit(name: str):
        return F.col(unit_col) == name

    def scaled_by(factor: float):
        return F.col(value_col) * factor

    expr = F.when(has_unit(KW), scaled_by(KILO_TO_MEGA))
    expr = expr.when(has_unit(MW), F.col(value_col))
    expr = expr.when(has_unit(GW), scaled_by(GIGA_TO_MEGA))
    expr = expr.when(has_unit(TW), scaled_by(TERA_TO_MEGA))
    # An empty unit string means there is nothing to convert.
    expr = expr.when(has_unit(""), F.col(value_col))
    return expr.otherwise(None)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def to_gw(unit_col: str, value_col: str) -> DataFrame:
    """Build an expression that rescales ``value_col`` into GW.

    The source unit of each row is read from ``unit_col``. Values already in
    GW, or whose unit is the empty string, pass through unchanged; any other
    unrecognized unit yields null.

    NOTE(review): the result is built with ``F.when``/``otherwise`` and looks
    like a column expression rather than a DataFrame -- confirm the annotation.
    """

    def has_unit(name: str):
        return F.col(unit_col) == name

    def scaled_by(factor: float):
        return F.col(value_col) * factor

    expr = F.when(has_unit(KW), scaled_by(KILO_TO_GIGA))
    expr = expr.when(has_unit(MW), scaled_by(MEGA_TO_GIGA))
    expr = expr.when(has_unit(GW), F.col(value_col))
    expr = expr.when(has_unit(TW), scaled_by(TERA_TO_GIGA))
    # An empty unit string means there is nothing to convert.
    expr = expr.when(has_unit(""), F.col(value_col))
    return expr.otherwise(None)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def to_tw(unit_col: str, value_col: str) -> DataFrame:
    """Build an expression that rescales ``value_col`` into TW.

    The source unit of each row is read from ``unit_col``. Values already in
    TW, or whose unit is the empty string, pass through unchanged; any other
    unrecognized unit yields null.

    NOTE(review): the result is built with ``F.when``/``otherwise`` and looks
    like a column expression rather than a DataFrame -- confirm the annotation.
    """

    def has_unit(name: str):
        return F.col(unit_col) == name

    def scaled_by(factor: float):
        return F.col(value_col) * factor

    expr = F.when(has_unit(KW), scaled_by(KILO_TO_TERA))
    expr = expr.when(has_unit(MW), scaled_by(MEGA_TO_TERA))
    expr = expr.when(has_unit(GW), scaled_by(GIGA_TO_TERA))
    expr = expr.when(has_unit(TW), F.col(value_col))
    # An empty unit string means there is nothing to convert.
    expr = expr.when(has_unit(""), F.col(value_col))
    return expr.otherwise(None)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def from_any_to_any(from_unit_col: str, to_unit_col: str, value_col: str) -> DataFrame:
    """Build an expression that converts power values row by row.

    Each row's source unit is read from ``from_unit_col`` and its target unit
    from ``to_unit_col``. The value is left unchanged when the two units are
    equal or when the source unit is the empty string. Otherwise the matching
    ``to_*`` converter for the target unit is applied; an unrecognized target
    unit yields null.

    NOTE(review): the result is built with ``F.when``/``otherwise`` and looks
    like a column expression rather than a DataFrame -- confirm the annotation.
    """
    # Target unit label paired with its converter, in the order they are tested.
    converters = (
        (KW, to_kw),
        (MW, to_mw),
        (GW, to_gw),
        (TW, to_tw),
    )
    expr = F.when(F.col(from_unit_col) == F.col(to_unit_col), F.col(value_col))
    expr = expr.when(F.col(from_unit_col) == "", F.col(value_col))
    for target_unit, converter in converters:
        expr = expr.when(F.col(to_unit_col) == target_unit, converter(from_unit_col, value_col))
    return expr.otherwise(None)
|
dsgrid/utils/__init__.py
ADDED
|
File without changes
|