dsgrid-toolkit 0.3.3__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. build_backend.py +93 -0
  2. dsgrid/__init__.py +22 -0
  3. dsgrid/api/__init__.py +0 -0
  4. dsgrid/api/api_manager.py +179 -0
  5. dsgrid/api/app.py +419 -0
  6. dsgrid/api/models.py +60 -0
  7. dsgrid/api/response_models.py +116 -0
  8. dsgrid/apps/__init__.py +0 -0
  9. dsgrid/apps/project_viewer/app.py +216 -0
  10. dsgrid/apps/registration_gui.py +444 -0
  11. dsgrid/chronify.py +32 -0
  12. dsgrid/cli/__init__.py +0 -0
  13. dsgrid/cli/common.py +120 -0
  14. dsgrid/cli/config.py +176 -0
  15. dsgrid/cli/download.py +13 -0
  16. dsgrid/cli/dsgrid.py +157 -0
  17. dsgrid/cli/dsgrid_admin.py +92 -0
  18. dsgrid/cli/install_notebooks.py +62 -0
  19. dsgrid/cli/query.py +729 -0
  20. dsgrid/cli/registry.py +1862 -0
  21. dsgrid/cloud/__init__.py +0 -0
  22. dsgrid/cloud/cloud_storage_interface.py +140 -0
  23. dsgrid/cloud/factory.py +31 -0
  24. dsgrid/cloud/fake_storage_interface.py +37 -0
  25. dsgrid/cloud/s3_storage_interface.py +156 -0
  26. dsgrid/common.py +36 -0
  27. dsgrid/config/__init__.py +0 -0
  28. dsgrid/config/annual_time_dimension_config.py +194 -0
  29. dsgrid/config/common.py +142 -0
  30. dsgrid/config/config_base.py +148 -0
  31. dsgrid/config/dataset_config.py +907 -0
  32. dsgrid/config/dataset_schema_handler_factory.py +46 -0
  33. dsgrid/config/date_time_dimension_config.py +136 -0
  34. dsgrid/config/dimension_config.py +54 -0
  35. dsgrid/config/dimension_config_factory.py +65 -0
  36. dsgrid/config/dimension_mapping_base.py +350 -0
  37. dsgrid/config/dimension_mappings_config.py +48 -0
  38. dsgrid/config/dimensions.py +1025 -0
  39. dsgrid/config/dimensions_config.py +71 -0
  40. dsgrid/config/file_schema.py +190 -0
  41. dsgrid/config/index_time_dimension_config.py +80 -0
  42. dsgrid/config/input_dataset_requirements.py +31 -0
  43. dsgrid/config/mapping_tables.py +209 -0
  44. dsgrid/config/noop_time_dimension_config.py +42 -0
  45. dsgrid/config/project_config.py +1462 -0
  46. dsgrid/config/registration_models.py +188 -0
  47. dsgrid/config/representative_period_time_dimension_config.py +194 -0
  48. dsgrid/config/simple_models.py +49 -0
  49. dsgrid/config/supplemental_dimension.py +29 -0
  50. dsgrid/config/time_dimension_base_config.py +192 -0
  51. dsgrid/data_models.py +155 -0
  52. dsgrid/dataset/__init__.py +0 -0
  53. dsgrid/dataset/dataset.py +123 -0
  54. dsgrid/dataset/dataset_expression_handler.py +86 -0
  55. dsgrid/dataset/dataset_mapping_manager.py +121 -0
  56. dsgrid/dataset/dataset_schema_handler_base.py +945 -0
  57. dsgrid/dataset/dataset_schema_handler_one_table.py +209 -0
  58. dsgrid/dataset/dataset_schema_handler_two_table.py +322 -0
  59. dsgrid/dataset/growth_rates.py +162 -0
  60. dsgrid/dataset/models.py +51 -0
  61. dsgrid/dataset/table_format_handler_base.py +257 -0
  62. dsgrid/dataset/table_format_handler_factory.py +17 -0
  63. dsgrid/dataset/unpivoted_table.py +121 -0
  64. dsgrid/dimension/__init__.py +0 -0
  65. dsgrid/dimension/base_models.py +230 -0
  66. dsgrid/dimension/dimension_filters.py +308 -0
  67. dsgrid/dimension/standard.py +252 -0
  68. dsgrid/dimension/time.py +352 -0
  69. dsgrid/dimension/time_utils.py +103 -0
  70. dsgrid/dsgrid_rc.py +88 -0
  71. dsgrid/exceptions.py +105 -0
  72. dsgrid/filesystem/__init__.py +0 -0
  73. dsgrid/filesystem/cloud_filesystem.py +32 -0
  74. dsgrid/filesystem/factory.py +32 -0
  75. dsgrid/filesystem/filesystem_interface.py +136 -0
  76. dsgrid/filesystem/local_filesystem.py +74 -0
  77. dsgrid/filesystem/s3_filesystem.py +118 -0
  78. dsgrid/loggers.py +132 -0
  79. dsgrid/minimal_patterns.cp313-win_amd64.pyd +0 -0
  80. dsgrid/notebooks/connect_to_dsgrid_registry.ipynb +949 -0
  81. dsgrid/notebooks/registration.ipynb +48 -0
  82. dsgrid/notebooks/start_notebook.sh +11 -0
  83. dsgrid/project.py +451 -0
  84. dsgrid/query/__init__.py +0 -0
  85. dsgrid/query/dataset_mapping_plan.py +142 -0
  86. dsgrid/query/derived_dataset.py +388 -0
  87. dsgrid/query/models.py +728 -0
  88. dsgrid/query/query_context.py +287 -0
  89. dsgrid/query/query_submitter.py +994 -0
  90. dsgrid/query/report_factory.py +19 -0
  91. dsgrid/query/report_peak_load.py +70 -0
  92. dsgrid/query/reports_base.py +20 -0
  93. dsgrid/registry/__init__.py +0 -0
  94. dsgrid/registry/bulk_register.py +165 -0
  95. dsgrid/registry/common.py +287 -0
  96. dsgrid/registry/config_update_checker_base.py +63 -0
  97. dsgrid/registry/data_store_factory.py +34 -0
  98. dsgrid/registry/data_store_interface.py +74 -0
  99. dsgrid/registry/dataset_config_generator.py +158 -0
  100. dsgrid/registry/dataset_registry_manager.py +950 -0
  101. dsgrid/registry/dataset_update_checker.py +16 -0
  102. dsgrid/registry/dimension_mapping_registry_manager.py +575 -0
  103. dsgrid/registry/dimension_mapping_update_checker.py +16 -0
  104. dsgrid/registry/dimension_registry_manager.py +413 -0
  105. dsgrid/registry/dimension_update_checker.py +16 -0
  106. dsgrid/registry/duckdb_data_store.py +207 -0
  107. dsgrid/registry/filesystem_data_store.py +150 -0
  108. dsgrid/registry/filter_registry_manager.py +123 -0
  109. dsgrid/registry/project_config_generator.py +57 -0
  110. dsgrid/registry/project_registry_manager.py +1623 -0
  111. dsgrid/registry/project_update_checker.py +48 -0
  112. dsgrid/registry/registration_context.py +223 -0
  113. dsgrid/registry/registry_auto_updater.py +316 -0
  114. dsgrid/registry/registry_database.py +667 -0
  115. dsgrid/registry/registry_interface.py +446 -0
  116. dsgrid/registry/registry_manager.py +558 -0
  117. dsgrid/registry/registry_manager_base.py +367 -0
  118. dsgrid/registry/versioning.py +92 -0
  119. dsgrid/rust_ext/__init__.py +14 -0
  120. dsgrid/rust_ext/find_minimal_patterns.py +129 -0
  121. dsgrid/spark/__init__.py +0 -0
  122. dsgrid/spark/functions.py +589 -0
  123. dsgrid/spark/types.py +110 -0
  124. dsgrid/tests/__init__.py +0 -0
  125. dsgrid/tests/common.py +140 -0
  126. dsgrid/tests/make_us_data_registry.py +265 -0
  127. dsgrid/tests/register_derived_datasets.py +103 -0
  128. dsgrid/tests/utils.py +25 -0
  129. dsgrid/time/__init__.py +0 -0
  130. dsgrid/time/time_conversions.py +80 -0
  131. dsgrid/time/types.py +67 -0
  132. dsgrid/units/__init__.py +0 -0
  133. dsgrid/units/constants.py +113 -0
  134. dsgrid/units/convert.py +71 -0
  135. dsgrid/units/energy.py +145 -0
  136. dsgrid/units/power.py +87 -0
  137. dsgrid/utils/__init__.py +0 -0
  138. dsgrid/utils/dataset.py +830 -0
  139. dsgrid/utils/files.py +179 -0
  140. dsgrid/utils/filters.py +125 -0
  141. dsgrid/utils/id_remappings.py +100 -0
  142. dsgrid/utils/py_expression_eval/LICENSE +19 -0
  143. dsgrid/utils/py_expression_eval/README.md +8 -0
  144. dsgrid/utils/py_expression_eval/__init__.py +847 -0
  145. dsgrid/utils/py_expression_eval/tests.py +283 -0
  146. dsgrid/utils/run_command.py +70 -0
  147. dsgrid/utils/scratch_dir_context.py +65 -0
  148. dsgrid/utils/spark.py +918 -0
  149. dsgrid/utils/spark_partition.py +98 -0
  150. dsgrid/utils/timing.py +239 -0
  151. dsgrid/utils/utilities.py +221 -0
  152. dsgrid/utils/versioning.py +36 -0
  153. dsgrid_toolkit-0.3.3.dist-info/METADATA +193 -0
  154. dsgrid_toolkit-0.3.3.dist-info/RECORD +157 -0
  155. dsgrid_toolkit-0.3.3.dist-info/WHEEL +4 -0
  156. dsgrid_toolkit-0.3.3.dist-info/entry_points.txt +4 -0
  157. dsgrid_toolkit-0.3.3.dist-info/licenses/LICENSE +29 -0
dsgrid/units/constants.py ADDED
@@ -0,0 +1,113 @@
"""Constants for unit conversions: SI prefixes, unit labels, and energy factors."""

# SI prefix multipliers, kept as exact integers.
KILO = 1_000
MEGA = 1_000_000
GIGA = 1_000_000_000
TERA = 1_000_000_000_000

# Prefix-to-prefix factors: multiply a value expressed with the first prefix by
# X_TO_Y to express it with the second prefix. True division makes every factor
# a float, including the whole-number ones such as MEGA_TO_KILO == 1000.0.
KILO_TO_MEGA = KILO / MEGA
KILO_TO_GIGA = KILO / GIGA
KILO_TO_TERA = KILO / TERA

MEGA_TO_KILO = MEGA / KILO
MEGA_TO_GIGA = MEGA / GIGA
MEGA_TO_TERA = MEGA / TERA

GIGA_TO_KILO = GIGA / KILO
GIGA_TO_MEGA = GIGA / MEGA
GIGA_TO_TERA = GIGA / TERA

TERA_TO_KILO = TERA / KILO
TERA_TO_MEGA = TERA / MEGA
TERA_TO_GIGA = TERA / GIGA

# Energy unit labels.
KWH = "kWh"
MWH = "MWh"
GWH = "GWh"
TWH = "TWh"
THERM = "therm"
MBTU = "MBtu"

# Power unit labels.
KW = "kW"
MW = "MW"
GW = "GW"
TW = "TW"

# Therm <-> watt-hour factors. The larger watt-hour units are derived from the
# kWh factor so that the whole family stays mutually consistent.
THERM_TO_KWH = 29.307107017222222
THERM_TO_MWH = THERM_TO_KWH * KILO_TO_MEGA
THERM_TO_GWH = THERM_TO_KWH * KILO_TO_GIGA
THERM_TO_TWH = THERM_TO_KWH * KILO_TO_TERA

KWH_TO_THERM = 1 / THERM_TO_KWH
MWH_TO_THERM = 1 / THERM_TO_MWH
GWH_TO_THERM = 1 / THERM_TO_GWH
TWH_TO_THERM = 1 / THERM_TO_TWH

# BTU conversion is based on EIA. This website says 1 kWh = 3,412 BTU.
# https://www.eia.gov/energyexplained/units-and-calculators/energy-conversion-calculators.php
# The more precise number below comes from ResStock at
# https://github.com/NREL/resstock/blob/2e0a82a7bfad0f17ff75a3c66c91a5d72265a847/resources/hpxml-measures/HPXMLtoOpenStudio/resources/unit_conversions.rb
# "MBtu" here is one million BTU: 293.07 kWh * 3,412 BTU/kWh is about 1e6 BTU,
# which also agrees with MBTU_TO_THERM = 10 below.
MBTU_TO_KWH = 293.0710701722222
MBTU_TO_MWH = MBTU_TO_KWH * KILO_TO_MEGA
MBTU_TO_GWH = MBTU_TO_KWH * KILO_TO_GIGA
MBTU_TO_TWH = MBTU_TO_KWH * KILO_TO_TERA

KWH_TO_MBTU = 1 / MBTU_TO_KWH
MWH_TO_MBTU = 1 / MBTU_TO_MWH
GWH_TO_MBTU = 1 / MBTU_TO_GWH
TWH_TO_MBTU = 1 / MBTU_TO_TWH

MBTU_TO_THERM = 10.0
THERM_TO_MBTU = 1 / MBTU_TO_THERM

# Unit families: every label in a tuple can be converted to any other label in
# the same tuple.
ENERGY_UNITS = (KWH, MWH, GWH, TWH, THERM, MBTU)
POWER_UNITS = (KW, MW, GW, TW)


# Public names exported by ``from dsgrid.units.constants import *``.
__all__ = (
    "ENERGY_UNITS",
    "POWER_UNITS",
    "KILO",
    "MEGA",
    "GIGA",
    "TERA",
    "KILO_TO_MEGA",
    "KILO_TO_GIGA",
    "KILO_TO_TERA",
    "MEGA_TO_KILO",
    "MEGA_TO_GIGA",
    "MEGA_TO_TERA",
    "GIGA_TO_KILO",
    "GIGA_TO_MEGA",
    "GIGA_TO_TERA",
    "TERA_TO_KILO",
    "TERA_TO_MEGA",
    "TERA_TO_GIGA",
    "KWH",
    "MWH",
    "GWH",
    "TWH",
    "THERM",
    "MBTU",
    "KW",
    "MW",
    "GW",
    "TW",
    "THERM_TO_KWH",
    "THERM_TO_MWH",
    "THERM_TO_GWH",
    "THERM_TO_TWH",
    "KWH_TO_THERM",
    "MWH_TO_THERM",
    "GWH_TO_THERM",
    "TWH_TO_THERM",
    "MBTU_TO_KWH",
    "MBTU_TO_MWH",
    "MBTU_TO_GWH",
    "MBTU_TO_TWH",
    "KWH_TO_MBTU",
    "MWH_TO_MBTU",
    "GWH_TO_MBTU",
    "TWH_TO_MBTU",
    "MBTU_TO_THERM",
    "THERM_TO_MBTU",
)
dsgrid/units/convert.py ADDED
@@ -0,0 +1,71 @@
1
+ import logging
2
+
3
+ import dsgrid.units.energy as energy
4
+ import dsgrid.units.power as power
5
+ from dsgrid.common import VALUE_COLUMN
6
+ from dsgrid.spark.functions import except_all, is_dataframe_empty, join
7
+ from dsgrid.spark.types import DataFrame, F
8
+ from dsgrid.units.constants import ENERGY_UNITS, POWER_UNITS
9
+ from dsgrid.utils.spark import get_unique_values
10
+
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
def convert_units_unpivoted(
    df: DataFrame,
    metric_column: str,
    from_records: DataFrame,
    from_to_records: DataFrame | None,
    to_unit_records: DataFrame,
) -> DataFrame:
    """Convert the value column of an unpivoted load table to the target units.

    Parameters
    ----------
    df : DataFrame
        Load data table
    metric_column : str
        Column in dataframe with metric record IDs
    from_records : DataFrame
        Metric dimension records for the columns being converted
    from_to_records : DataFrame | None
        Records that map the dimension IDs in columns to the target IDs
        If None, mapping is not required and from_records contain the units.
    to_unit_records : DataFrame
        Metric dimension records for the target IDs

    Returns
    -------
    DataFrame
        ``df`` itself when no (id, unit) pair of the source side is missing from
        ``to_unit_records``; otherwise ``df`` with VALUE_COLUMN rewritten by the
        energy or power converter and the temporary unit columns dropped.

    Raises
    ------
    ValueError
        If the units in ``to_unit_records`` are neither all energy units nor all
        power units.
    """
    unit_col = "unit"  # must match EnergyEndUse.unit
    source_units = from_records.select("id", unit_col).withColumnRenamed(unit_col, "from_unit")
    if from_to_records is not None:
        # Re-key the source units by the target IDs that the source IDs map to.
        # NOTE(review): if two source IDs with different units map to one target
        # ID, distinct() keeps both rows and the join below would presumably
        # duplicate data rows -- confirm this is prevented upstream.
        id_map = from_to_records.select("from_id", "to_id")
        mapped = join(source_units, id_map, "id", "from_id")
        unit_df = mapped.select(F.col("to_id").alias("id"), "from_unit").distinct()
    else:
        unit_df = source_units.select("id", "from_unit")

    # Source (id, unit) pairs that have no identical pair on the target side.
    target_pairs = to_unit_records.select("id", F.col("unit").alias("from_unit"))
    mismatched = except_all(unit_df, target_pairs)
    if is_dataframe_empty(mismatched):
        logger.debug("Return early because the units match.")
        return df

    # Attach the source and target unit of each row's metric ID.
    df = join(df, unit_df, metric_column, "id").drop("id")
    target_units = to_unit_records.select("id", "unit").withColumnRenamed(unit_col, "to_unit")
    df = join(df, target_units, metric_column, "id").drop("id")
    logger.debug("Converting units from column %s", metric_column)

    # Only the target units choose the converter family.
    # NOTE(review): source units are not checked against that family; the
    # converters fall back to otherwise(None) for units they do not recognize.
    units = get_unique_values(to_unit_records, unit_col)
    converters = (
        (ENERGY_UNITS, energy.from_any_to_any),
        (POWER_UNITS, power.from_any_to_any),
    )
    for family, func in converters:
        if units.issubset(family):
            break
    else:
        msg = f"Unsupported unit conversion: {units}"
        raise ValueError(msg)

    converted = df.withColumn(VALUE_COLUMN, func("from_unit", "to_unit", VALUE_COLUMN))
    return converted.drop("from_unit", "to_unit")
dsgrid/units/energy.py ADDED
@@ -0,0 +1,145 @@
1
+ """Contains functions to perform unit conversion of energy."""
2
+
3
+ import logging
4
+
5
+ from dsgrid.spark.types import DataFrame, F
6
+ from dsgrid.units.constants import (
7
+ GIGA_TO_KILO,
8
+ GIGA_TO_MEGA,
9
+ GIGA_TO_TERA,
10
+ GWH,
11
+ GWH_TO_MBTU,
12
+ GWH_TO_THERM,
13
+ KILO_TO_GIGA,
14
+ KILO_TO_MEGA,
15
+ KILO_TO_TERA,
16
+ KWH,
17
+ KWH_TO_MBTU,
18
+ KWH_TO_THERM,
19
+ MBTU,
20
+ MBTU_TO_GWH,
21
+ MBTU_TO_KWH,
22
+ MBTU_TO_MWH,
23
+ MBTU_TO_THERM,
24
+ MBTU_TO_TWH,
25
+ MEGA_TO_GIGA,
26
+ MEGA_TO_KILO,
27
+ MEGA_TO_TERA,
28
+ MWH,
29
+ MWH_TO_MBTU,
30
+ MWH_TO_THERM,
31
+ TERA_TO_GIGA,
32
+ TERA_TO_KILO,
33
+ TERA_TO_MEGA,
34
+ THERM,
35
+ THERM_TO_GWH,
36
+ THERM_TO_KWH,
37
+ THERM_TO_MBTU,
38
+ THERM_TO_MWH,
39
+ THERM_TO_TWH,
40
+ TWH,
41
+ TWH_TO_MBTU,
42
+ TWH_TO_THERM,
43
+ )
44
+
45
+
46
+ logger = logging.getLogger(__name__)
47
+
48
+
def to_kwh(unit_col: str, value_col: str) -> DataFrame:
    """Build the expression that converts an energy column to kWh.

    Parameters
    ----------
    unit_col : str
        Name of the column holding each row's current unit.
    value_col : str
        Name of the column holding the values to convert.

    Returns
    -------
    DataFrame
        Conditional expression built with ``F.when``: the value scaled by the
        factor for the row's unit, the value unchanged when the unit is kWh or
        an empty string, and None for any other unit.
        NOTE(review): annotated as DataFrame, but from_any_to_any uses the
        result as a column expression -- the annotation looks wrong; confirm.
    """
    unit = F.col(unit_col)
    value = F.col(value_col)
    return (
        F.when(unit == KWH, value)
        .when(unit == MWH, value * MEGA_TO_KILO)
        .when(unit == GWH, value * GIGA_TO_KILO)
        .when(unit == TWH, value * TERA_TO_KILO)
        .when(unit == THERM, value * THERM_TO_KWH)
        .when(unit == MBTU, value * MBTU_TO_KWH)
        .when(unit == "", value)  # unitless rows pass through untouched
        .otherwise(None)
    )
61
+
62
+
def to_mwh(unit_col: str, value_col: str) -> DataFrame:
    """Build the expression that converts an energy column to MWh.

    Parameters
    ----------
    unit_col : str
        Name of the column holding each row's current unit.
    value_col : str
        Name of the column holding the values to convert.

    Returns
    -------
    DataFrame
        Conditional expression built with ``F.when``: the value scaled by the
        factor for the row's unit, the value unchanged when the unit is MWh or
        an empty string, and None for any other unit.
        NOTE(review): annotated as DataFrame, but from_any_to_any uses the
        result as a column expression -- the annotation looks wrong; confirm.
    """
    unit = F.col(unit_col)
    value = F.col(value_col)
    return (
        F.when(unit == KWH, value * KILO_TO_MEGA)
        .when(unit == MWH, value)
        .when(unit == GWH, value * GIGA_TO_MEGA)
        .when(unit == TWH, value * TERA_TO_MEGA)
        .when(unit == THERM, value * THERM_TO_MWH)
        .when(unit == MBTU, value * MBTU_TO_MWH)
        .when(unit == "", value)  # unitless rows pass through untouched
        .otherwise(None)
    )
75
+
76
+
def to_gwh(unit_col: str, value_col: str) -> DataFrame:
    """Build the expression that converts an energy column to GWh.

    Parameters
    ----------
    unit_col : str
        Name of the column holding each row's current unit.
    value_col : str
        Name of the column holding the values to convert.

    Returns
    -------
    DataFrame
        Conditional expression built with ``F.when``: the value scaled by the
        factor for the row's unit, the value unchanged when the unit is GWh or
        an empty string, and None for any other unit.
        NOTE(review): annotated as DataFrame, but from_any_to_any uses the
        result as a column expression -- the annotation looks wrong; confirm.
    """
    unit = F.col(unit_col)
    value = F.col(value_col)
    return (
        F.when(unit == KWH, value * KILO_TO_GIGA)
        .when(unit == MWH, value * MEGA_TO_GIGA)
        .when(unit == GWH, value)
        .when(unit == TWH, value * TERA_TO_GIGA)
        .when(unit == THERM, value * THERM_TO_GWH)
        .when(unit == MBTU, value * MBTU_TO_GWH)
        .when(unit == "", value)  # unitless rows pass through untouched
        .otherwise(None)
    )
89
+
90
+
def to_twh(unit_col: str, value_col: str) -> DataFrame:
    """Build the expression that converts an energy column to TWh.

    Parameters
    ----------
    unit_col : str
        Name of the column holding each row's current unit.
    value_col : str
        Name of the column holding the values to convert.

    Returns
    -------
    DataFrame
        Conditional expression built with ``F.when``: the value scaled by the
        factor for the row's unit, the value unchanged when the unit is TWh or
        an empty string, and None for any other unit.
        NOTE(review): annotated as DataFrame, but from_any_to_any uses the
        result as a column expression -- the annotation looks wrong; confirm.
    """
    unit = F.col(unit_col)
    value = F.col(value_col)
    return (
        F.when(unit == KWH, value * KILO_TO_TERA)
        .when(unit == MWH, value * MEGA_TO_TERA)
        .when(unit == GWH, value * GIGA_TO_TERA)
        .when(unit == TWH, value)
        .when(unit == THERM, value * THERM_TO_TWH)
        .when(unit == MBTU, value * MBTU_TO_TWH)
        .when(unit == "", value)  # unitless rows pass through untouched
        .otherwise(None)
    )
103
+
104
+
def to_therm(unit_col: str, value_col: str) -> DataFrame:
    """Build the expression that converts an energy column to therm.

    Parameters
    ----------
    unit_col : str
        Name of the column holding each row's current unit.
    value_col : str
        Name of the column holding the values to convert.

    Returns
    -------
    DataFrame
        Conditional expression built with ``F.when``: the value scaled by the
        factor for the row's unit, the value unchanged when the unit is therm
        or an empty string, and None for any other unit.
        NOTE(review): annotated as DataFrame, but from_any_to_any uses the
        result as a column expression -- the annotation looks wrong; confirm.
    """
    unit = F.col(unit_col)
    value = F.col(value_col)
    return (
        F.when(unit == KWH, value * KWH_TO_THERM)
        .when(unit == MWH, value * MWH_TO_THERM)
        .when(unit == GWH, value * GWH_TO_THERM)
        .when(unit == TWH, value * TWH_TO_THERM)
        .when(unit == THERM, value)
        .when(unit == MBTU, value * MBTU_TO_THERM)
        .when(unit == "", value)  # unitless rows pass through untouched
        .otherwise(None)
    )
117
+
118
+
def to_mbtu(unit_col: str, value_col: str) -> DataFrame:
    """Build the expression that converts an energy column to MBtu.

    Parameters
    ----------
    unit_col : str
        Name of the column holding each row's current unit.
    value_col : str
        Name of the column holding the values to convert.

    Returns
    -------
    DataFrame
        Conditional expression built with ``F.when``: the value scaled by the
        factor for the row's unit, the value unchanged when the unit is MBtu or
        an empty string, and None for any other unit.
        NOTE(review): annotated as DataFrame, but from_any_to_any uses the
        result as a column expression -- the annotation looks wrong; confirm.
    """
    unit = F.col(unit_col)
    value = F.col(value_col)
    return (
        F.when(unit == KWH, value * KWH_TO_MBTU)
        .when(unit == MWH, value * MWH_TO_MBTU)
        .when(unit == GWH, value * GWH_TO_MBTU)
        .when(unit == TWH, value * TWH_TO_MBTU)
        .when(unit == THERM, value * THERM_TO_MBTU)
        .when(unit == MBTU, value)
        .when(unit == "", value)  # unitless rows pass through untouched
        .otherwise(None)
    )
131
+
132
+
def from_any_to_any(from_unit_col: str, to_unit_col: str, value_col: str) -> DataFrame:
    """Build the expression that converts energy values row by row.

    Parameters
    ----------
    from_unit_col : str
        Name of the column holding each row's current unit.
    to_unit_col : str
        Name of the column holding each row's target unit.
    value_col : str
        Name of the column holding the values to convert.

    Returns
    -------
    DataFrame
        Conditional expression built with ``F.when``: the value unchanged when
        the two units are equal or the current unit is an empty string, the
        matching ``to_*`` conversion when the target unit is a known energy
        unit, and None otherwise.
        NOTE(review): annotated as DataFrame, but the value is a column
        expression -- the annotation looks wrong; confirm.
    """
    source = F.col(from_unit_col)
    target = F.col(to_unit_col)
    unchanged = F.col(value_col)

    # Pass-through cases are tested before any per-target conversion.
    expr = F.when(source == target, unchanged).when(source == "", unchanged)
    per_target = (
        (KWH, to_kwh),
        (MWH, to_mwh),
        (GWH, to_gwh),
        (TWH, to_twh),
        (THERM, to_therm),
        (MBTU, to_mbtu),
    )
    for unit, converter in per_target:
        expr = expr.when(target == unit, converter(from_unit_col, value_col))
    return expr.otherwise(None)
dsgrid/units/power.py ADDED
@@ -0,0 +1,87 @@
1
+ """Contains functions to perform unit conversion of power."""
2
+
3
+ import logging
4
+
5
+ from dsgrid.spark.types import DataFrame, F
6
+ from dsgrid.units.constants import (
7
+ GIGA_TO_KILO,
8
+ GIGA_TO_MEGA,
9
+ GIGA_TO_TERA,
10
+ GW,
11
+ KILO_TO_GIGA,
12
+ KILO_TO_MEGA,
13
+ KILO_TO_TERA,
14
+ KW,
15
+ MEGA_TO_GIGA,
16
+ MEGA_TO_KILO,
17
+ MEGA_TO_TERA,
18
+ MW,
19
+ TERA_TO_GIGA,
20
+ TERA_TO_KILO,
21
+ TERA_TO_MEGA,
22
+ TW,
23
+ )
24
+
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
def to_kw(unit_col: str, value_col: str) -> DataFrame:
    """Build the expression that converts a power column to kW.

    Parameters
    ----------
    unit_col : str
        Name of the column holding each row's current unit.
    value_col : str
        Name of the column holding the values to convert.

    Returns
    -------
    DataFrame
        Conditional expression built with ``F.when``: the value scaled by the
        factor for the row's unit, the value unchanged when the unit is kW or
        an empty string, and None for any other unit.
        NOTE(review): annotated as DataFrame, but from_any_to_any uses the
        result as a column expression -- the annotation looks wrong; confirm.
    """
    unit = F.col(unit_col)
    value = F.col(value_col)
    return (
        F.when(unit == KW, value)
        .when(unit == MW, value * MEGA_TO_KILO)
        .when(unit == GW, value * GIGA_TO_KILO)
        .when(unit == TW, value * TERA_TO_KILO)
        .when(unit == "", value)  # unitless rows pass through untouched
        .otherwise(None)
    )
39
+
40
+
def to_mw(unit_col: str, value_col: str) -> DataFrame:
    """Build the expression that converts a power column to MW.

    Parameters
    ----------
    unit_col : str
        Name of the column holding each row's current unit.
    value_col : str
        Name of the column holding the values to convert.

    Returns
    -------
    DataFrame
        Conditional expression built with ``F.when``: the value scaled by the
        factor for the row's unit, the value unchanged when the unit is MW or
        an empty string, and None for any other unit.
        NOTE(review): annotated as DataFrame, but from_any_to_any uses the
        result as a column expression -- the annotation looks wrong; confirm.
    """
    unit = F.col(unit_col)
    value = F.col(value_col)
    return (
        F.when(unit == KW, value * KILO_TO_MEGA)
        .when(unit == MW, value)
        .when(unit == GW, value * GIGA_TO_MEGA)
        .when(unit == TW, value * TERA_TO_MEGA)
        .when(unit == "", value)  # unitless rows pass through untouched
        .otherwise(None)
    )
51
+
52
+
def to_gw(unit_col: str, value_col: str) -> DataFrame:
    """Build the expression that converts a power column to GW.

    Parameters
    ----------
    unit_col : str
        Name of the column holding each row's current unit.
    value_col : str
        Name of the column holding the values to convert.

    Returns
    -------
    DataFrame
        Conditional expression built with ``F.when``: the value scaled by the
        factor for the row's unit, the value unchanged when the unit is GW or
        an empty string, and None for any other unit.
        NOTE(review): annotated as DataFrame, but from_any_to_any uses the
        result as a column expression -- the annotation looks wrong; confirm.
    """
    unit = F.col(unit_col)
    value = F.col(value_col)
    return (
        F.when(unit == KW, value * KILO_TO_GIGA)
        .when(unit == MW, value * MEGA_TO_GIGA)
        .when(unit == GW, value)
        .when(unit == TW, value * TERA_TO_GIGA)
        .when(unit == "", value)  # unitless rows pass through untouched
        .otherwise(None)
    )
63
+
64
+
def to_tw(unit_col: str, value_col: str) -> DataFrame:
    """Build the expression that converts a power column to TW.

    Parameters
    ----------
    unit_col : str
        Name of the column holding each row's current unit.
    value_col : str
        Name of the column holding the values to convert.

    Returns
    -------
    DataFrame
        Conditional expression built with ``F.when``: the value scaled by the
        factor for the row's unit, the value unchanged when the unit is TW or
        an empty string, and None for any other unit.
        NOTE(review): annotated as DataFrame, but from_any_to_any uses the
        result as a column expression -- the annotation looks wrong; confirm.
    """
    unit = F.col(unit_col)
    value = F.col(value_col)
    return (
        F.when(unit == KW, value * KILO_TO_TERA)
        .when(unit == MW, value * MEGA_TO_TERA)
        .when(unit == GW, value * GIGA_TO_TERA)
        .when(unit == TW, value)
        .when(unit == "", value)  # unitless rows pass through untouched
        .otherwise(None)
    )
75
+
76
+
def from_any_to_any(from_unit_col: str, to_unit_col: str, value_col: str) -> DataFrame:
    """Build the expression that converts power values row by row.

    Parameters
    ----------
    from_unit_col : str
        Name of the column holding each row's current unit.
    to_unit_col : str
        Name of the column holding each row's target unit.
    value_col : str
        Name of the column holding the values to convert.

    Returns
    -------
    DataFrame
        Conditional expression built with ``F.when``: the value unchanged when
        the two units are equal or the current unit is an empty string, the
        matching ``to_*`` conversion when the target unit is a known power
        unit, and None otherwise.
        NOTE(review): annotated as DataFrame, but the value is a column
        expression -- the annotation looks wrong; confirm.
    """
    source = F.col(from_unit_col)
    target = F.col(to_unit_col)
    unchanged = F.col(value_col)

    # Pass-through cases are tested before any per-target conversion.
    expr = F.when(source == target, unchanged).when(source == "", unchanged)
    per_target = (
        (KW, to_kw),
        (MW, to_mw),
        (GW, to_gw),
        (TW, to_tw),
    )
    for unit, converter in per_target:
        expr = expr.when(target == unit, converter(from_unit_col, value_col))
    return expr.otherwise(None)
File without changes