dwind 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dwind/__init__.py +1 -1
- dwind/btm_sizing.py +1 -2
- dwind/cli/__init__.py +0 -0
- dwind/cli/collect.py +114 -0
- dwind/cli/debug.py +137 -0
- dwind/cli/run.py +288 -0
- dwind/cli/utils.py +166 -0
- dwind/config.py +147 -6
- dwind/main.py +20 -0
- dwind/model.py +128 -63
- dwind/mp.py +30 -35
- dwind/resource.py +120 -41
- dwind/scenarios.py +73 -36
- dwind/utils/array.py +16 -89
- dwind/utils/hpc.py +44 -2
- dwind/utils/loader.py +63 -0
- dwind/utils/progress.py +60 -0
- dwind/valuation.py +368 -239
- {dwind-0.3.1.dist-info → dwind-0.3.2.dist-info}/METADATA +2 -1
- dwind-0.3.2.dist-info/RECORD +28 -0
- dwind-0.3.2.dist-info/entry_points.txt +2 -0
- dwind-0.3.1.dist-info/RECORD +0 -20
- dwind-0.3.1.dist-info/entry_points.txt +0 -2
- {dwind-0.3.1.dist-info → dwind-0.3.2.dist-info}/WHEEL +0 -0
- {dwind-0.3.1.dist-info → dwind-0.3.2.dist-info}/licenses/LICENSE.txt +0 -0
- {dwind-0.3.1.dist-info → dwind-0.3.2.dist-info}/top_level.txt +0 -0
dwind/resource.py
CHANGED
@@ -1,23 +1,75 @@
+"""Provides the :py:class:`ResourcePotential` class for gathering pre-calculated reV generation
+data.
+"""
+
 import h5py as h5
 import pandas as pd
 
-from dwind import Configuration
+from dwind.config import Sector, Technology, Configuration
 
 
 class ResourcePotential:
+    """Helper class designed to retrieve pre-calculated energy generation data from reV."""
+
     def __init__(
-        self,
+        self,
+        parcels: pd.DataFrame,
+        model_config: Configuration,
+        sector: Sector,
+        tech: str = "wind",
+        year: int = 2018,
     ):
+        """Initializes the :py:class:`ResourcePotential` instance.
+
+        Args:
+            parcels (pd.DataFrame): The agent DataFrame containing at least the following columns:
+                "gid", "rev_gid_{tech}", "solar_az_tilt" (solar only), "azimuth_{sector}"
+                (solar only), "tilt_{tech}" (solar only), "turbine_class" (wind only),
+                "wind_turbine_kw" (wind only), and "turbine_height_m" (wind only).
+            model_config (Configuration): The pre-loaded model configuration data object containing
+                the requisite SQL, file, and configuration data.
+            sector (dwind.config.Sector): A valid sector instance.
+            tech (str, optional): One of "solar" or "wind". Defaults to "wind".
+            year (int, optional): Resource year for the reV lookup. Defaults to 2018.
+
+        Raises:
+            ValueError: Raised if :py:attr:`parcels` is missing any of the required columns.
+        """
         self.df = parcels
-        self.tech = tech
-        self.
+        self.tech = Technology(tech)
+        self.sector = sector
         self.year = year
         self.config = model_config
 
-
-
-
-
+        solar_cols = ("solar_az_tilt", f"azimuth_{self.sector.value}", f"tilt_{self.tech.value}")
+        # wind_cols = ("turbine_class", "wind_turbine_kw", "turbine_height_m")
+        wind_cols = ("wind_turbine_kw", "turbine_height_m")
+
+        if self.tech is Technology.WIND:
+            cols = wind_cols
+        elif self.tech is Technology.SOLAR:
+            cols = solar_cols
+
+        missing = set(cols).difference(self.df.columns.tolist())
+        if missing:
+            raise ValueError(f"`parcels` is missing the following columns: {', '.join(missing)}")
+
+    def create_rev_gid_to_summary_lkup(
+        self, configs: list[str], *, save_csv: bool = True
+    ) -> pd.DataFrame:
+        """Creates the reV summary tables based on the "gid" mappings in :py:attr:`parcels`.
+
+        Args:
+            configs (list[str]): The list of technology-specific configurations for which the
+                generation data should be retrieved.
+            save_csv (bool, optional): If True, save the resulting lookup calculated from reV to
+                the reV folder defined in ``Configuration.rev.generation.{tech}_DIR``. Defaults to
+                True.
+
+        Returns:
+            pd.DataFrame: reV generation lookup table for the technology-specific configurations
+                in :py:attr:`configs`.
+        """
         config_dfs = []
         for c in configs:
             file_str = self.config.rev.DIR / f"rev_{c}_generation_{self.year}.h5"
@@ -30,10 +82,10 @@ class ResourcePotential:
 
         config_df = pd.concat([rev_index, gids, annual_energy, cf_mean], axis=1)
         config_df.columns = [
-            f"rev_index_{self.tech}",
-            f"rev_gid_{self.tech}",
-            f"{self.tech}_naep",
-            f"{self.tech}_cf",
+            f"rev_index_{self.tech.value}",
+            f"rev_gid_{self.tech.value}",
+            f"{self.tech.value}_naep",
+            f"{self.tech.value}_cf",
         ]
 
         config_df["config"] = c
@@ -43,87 +95,107 @@ class ResourcePotential:
 
         if save_csv:
             save_name = (
-                self.config.rev.generation[f"{self.tech}_DIR"]
-                / f"lkup_rev_gid_to_summary_{self.tech}_{self.year}.csv"
+                self.config.rev.generation[f"{self.tech.value}_DIR"]
+                / f"lkup_rev_gid_to_summary_{self.tech.value}_{self.year}.csv"
             )
             summary_df.to_csv(save_name, index=False)
 
         return summary_df
 
     def find_rev_summary_table(self):
-
+        """Creates the generation summary data for each of the :py:attr:`tech`-specific
+        configurations specified in :py:attr:`config.rev.settings.{tech}`, then maps it to the
+        agent data (:py:attr:`parcels`), overwriting any previously computed data.
+        """
+        if self.tech is Technology.SOLAR:
             configs = self.config.rev.settings.solar
             config_col = "solar_az_tilt"
-            col_list = ["gid", f"rev_gid_{self.tech}", config_col]
-            self.df[config_col] = self.df[f"azimuth_{self.
+            col_list = ["gid", f"rev_gid_{self.tech.value}", config_col]
+            self.df[config_col] = self.df[f"azimuth_{self.sector.value}"].map(
                 self.config.rev.settings.azimuth_direction_to_degree
             )
             self.df[config_col] = (
-                self.df[config_col].astype(str)
+                self.df[config_col].astype(str)
+                + "_"
+                + self.df[f"tilt_{self.tech.value}"].astype(str)
             )
-        elif self.tech
+        elif self.tech is Technology.WIND:
            configs = self.config.rev.settings.wind
            config_col = "turbine_class"
            col_list = [
                "gid",
-                f"rev_gid_{self.tech}",
+                f"rev_gid_{self.tech.value}",
                config_col,
                "turbine_height_m",
                "wind_turbine_kw",
            ]
            self.df[config_col] = self.df["wind_turbine_kw"].map(self.config.rev.turbine_class_dict)
 
-        out_cols = [
-
-
+        out_cols = [
+            *col_list,
+            f"rev_index_{self.tech.value}",
+            f"{self.tech.value}_naep",
+            f"{self.tech.value}_cf",
+        ]
+
+        drop_cols = [
+            f"rev_gid_{self.tech.value}",
+            f"{self.tech.value}_naep",
+            f"{self.tech.value}_cf",
+        ]
         self.df = self.df.drop(columns=[c for c in drop_cols if c in self.df])
 
         f_gen = (
-            self.config.rev.generation[f"{self.tech}_DIR"]
-            / f"lkup_rev_gid_to_summary_{self.tech}_{self.year}.csv"
+            self.config.rev.generation[f"{self.tech.value}_DIR"]
+            / f"lkup_rev_gid_to_summary_{self.tech.value}_{self.year}.csv"
        )
 
        if f_gen.exists():
-            generation_summary = pd.read_csv(f_gen)
+            generation_summary = pd.read_csv(f_gen, dtype_backend="pyarrow")
        else:
            generation_summary = self.create_rev_gid_to_summary_lkup(configs)
 
        generation_summary = (
            generation_summary.reset_index(drop=True)
-            .drop_duplicates(subset=[f"rev_index_{self.tech}", "config"])
+            .drop_duplicates(subset=[f"rev_index_{self.tech.value}", "config"])
            .rename(columns={"config": config_col})
        )
        agents = self.df.merge(
-            generation_summary, how="left", on=[f"rev_index_{self.tech}", config_col]
+            generation_summary, how="left", on=[f"rev_index_{self.tech.value}", config_col]
        )
        return agents[out_cols]
 
    def prepare_agents_for_gen(self):
-
-        if self.tech
+        """Create lookup column based on each technology."""
+        if self.tech is Technology.WIND:
            # drop wind turbine size duplicates
            # SINCE WE ASSUME ANY TURBINE IN A GIVEN CLASS HAS THE SAME POWER CURVE
            self.df.drop_duplicates(subset=["gid", "wind_size_kw"], keep="last", inplace=True)
-            # if running FOM
-            if self.
+            # if running FOM sector, only consider a single (largest) turbine size
+            if self.sector is Sector.FOM:
                self.df = self.df.loc[self.df["wind_size_kw"] == self.df["wind_size_kw_fom"]]
 
            self.df["turbine_class"] = self.df["wind_turbine_kw"].map(
                self.config.rev.turbine_class_dict
            )
 
-        if self.tech
-            # NOTE: tilt and azimuth are
-            self.df["solar_az_tilt"] = self.df[f"azimuth_{self.
+        if self.tech is Technology.SOLAR:
+            # NOTE: tilt and azimuth are sector-specific
+            self.df["solar_az_tilt"] = self.df[f"azimuth_{self.sector.value}"].map(
                self.config.rev.settings.azimuth_direction_to_degree
            )
            self.df["solar_az_tilt"] = self.df["solar_az_tilt"].astype(str)
            self.df["solar_az_tilt"] = (
-                self.df["solar_az_tilt"] + "_" + self.df[f"tilt_{self.
+                self.df["solar_az_tilt"] + "_" + self.df[f"tilt_{self.sector.value}"].astype(str)
            )
 
-    def merge_gen_to_agents(self, tech_agents):
-
+    def merge_gen_to_agents(self, tech_agents: pd.DataFrame):
+        """Merges :py:attr:`tech_agents` to the parcel data :py:attr:`df`.
+
+        Args:
+            tech_agents (pd.DataFrame): The technology-specific energy generation data.
+        """
+        if self.tech is Technology.WIND:
            cols = ["turbine_height_m", "wind_turbine_kw", "turbine_class"]
        else:
            # NOTE: need to drop duplicates in solar agents
@@ -133,16 +205,23 @@ class ResourcePotential:
            )
            cols = ["solar_az_tilt"]
 
-        cols.extend(["gid", f"rev_index_{self.tech}"])
+        cols.extend(["gid", f"rev_index_{self.tech.value}"])
 
        self.df = self.df.merge(tech_agents, how="left", on=cols)
 
    def match_rev_summary_to_agents(self):
+        """Runs the energy generation gathering and merging steps, and returns the updated
+        :py:attr:`df` agent/parcel data.
+
+        Returns:
+            pd.DataFrame: Updated agent/parcel data with recalculated "wind_aep" or "solar_aep"
+                information for each agent.
+        """
        self.prepare_agents_for_gen()
        tech_agents = self.find_rev_summary_table()
        self.merge_gen_to_agents(tech_agents)
 
-        if self.tech
+        if self.tech is Technology.WIND:
            # fill nan generation values
            self.df = self.df.loc[
                ~((self.df["wind_naep"].isnull()) & (self.df["turbine_class"] != "none"))
@@ -153,7 +232,7 @@ class ResourcePotential:
            # calculate annual energy production (aep)
            self.df["wind_aep"] = self.df["wind_naep"] * self.df["wind_turbine_kw"]
            # self.df = self.df.drop(columns="turbine_class")
-
+        elif self.tech is Technology.SOLAR:
            # fill nan generation values
            self.df = self.df.loc[~(self.df["solar_naep"].isnull())]
            # size groundmount system to equal wind aep
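Taken together, the 0.3.2 changes replace string comparisons with `Technology`/`Sector` enum identity checks and validate the required agent columns at construction time. A minimal sketch of the new call pattern; the `Configuration` construction and the parcel values below are illustrative assumptions, not values from this diff:

import pandas as pd

from dwind.config import Configuration, Sector, Technology
from dwind.resource import ResourcePotential

# Hypothetical agents; only the wind-required columns from the new validation are shown.
parcels = pd.DataFrame(
    {
        "gid": [1, 2],
        "rev_gid_wind": [101, 102],
        "wind_turbine_kw": [100.0, 250.0],
        "turbine_height_m": [30.0, 40.0],
    }
)

config = Configuration("model_config.toml")  # hypothetical path/signature for the config loader
resource = ResourcePotential(
    parcels=parcels,
    model_config=config,
    sector=Sector.FOM,  # Sector.FOM is the member used in prepare_agents_for_gen()
    tech="wind",        # coerced internally via Technology(tech)
    year=2018,
)
agents = resource.match_rev_summary_to_agents()  # adds "wind_naep", "wind_cf", and "wind_aep"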
dwind/scenarios.py
CHANGED
@@ -1,48 +1,80 @@
+"""Provides the scenario-specific mapping for varying financial and model configuration data."""
+
 import json
 from pathlib import Path
 
 import pandas as pd
 
+from dwind.config import Year, Scenario
 
-def config_nem(scenario, year):
-    # NEM_SCENARIO_CSV
-    nem_opt_scens = ["highrecost", "lowrecost", "re100"]
-    # nem_opt_scens = ['der_value_HighREcost', 'der_value_LowREcost', 're_100']
-    if scenario in nem_opt_scens:
-        nem_scenario_csv = "nem_optimistic_der_value_2035.csv"
-    elif scenario == "baseline" and year in (2022, 2025, 2035):
-        nem_scenario_csv = f"nem_baseline_{year}.csv"
-    else:
-        nem_scenario_csv = "nem_baseline_2035.csv"
 
-
+def config_nem(scenario: Scenario, year: Year) -> str:
+    """Provides the NEM configuration based on :py:attr:`scenario` and :py:attr:`year`.
 
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+        year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
 
-
-
-
-
-
-
-
-
-
+    Returns:
+        str: Name of the NEM scenario file to use.
+    """
+    if scenario in (Scenario.HIGHRECOST, Scenario.LOWRECOST, Scenario.RE100):
+        return "nem_optimistic_der_value_2035.csv"
+
+    if scenario is Scenario.BASELINE and year in (Year._2022, Year._2025, Year._2035):
+        return f"nem_baseline_{year.value}.csv"
+
+    return "nem_baseline_2035.csv"
+
+
+def config_cambium(scenario: Scenario) -> str:
+    """Loads the Cambium configuration name based on :py:attr:`scenario`.
+
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
 
-
+    Returns:
+        str: Name of the Cambium scenario to use.
+    """
+    if scenario in (Scenario.HIGHRECOST, Scenario.RE100):
+        return "StdScen20_HighRECost"
 
+    if scenario is Scenario.LOWRECOST:
+        return "StdScen20_LowRECost"
 
-
-
-
+    return "Cambium23_MidCase"
+
+
+def config_costs(scenario: Scenario, year: Year) -> dict:
+    """Loads the cost configuration based on the ATB analysis.
+
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+        year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
+
+    Returns:
+        dict: Dictionary of ATB assumptions to be used for PySAM's cost inputs.
+    """
+    f = Path(
+        f"/projects/dwind/configs/costs/atb24/ATB24_costs_{scenario.value}_{year.value}.json"
+    ).resolve()
     with f.open("r") as f_in:
         cost_inputs = json.load(f_in)
 
     return cost_inputs
 
 
-def config_performance(scenario, year):
-
-
+def config_performance(scenario: Scenario, year: Year) -> pd.DataFrame:
+    """Loads the technology performance configurations.
+
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+        year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
+
+    Returns:
+        pd.DataFrame: Performance data based on the scale of each technology.
+    """
+    if scenario is Scenario.BASELINE and year is Year._2022:
         performance_inputs = {
             "solar": pd.DataFrame(
                 [
@@ -108,16 +140,21 @@ def config_performance(scenario, year):
     return performance_inputs
 
 
-def config_financial(scenario, year):
-
-
-
+def config_financial(scenario: Scenario, year: Year) -> dict:
+    """Loads the financial configuration based on the ATB analysis.
+
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+        year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
+
+    Returns:
+        dict: Dictionary of ATB assumptions to be used for configuring PySAM.
+    """
+    if year is Year._2025:
         f = f"/projects/dwind/configs/costs/atb24/ATB24_financing_baseline_{year}.json"
-        i = Path("/projects/dwind/data/incentives/2025_incentives.
-
-
-        incentives.index.name = "census_tract_id"
-    elif scenario in scenarios and year in (2035, 2040):
+        i = Path("/projects/dwind/data/incentives/2025_incentives.pqt").resolve()
+        incentives = pd.read_parquet(i, dtype_backend="pyarrow")
+    elif year in (Year._2035, Year._2040):
         f = "/projects/dwind/configs/costs/atb24/ATB24_financing_baseline_2035.json"
     else:
         # use old assumptions
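The rewritten helpers return early on enum identity instead of assigning to a local and falling through. Assuming `Scenario` and `Year` are plain enums whose `.value`s match the old strings and integers (their definitions live in `dwind/config.py`, outside this diff), the dispatch resolves as:

from dwind.config import Scenario, Year
from dwind.scenarios import config_cambium, config_nem

# Optimistic DER-value scenarios share one NEM file; baseline keys off the year.
assert config_nem(Scenario.RE100, Year._2035) == "nem_optimistic_der_value_2035.csv"
assert config_nem(Scenario.BASELINE, Year._2025) == "nem_baseline_2025.csv"

# Cambium: two special cases, then the Cambium 2023 mid-case as the fallback.
assert config_cambium(Scenario.LOWRECOST) == "StdScen20_LowRECost"
assert config_cambium(Scenario.BASELINE) == "Cambium23_MidCase"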
dwind/utils/array.py
CHANGED
@@ -1,11 +1,25 @@
+"""Provides a series of generic NumPy and Pandas utility functions."""
+
 from __future__ import annotations
 
 import numpy as np
 import pandas as pd
 
 
-def memory_downcaster(df):
-
+def memory_downcaster(df: pd.DataFrame | pd.Series) -> pd.DataFrame | pd.Series:
+    """Downcasts ``int`` and ``float`` columns to the lowest memory alternative possible. For
+    integers this means converting to either signed or unsigned 8-, 16-, 32-, or 64-bit integers,
+    and for floats, converting to ``np.float32``.
+
+    Args:
+        df (pd.DataFrame | pd.Series): DataFrame or Series to have its memory footprint reduced.
+
+    Returns:
+        pd.DataFrame | pd.Series: Reduced footprint version of the passed :py:attr:`df`.
+    """
+    # if not isinstance(df, pd.DataFrame | pd.Series):
+    if not isinstance(df, (pd.DataFrame, pd.Series)):  # noqa
+        raise TypeError("Input value must be a Pandas DataFrame or Series.")
 
     NAlist = []
     for col in df.select_dtypes(include=[np.number]).columns:
@@ -56,93 +70,6 @@ def memory_downcaster(df):
     return df
 
 
-def interpolate_array(row, col_1, col_2, col_in, col_out):
-    if row[col_in] != 0:
-        interpolated = row[col_in] * (row[col_2] - row[col_1]) + row[col_1]
-    else:
-        interpolated = row[col_1]
-
-    row[col_out] = interpolated
-
-    return row
-
-
-def scale_array_precision(df: pd.DataFrame, hourly_col: str, prec_offset_col: str):
-    """Scales the precision of :py:attr:`hourly_col` by the :py:attr:`prec_offset_col`.
-
-    Args:
-        df (pd.DataFrame): A Pandas DataFrame containing :py:att:`hourly_col` and
-            :py:att:`prec_offset_col`.
-        hourly_col (str) The column to adjust the precision.
-        prec_offset_col (str): The column for scaling the precison of :py:attr:`hourly_col`.
-
-    Returns:
-        pd.DataFrame: The input :py:attr:`df` with the precision of :py:attr:`hourly_col` scaled.
-    """
-    df[hourly_col] = (
-        np.array(df[hourly_col].values.tolist(), dtype="float64")
-        / df[prec_offset_col].values.reshape(-1, 1)
-    ).tolist()
-    return df
-
-
-def scale_array_deprecision(df: pd.DataFrame, col: str | list[str]) -> pd.DataFrame:
-    """Rounds the column(s) :py:attr:`col` to the nearest 2nd decimal and converts to NumPy's
-    float32.
-
-    Args:
-        df (pd.DataFrame): A Pandas DataFrame containing :py:att:`col`.
-        col (str | list[str]): The column(s) to have reduced precision.
-
-    Returns:
-        pd.DataFrame: The input :py:attr:`df` with the precision of :py:attr:`col` lowered.
-    """
-    df[col] = np.round(np.round(df[col], 2).astype(np.float32), 2)
-    return df
-
-
-def scale_array_sum(df: pd.DataFrame, hourly_col: str, scale_col: str) -> pd.DataFrame:
-    """Scales the :py:attr:`hourly_col` by its sum and multiples by the :py:attr:`scale_col`.
-
-    Args:
-        df (pd.DataFrame): Pandas DataFrame containing the :py:attr:`hourly_col` and
-            :py:attr:`scale_col`.
-        hourly_col (str): The name of the column to be scaled whose values are lists.
-        scale_col (str): The column to scale the :py:attr:`hourly_col`.
-
-    Returns:
-        pandas.DataFrame: The input dataframe, but with the values of the :py:attr:`hourly_col`
-            scaled appropriately.
-    """
-    hourly_array = np.array(df[hourly_col].values.tolist())
-    df[hourly_col] = (
-        hourly_array / hourly_array.sum(axis=1).reshape(-1, 1) * df[scale_col].values.reshape(-1, 1)
-    ).tolist()
-    return df
-
-
-def scale_array_multiplier(
-    df: pd.DataFrame, hourly_col: str, multiplier_col: str, col_out: str
-) -> pd.DataFrame:
-    """Scales the :py:attr:`hourly_col` values by the :py:attr:`multiplier_col`, and places it in
-    the :py:attr:`col_out`.
-
-    Args:
-        df (pd.DataFrame): The Pandas DataFrame containing the :py:attr:`hourly_col` and
-            :py:attr:`multiplier_col`.
-        hourly_col (str): A column of hourly values as a list of floats in each cell.
-        multiplier_col (str): The column used to scale the :py:attr:`hourly_col`.
-        col_out (str): A new column that will contain the scaled data.
-
-    Returns:
-        pd.DataFrame: A new copy of the original data (:py:attr:`df`) containing the
-            :py:attr:`col_out` column.
-    """
-    hourly_array = np.array(df[hourly_col].values.tolist())
-    df[col_out] = (hourly_array * df[multiplier_col].values.reshape(-1, 1)).tolist()
-    return df
-
-
 def split_by_index(
     arr: pd.DataFrame | np.ndarray | pd.Series, n_splits: int
 ) -> tuple[np.ndarray, np.ndarray]:
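With the interpolation and array-scaling helpers removed, `memory_downcaster` is the main utility left in this module. A quick sketch of the intended effect, assuming the loop body not shown in this hunk follows the ladder the docstring describes (the exact resulting dtypes depend on that elided code):

import numpy as np
import pandas as pd

from dwind.utils.array import memory_downcaster

df = pd.DataFrame(
    {
        "small_int": np.array([0, 255], dtype="int64"),  # fits an 8-bit unsigned integer
        "big_int": np.array([0, 2**40], dtype="int64"),  # must stay 64-bit
        "val": np.array([0.25, 1e6], dtype="float64"),   # floats downcast to np.float32
    }
)
out = memory_downcaster(df)
print(out.dtypes)  # expected per the docstring: uint8 / int64 / float32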
dwind/utils/hpc.py
CHANGED
@@ -1,5 +1,14 @@
+"""Provides the live timing table functionality for the Kestrel :py:class:`MultiProcess` class."""
+
+from __future__ import annotations
+
+import io
+import re
 import time
+import subprocess
+from copy import deepcopy
 
+import pandas as pd
 from rich.table import Table
 from rex.utilities.hpc import SLURM
 
@@ -68,7 +77,7 @@ def update_status(job_status: dict) -> dict:
     return update
 
 
-def generate_table(job_status: dict) -> tuple[Table, bool]:
+def generate_run_status_table(job_status: dict) -> tuple[Table, bool]:
     """Generate the job status run time statistics table.
 
     Args:
@@ -92,5 +101,38 @@ def generate_table(job_status: dict) -> tuple[Table, bool]:
         table.add_row(
             job, status, convert_seconds_for_print(_wait), convert_seconds_for_print(_run)
         )
-    done = all(el["status"] in ("CG", None) for el in job_status.values())
+    done = all(el["status"] in ("CG", "CF", None) for el in job_status.values())
     return table, done
+
+
+def get_finished_run_status(jobs: int | str | list[int | str]) -> dict[str, str]:
+    """Extracts a dictionary of job_id and status from the ``sacct`` output for a single
+    job or series of jobs.
+
+    Args:
+        jobs (int | str | list[int | str]): Single job ID or list of job IDs that have finished
+            running.
+
+    Returns:
+        dict[str, str]: Dictionary of {job_id_1: status_1, ..., job_id_N: status_N}.
+    """
+    if isinstance(jobs, (int, str)):  # noqa
+        jobs = [jobs]
+    jobs = [str(j) for j in jobs]
+
+    # Format the command to be in the form of [sacct, -j, job_id_1, ..., -j, job_id_N]
+    command = deepcopy(jobs)
+    for i in range(len(command) - 1, -1, -1):
+        command.insert(i, "-j")
+    command.insert(0, "sacct")
+    results = subprocess.check_output(command)
+
+    # Convert the sacct string output to be table-like
+    buffer = io.StringIO(results.decode("utf8", "ignore"))
+    lines = [re.split(" +", line) for line in buffer.readlines() if not line.startswith("-")]
+
+    # Create a dataframe, and export a dictionary of the form job_id: job_status
+    df = pd.DataFrame(lines[1:], columns=lines[0])
+    df = df.loc[df.JobID.isin(jobs), ["JobID", "State"]]
+    df.JobID = df.JobID.astype(int)
+    return dict(df.values.tolist())
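The `-j` insertion loop in the new `get_finished_run_status` walks the ID list backwards so each insert lands before the ID it flags without shifting the indices still to be visited. A pure-Python check of the command assembly, no SLURM required:

from copy import deepcopy

jobs = ["1001", "1002", "1003"]
command = deepcopy(jobs)
# Reverse iteration: inserting at i never disturbs the positions of IDs at indices < i.
for i in range(len(command) - 1, -1, -1):
    command.insert(i, "-j")
command.insert(0, "sacct")
assert command == ["sacct", "-j", "1001", "-j", "1002", "-j", "1003"]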
dwind/utils/loader.py
ADDED
@@ -0,0 +1,63 @@
+"""Provides the core data loading methods for importing scenario data from flat files or SQL."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pandas as pd
+from sqlalchemy import create_engine
+
+from dwind.config import Year
+
+
+def load_df(file_or_table: str | Path, year: Year | None, sql_constructor: str | None = None):
+    """Loads data from either a SQL table or file to a pandas ``DataFrame``.
+
+    Args:
+        file_or_table (str | Path): File name or path object, or SQL table where the data are
+            located.
+        year (:py:class:`dwind.config.Year`, optional): If used, only extracts the single year from
+            a column called "year". Defaults to None.
+        sql_constructor (str | None, optional): The SQL engine constructor string. Required if
+            extracting from SQL. Defaults to None.
+    """
+    valid_extensions = (".csv", ".pqt", ".parquet", ".pkl", ".pickle")
+    if str(file_or_table).endswith(valid_extensions):
+        return _load_from_file(filename=file_or_table, year=year)
+
+    return _load_from_sql(table=file_or_table, sql_constructor=sql_constructor, year=year)
+
+
+def _load_from_file(filename: str | Path, year: Year | None) -> pd.DataFrame:
+    """Loads tabular data from a file to a ``pandas.DataFrame``."""
+    if isinstance(filename, str):
+        filename = Path(filename).resolve()
+    if not isinstance(filename, Path):
+        raise TypeError(f"`filename` must be a valid path, not {filename=}")
+
+    if filename.suffix == ".csv":
+        df = pd.read_csv(filename, dtype_backend="pyarrow")
+    elif filename.suffix in (".parquet", ".pqt"):
+        df = pd.read_parquet(filename, dtype_backend="pyarrow")
+    elif filename.suffix in (".pickle", ".pkl"):
+        df = pd.read_pickle(filename)
+    else:
+        raise ValueError(f"Only CSV, Parquet, and Pickle files allowed, not {filename=}")
+
+    if year is not None:
+        df = df.loc[df.year == year]
+
+    return df
+
+
+def _load_from_sql(table: str, sql_constructor: str, year: Year | None) -> pd.DataFrame:
+    """Load tabular data from SQL."""
+    where = f"where year = {year}" if year is not None else ""
+    sql = f"""select * from diffusion_shared."{table}" {where};"""
+    atlas_engine = create_engine(sql_constructor)
+
+    with atlas_engine.connect() as conn:
+        df = pd.read_sql(sql, con=conn.connection, dtype_backend="pyarrow")
+
+    atlas_engine.dispose()
+    return df
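A short usage sketch of the new loader; the file path, table name, and connection string below are placeholders, not values from the package:

from dwind.utils.loader import load_df

# File route: the extension picks the reader, and `year` filters on a "year" column.
agents = load_df("/projects/dwind/data/agents.pqt", year=None)  # hypothetical path

# SQL route: anything without a recognized extension is treated as a table in the
# diffusion_shared schema and read through a standard SQLAlchemy engine URL.
rates = load_df(
    "nem_scenario_table",  # hypothetical table name
    year=None,
    sql_constructor="postgresql+psycopg2://user:password@host:5432/dgen",  # placeholder URL
)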