dwind 0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dwind/__init__.py +3 -0
- dwind/btm_sizing.py +129 -0
- dwind/config.py +118 -0
- dwind/helper.py +172 -0
- dwind/loader.py +59 -0
- dwind/model.py +371 -0
- dwind/mp.py +225 -0
- dwind/resource.py +166 -0
- dwind/run.py +288 -0
- dwind/scenarios.py +139 -0
- dwind/valuation.py +1562 -0
- dwind-0.3.dist-info/METADATA +168 -0
- dwind-0.3.dist-info/RECORD +17 -0
- dwind-0.3.dist-info/WHEEL +5 -0
- dwind-0.3.dist-info/entry_points.txt +2 -0
- dwind-0.3.dist-info/licenses/LICENSE.txt +29 -0
- dwind-0.3.dist-info/top_level.txt +1 -0
dwind/model.py
ADDED
@@ -0,0 +1,371 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import logging
|
4
|
+
import warnings
|
5
|
+
from pathlib import Path
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
import pandas as pd
|
9
|
+
|
10
|
+
from dwind import Configuration, helper, resource, scenarios, valuation, btm_sizing
|
11
|
+
|
12
|
+
|
13
|
+
# POTENTIALLY DANGEROUS: the filter below silences every warning category for the whole process.
|
14
|
+
warnings.filterwarnings("ignore")
|
15
|
+
|
16
|
+
|
17
|
+
class Agents:
    """Responsible for reading in the agent data and storing it for the ``Model`` class.

    Agents are the modified parcels that have been truncated to the largest circle able
    to be contained in the parcel, and contain all of the relevant tax lot and
    geographic variables that would be found in a parcel.

    Parameters
    ----------
    agent_file : str | pathlib.Path
        A parquet file (.pqt or .parquet), pickle file (.pkl or .pickle), or CSV file
        (.csv) containing the previously generated agent data.

    Raises
    ------
    ValueError
        Raised if the :py:attr:`agent_file` does not have a valid file extension for
        a pickle file (.pkl or .pickle), a parquet file (.pqt or .parquet), or a CSV
        file (.csv).
    """

    # Identifier columns that are sliced with ``.str`` or merged against string-typed
    # lookup tables; a CSV must not let pandas infer them as integers (which would also
    # strip leading zeros).
    _CSV_STRING_COLUMNS = ("fips_code", "state_fips", "pgid", "census_tract_id")

    def __init__(self, agent_file: str | Path):
        self.agent_file = Path(agent_file).resolve()
        self.load_agents()

    def load_agents(self):
        """Load the agent file into :py:attr:`agents` and fill in derived ID columns.

        ``state_fips`` is derived from the first two characters of ``fips_code`` when it
        is missing. ``census_tract_id`` is looked up through ``pgid`` from the block-to-pgid
        table when it is missing, after which agents are de-duplicated on ``gid`` and the
        index is reset.

        Raises
        ------
        ValueError
            Raised if the file extension is not a supported parquet, pickle, or CSV suffix.
        """
        suffix = self.agent_file.suffix
        kind = suffix.lower()  # accept ".CSV", ".PQT", ... as well

        if kind in (".pqt", ".parquet"):
            self.agents = pd.read_parquet(self.agent_file)
        elif kind in (".pkl", ".pickle"):
            # NOTE: unpickling can execute arbitrary code; only load trusted agent files.
            self.agents = pd.read_pickle(self.agent_file)
        elif kind == ".csv":
            id_dtypes = dict.fromkeys(self._CSV_STRING_COLUMNS, str)
            self.agents = pd.read_csv(self.agent_file, dtype=id_dtypes).reset_index(drop=True)
        else:
            raise ValueError(
                f"File types ending in {suffix} can't be read as pickle, parquet, or CSV"
            )

        if "state_fips" not in self.agents.columns:
            self.agents["state_fips"] = self.agents["fips_code"].str[:2]

        if "census_tract_id" not in self.agents.columns:
            census_tracts = pd.read_csv(
                "/projects/dwind/configs/sizing/wind/lkup_block_to_pgid_2020.csv",
                dtype={"fips_block": str, "pgid": str},
            )
            # The first 11 characters of the block FIPS are used as the tract identifier.
            census_tracts["census_tract_id"] = census_tracts["fips_block"].str[:11]
            census_tracts = census_tracts[["pgid", "census_tract_id"]].drop_duplicates()

            # A pgid may map to several tracts, so the left merge can repeat agents;
            # keep the first row per gid.
            # NOTE(review): de-duplication is applied only after this merge; confirm it is
            # not also expected for files that already carry census_tract_id.
            self.agents = (
                self.agents.merge(census_tracts, how="left", on="pgid")
                .drop_duplicates(subset=["gid"])
                .reset_index(drop=True)
            )
|
73
|
+
|
74
|
+
|
75
|
+
class Model:
    """Prepare one chunk of agents for a scenario and run its valuation.

    Parameters
    ----------
    agents : pandas.DataFrame
        The agent data. Must contain ``application`` and the wind size column for the
        sector (``wind_size_kw_fom`` for "fom", ``wind_size_kw`` otherwise). The frame
        passed in is not modified.
    location : str
        Location label; only used to build the scenario and run names.
    sector : str
        "fom" selects the FOM applications; any other value selects the BTM applications.
        Only "btm" and "fom" are acted on by :py:meth:`prepare_agents` and
        :py:meth:`run_valuation`.
    scenario : str
        Scenario name. "metering" and "billing" are handled directly in
        :py:meth:`get_nem`; any other value is resolved through ``scenarios.config_nem``.
    year : int
        The year-basis for the scenario.
    out_path : str | Path
        Directory that receives ``logs/dwfs.txt`` and the ``<run_name>.pqt`` results.
    model_config : str | Path
        Passed to ``Configuration``.
    chunk_ix : int | None, optional
        Chunk index appended to the run name, by default None, which is treated as 0.
    """

    def __init__(
        self,
        agents: pd.DataFrame,
        location: str,
        sector: str,
        scenario: str,
        year: int,
        out_path: str | Path,
        model_config: str | Path,
        chunk_ix: int | None = None,
    ):
        if chunk_ix is None:
            chunk_ix = 0
        self.out_path = Path(out_path).resolve()

        self.full_scenario = f"{location}_{sector}_{scenario}_{year}"
        self.run_name = f"{self.full_scenario}_{chunk_ix}"
        self.location = location
        self.sector = sector
        self.scenario = scenario
        self.year = year
        self.config = Configuration(model_config)

        self.init_logging()

        t_dict = self.config.rev.turbine_class_dict
        if self.sector == "fom":
            apps = ["BTM, FOM", "BTM, FOM, Utility", "FOM, Utility"]
            size_col = "wind_size_kw_fom"
        else:
            apps = ["BTM", "BTM, FOM", "BTM, FOM, Utility"]
            size_col = "wind_size_kw"

        # Filter by sector first, then attach the turbine class on a new frame so the
        # caller's DataFrame is left untouched and later column assignments do not write
        # into a slice of it.
        selected = agents.loc[agents["application"].isin(apps)]
        self.agents = selected.assign(turbine_class=selected[size_col].map(t_dict))

    def init_logging(self):
        """Configure root logging to ``<out_path>/logs/dwfs.txt`` and the console.

        Sets :py:attr:`log` to the "dwfs" logger.
        """
        log_dir = self.out_path / "logs"
        # Several chunk jobs may share one out_path: create the directory (and any
        # missing parents) without failing when another job created it first.
        log_dir.mkdir(parents=True, exist_ok=True)

        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s",
            handlers=[
                logging.FileHandler(log_dir / "dwfs.txt"),
                logging.StreamHandler(),
            ],
        )

        self.log = logging.getLogger("dwfs")

    def get_gen(self, resource_year="2018"):
        """Re-index the agents to the 2018 reV index and attach the resource summary.

        Parameters
        ----------
        resource_year : str, optional
            Resource year, by default "2018". Any other value leaves the agents unchanged.
        """
        if resource_year != "2018":
            return

        # update 2012 rev index to 2018 index
        f = "/projects/dwind/configs/rev/wind/lkup_rev_index_2012_to_2018.csv"
        lkup = pd.read_csv(f)[["rev_index_wind_2012", "rev_index_wind_2018"]]

        # Agents without a 2018 counterpart are dropped by the final dropna.
        self.agents = (
            self.agents.merge(
                lkup, left_on="rev_index_wind", right_on="rev_index_wind_2012", how="left"
            )
            .drop(columns=["rev_index_wind", "rev_index_wind_2012"])
            .rename(columns={"rev_index_wind_2018": "rev_index_wind"})
            .dropna(subset="rev_index_wind")
        )

        # update 2012 rev cf/naep/aep to 2018 values
        resource_potential = resource.ResourcePotential(
            parcels=self.agents,
            application=self.sector,
            year=resource_year,
            model_config=self.config,
        )
        self.agents = resource_potential.match_rev_summary_to_agents()

    def get_rates(self):
        """Attach the pre-processed tariff data to every agent that has a rate ID.

        Agents with a null ``rate_id_alias`` are dropped. The names of the merged tariff
        columns are stored in :py:attr:`tariff_columns` so they can be removed again
        before the results are saved.
        """
        has_rate = self.agents["rate_id_alias"].notna()
        # Explicit copy: the integer cast below must not write into a slice.
        agents = self.agents.loc[has_rate].copy()
        agents["rate_id_alias"] = agents["rate_id_alias"].astype(int)
        rate_ids = np.unique(agents["rate_id_alias"].values)

        # NOTE(review): assumes the tariff table is indexed by rate_id_alias; an ID that
        # is missing from the table raises a KeyError here - confirm that is intended.
        tariff = (
            pd.read_parquet("/projects/dwind/data/tariffs/2025_tariffs.pqt")
            .loc[rate_ids]
            .reset_index(drop=False)
        )
        self.tariff_columns = [
            c for c in tariff.columns if c not in ("rate_id_alias", "tariff_name")
        ]

        self.agents = agents.merge(tariff, how="left", on="rate_id_alias")

    def get_load(self):
        """Attach hourly consumption and rescale the load to the 2024 and model-year levels.

        ``load_kwh`` and ``max_demand_kw`` are multiplied by the 2024 scaling factor for
        the agent's state and building type and, for years after 2025, by the AEO2023
        reference-case load multiplier for the agent's NERC region and sector. The hourly
        profile is then passed through ``helper.scale_array_sum`` with ``load_kwh``.
        """
        consumption_hourly = pd.read_parquet("/projects/dwind/data/crb_consumption_hourly.pqt")

        consumption_hourly["scale_offset"] = 1e8
        consumption_hourly = helper.scale_array_precision(
            consumption_hourly, "consumption_hourly", "scale_offset"
        )

        self.agents = self.agents.merge(
            consumption_hourly, how="left", on=["crb_model", "hdf_index"]
        )

        # update load based on scaling factors from 2024 consumption data
        f = "/projects/dwind/data/parcel_landuse_load_application_mapping.csv"
        bldg_types = pd.read_csv(f)[["land_use", "bldg_type"]]
        self.agents = self.agents.merge(bldg_types, on="land_use", how="left")

        f = "/projects/dwind/data/consumption/2024/load_scaling_factors.csv"
        sfs = pd.read_csv(f, dtype={"state_fips": str})[["state_fips", "bldg_type", "load_sf_2024"]]
        self.agents = self.agents.merge(sfs, on=["state_fips", "bldg_type"], how="left")
        self.agents["load_kwh"] *= self.agents["load_sf_2024"]
        self.agents["max_demand_kw"] *= self.agents["load_sf_2024"]
        self.agents = self.agents.drop(columns=["load_sf_2024"])

        if self.year > 2025:
            # get county_id to nerc_region_abbr lkup
            # from diffusion_shared.county_nerc_join (dgen_db_fy23q4_ss23)
            f = "/projects/dwind/data/county_nerc_join.csv"
            nerc_regions = pd.read_csv(f)[["county_id", "nerc_region_abbr"]]
            self.agents = self.agents.merge(nerc_regions, on=["county_id"], how="left")

            # get load growth projections from AEO
            # from diffusion_shared.aeo_load_growth_projections_nerc_2023_updt (dgen_db_fy23q4_ss23)
            f = "/projects/dwind/data/consumption/aeo_load_growth_projections_nerc_2023_updt.csv"
            load_growth = pd.read_csv(f)
            load_growth = load_growth.loc[
                load_growth["scenario"].eq("AEO2023 Reference case")
                & load_growth["year"].eq(self.year),
                ["nerc_region_abbr", "sector_abbr", "load_multiplier"],
            ]

            # merge load growth projections
            self.agents = self.agents.merge(
                load_growth, on=["nerc_region_abbr", "sector_abbr"], how="left"
            )
            self.agents["load_kwh"] *= self.agents["load_multiplier"]
            self.agents["max_demand_kw"] *= self.agents["load_multiplier"]
            self.agents = self.agents.drop(columns=["load_multiplier", "nerc_region_abbr"])

        # Applied for every year: load_kwh was rescaled above regardless of the year.
        self.agents = helper.scale_array_sum(self.agents, "consumption_hourly", "load_kwh")

    def get_nem(self):
        """Assign ``compensation_style`` and ``nem_system_kw_limit`` to every agent.

        The "metering" and "billing" scenarios apply one style to all agents with a limit
        of 1e9 kW. Any other scenario reads the per-state, per-sector policy CSV named by
        ``scenarios.config_nem``; agents without a matching policy get "net billing" and a
        limit of 0. Agents whose BTM system size exceeds the limit are set to
        "net billing".
        """
        if self.scenario == "metering":
            self.agents["compensation_style"] = "net metering"
            self.agents["nem_system_kw_limit"] = 1000000000
        elif self.scenario == "billing":
            self.agents["compensation_style"] = "net billing"
            self.agents["nem_system_kw_limit"] = 1000000000
        else:
            cols = ["state_abbr", "sector_abbr", "compensation_style", "nem_system_kw_limit"]
            nem_scenario_csv = scenarios.config_nem(self.scenario, self.year)
            nem_df = (
                pd.read_csv(self.config.project.DIR / f"data/nem/{nem_scenario_csv}")
                .rename(columns={"max_pv_kw_limit": "nem_system_kw_limit"})
                .loc[:, cols]
            )

            self.agents = self.agents.merge(nem_df, how="left", on=["state_abbr", "sector_abbr"])

            self.agents["compensation_style"] = self.agents["compensation_style"].fillna(
                "net billing"
            )
            self.agents["nem_system_kw_limit"] = self.agents["nem_system_kw_limit"].fillna(0.0)

        # check if selected system size by tech violate nem_system_kw_limit
        # NOTE(review): applied for every scenario here; with the 1e9 kW limit it only
        # matters for the CSV-driven branch - confirm it is not meant to run there only.
        for tech in self.config.project.settings.TECHS:
            col = f"{tech}_size_kw_btm"
            over_limit = self.agents[col] > self.agents["nem_system_kw_limit"]
            self.agents.loc[over_limit, "compensation_style"] = "net billing"

    def _cap_fom_size(self, tech: str) -> None:
        """Cap ``<tech>_size_kw_fom`` at the configured maximum and recompute its AEP."""
        size_col = f"{tech}_size_kw_fom"
        max_size = self.config.siting[tech]["max_fom_size_kw"]

        mask = self.agents[size_col] > max_size
        self.agents.loc[mask, size_col] = max_size
        self.agents[f"{tech}_aep_fom"] = self.agents[f"{tech}_naep"] * self.agents[size_col]

    def prepare_agents(self):
        """Attach the resource, tariff, load, sizing, and NEM data needed for valuation.

        Resource data is fetched for every sector. BTM agents additionally get tariffs,
        hourly load, optional system sizing, and NEM policies. FOM agents are, when
        ``SIZE_SYSTEMS`` is enabled, reduced to the largest turbine per ``gid`` and capped
        at the configured maximum FOM system sizes.
        """
        # get generation data
        self.log.info("....fetching resource information")
        self.get_gen()

        if self.sector == "btm":
            # map tariffs
            self.log.info("....running with pre-processed tariffs")
            self.get_rates()

            # get hourly consumption
            self.log.info("....fetching hourly consumption")
            self.get_load()

            if self.config.project.settings.SIZE_SYSTEMS:
                # size btm systems
                self.log.info("....sizing BTM systems")
                self.agents = btm_sizing.sizer(self.agents, self.config)

            # map nem policies
            # NOTE(review): run for every BTM agent set, independent of SIZE_SYSTEMS - confirm.
            self.log.info("....processing NEM for BTM systems")
            self.get_nem()

        if self.sector == "fom":
            if self.config.project.settings.SIZE_SYSTEMS:
                # for fom agents, take largest wind turbine
                self.agents = self.agents.sort_values(
                    by=["wind_turbine_kw", "turbine_height_m"],
                    ascending=[False, False],
                ).drop_duplicates(subset=["gid"])

                # track FOM techpot sizes before they are capped below
                self.agents["solar_size_kw_techpot"] = self.agents["solar_size_kw_fom"]
                self.agents["wind_size_kw_techpot"] = self.agents["wind_size_kw_fom"]

                # handle FOM max system sizes
                for tech in ("solar", "wind"):
                    if tech in self.config.project.settings.TECHS:
                        self._cap_fom_size(tech)

    def _save_results(self, extra_drop=()) -> None:
        """Write the agents to ``<out_path>/<run_name>.pqt`` when saving is enabled.

        The hourly capacity factor columns, plus any columns named in ``extra_drop``, are
        removed from :py:attr:`agents` before writing. Nothing is dropped or written when
        ``SAVE_APP_PARQUET`` is off.
        """
        if not self.config.project.settings.SAVE_APP_PARQUET:
            return

        drop_cols = ["wind_cf_hourly", "solar_cf_hourly", *extra_drop]
        self.agents = self.agents.drop(columns=drop_cols, errors="ignore")

        f_out = self.out_path / f"{self.run_name}.pqt"
        self.agents.to_parquet(f_out)

    def run_valuation(self):
        """Run the valuation for the prepared agents and optionally save the results.

        For the "btm" and "fom" sectors the ``application`` column is overwritten with
        "BTM" or "FOM". If no agents remain, :py:attr:`agents` becomes an empty DataFrame
        and nothing is written. Other sectors are left untouched.
        """
        valuer = valuation.ValueFunctions(self.scenario, self.year, self.config)

        if self.sector not in ("btm", "fom"):
            return

        label = self.sector.upper()
        self.agents["application"] = label

        if len(self.agents) == 0:
            self.agents = pd.DataFrame()
            return

        self.log.info("\n")
        self.log.info(f"starting valuation for {len(self.agents)} {label} agents")

        # NOTE(review): the BTM call passes ``sector=`` while the FOM call passes
        # ``configuration=``; confirm both keywords are accepted by run_multiprocessing.
        if self.sector == "btm":
            self.agents = valuer.run_multiprocessing(self.agents, sector="btm")
        else:
            self.agents = valuer.run_multiprocessing(self.agents, configuration="fom")

        self.log.info("null counts:")
        self.log.info(self.agents.isnull().sum().sort_values())

        # Only BTM agents carry merged tariff columns (set by get_rates); tolerate the
        # attribute being absent when get_rates was never run.
        tariff_cols = getattr(self, "tariff_columns", []) if self.sector == "btm" else []
        self._save_results(extra_drop=tariff_cols)

    def run(self):
        """Prepare the agents and run the valuation."""
        self.prepare_agents()
        self.run_valuation()
|
dwind/mp.py
ADDED
@@ -0,0 +1,225 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import time
|
4
|
+
from pathlib import Path
|
5
|
+
|
6
|
+
import pandas as pd
|
7
|
+
from rex.utilities.hpc import SLURM
|
8
|
+
|
9
|
+
from dwind.helper import split_by_index
|
10
|
+
|
11
|
+
|
12
|
+
class MultiProcess:
    """Multiprocessing interface for running batch jobs via ``SLURM``.

    Parameters
    ----------
    location : str
        The state name, or an underscore-separated string of "state_county"
    sector : str
        One of "fom" (front of meter) or "btm" (back of the meter).
    scenario : str
        An underscore-separated string for the scenario to be run, such as "baseline_2022".
    year : int
        The year-basis for the scenario.
    env : str | Path
        The path to the ``dwind`` Python environment that should be used to run the model.
    n_nodes : int
        Number of nodes to request from the HPC when running an ``sbatch`` job.
    memory : int
        Node memory, in GB.
    walltime : int
        Node walltime request, in hours.
    allocation : str
        The HPC project (allocation) handle that will be charged for running the analysis.
    feature : str
        Additional flags for the SLURM job, using formatting such as ``--qos=high`` or
        ``--depend=[state:job_id]``.
    repository : str | Path
        The path to the dwind repository to use for analysis.
    model_config : str
        The full file path and name of where the model configuration file is located.
    stdout_path : str | Path | None, optional
        The path where all the stdout logs should be written to, by default None. When None,
        the "logs" folder inside the chunk file directory is used.
    dir_out : str | Path | None, optional
        The path to save the results to, by default None. When None, ``Path.cwd()``
        (current working directory) is used. The chunk files are written to
        ``dir_out / "chunk_files_<run_name>"``.
    """

    _PICKLE_SUFFIXES = (".pickle", ".pkl")
    _PARQUET_SUFFIXES = (".pqt", ".parquet")

    def __init__(
        self,
        location: str,
        sector: str,
        scenario: str,
        year: int,
        env: str | Path,
        n_nodes: int,
        memory: int,
        walltime: int,
        allocation: str,
        feature: str,
        repository: str | Path,
        model_config: str,
        stdout_path: str | Path | None = None,
        dir_out: str | Path | None = None,
    ):
        """Store the run settings and create the chunk and log directories.

        See the class docstring for the parameter descriptions.
        """
        self.run_name = f"{location}_{sector}_{scenario}_{year}"
        self.location = location
        self.sector = sector
        self.scenario = scenario
        self.year = year
        self.env = env
        self.n_nodes = n_nodes
        self.memory = memory
        self.walltime = walltime
        self.alloc = allocation
        self.feature = feature
        self.stdout_path = stdout_path
        self.repository = repository
        self.model_config = model_config

        # Create the output directory (and any missing parents) if it doesn't already exist
        self.dir_out = Path.cwd() if dir_out is None else Path(dir_out).resolve()
        self.out_path = self.dir_out / f"chunk_files_{self.run_name}"
        self.out_path.mkdir(parents=True, exist_ok=True)

        # Create a new path in the output directory for the logs if a path is not provided
        if self.stdout_path is None:
            log_dir = self.out_path / "logs"
            log_dir.mkdir(exist_ok=True)
            self.stdout_path = log_dir

    def check_status(self, job_ids: list[int]):
        """Print the status of the submitted jobs every 30 seconds until none remain.

        A job counts as remaining while its status is "PD" or "R".

        Parameters
        ----------
        job_ids : list[int]
            The list of HPC ``job_id``s to check on.
        """
        hpc = SLURM()
        print(f"{len(job_ids)} job(s) started")

        jobs_status = {j: hpc.check_status(job_id=j) for j in job_ids}
        n_remaining = len([s for s in jobs_status.values() if s in ("PD", "R")])
        print(f"{n_remaining} job(s) remaining: {jobs_status}")
        time.sleep(30)

        while n_remaining > 0:
            hpc = SLURM()
            for job, status in jobs_status.items():
                # NOTE(review): statuses are compared against the strings "GC" and "None";
                # confirm these match what SLURM.check_status returns for finished jobs.
                if status in ("GC", "None"):
                    continue
                jobs_status.update({job: hpc.check_status(job_id=job)})

            n_remaining = len([s for s in jobs_status.values() if s in ("PD", "R")])
            print(f"{n_remaining} job(s) remaining: {jobs_status}")
            if n_remaining > 0:
                time.sleep(30)

    def _result_files(self) -> list[Path]:
        """Return the chunked result files in the chunk directory, sorted by name.

        Any pickle file is treated as a result. Parquet files only count when their name
        starts with ``<run_name>_``, which excludes the ``agents_<i>.pqt`` chunk inputs
        that :py:meth:`run_jobs` writes to the same directory.
        """
        prefix = f"{self.run_name}_"
        found = []
        for f in sorted(self.out_path.iterdir()):
            if f.suffix in self._PICKLE_SUFFIXES:
                found.append(f)
            elif f.suffix in self._PARQUET_SUFFIXES and f.name.startswith(prefix):
                found.append(f)
        return found

    def aggregate_outputs(self):
        """Collect the chunked results files, combine them into a single output parquet file, and
        delete the chunked results files.
        """
        result_files = self._result_files()
        if not result_files:
            return

        # NOTE: unpickling can execute arbitrary code; pickles are only read from this
        # run's own chunk directory.
        frames = [
            pd.read_pickle(f) if f.suffix in self._PICKLE_SUFFIXES else pd.read_parquet(f)
            for f in result_files
        ]
        result_agents = pd.concat(frames)
        f_out = self.dir_out / f"run_{self.run_name}.pqt"
        result_agents.to_parquet(f_out)

        # Only remove the chunk files once the combined file has been written.
        for f in result_files:
            f.unlink()

    def run_jobs(self, agent_df: pd.DataFrame) -> None:
        """Split :py:attr:`agent_df` into :py:attr:`n_nodes` chunks and run one job per chunk.

        Each chunk is written to ``agents_<i>.pqt`` in the chunk directory and submitted as
        a ``dwind run-chunk`` job. Once no job is pending or running, the results are
        aggregated.

        Parameters
        ----------
        agent_df : pandas.DataFrame
            The agent DataFrame to be chunked and analyzed.
        """
        agent_df = agent_df.reset_index(drop=True)
        starts, ends = split_by_index(agent_df, self.n_nodes)
        jobs = []

        base_cmd_str = f"module load conda; conda activate {self.env}; "
        base_cmd_str += "dwind run-chunk "

        base_args = " ".join(
            [
                f"--location {self.location}",
                f"--sector {self.sector}",
                f"--scenario {self.scenario}",
                f"--year {self.year}",
                f"--repository {self.repository}",
                f"--model-config {self.model_config}",
                f"--out-path {self.out_path}",
            ]
        )

        for i, (start, end) in enumerate(zip(starts, ends, strict=True)):
            fn = self.out_path / f"agents_{i}.pqt"
            agent_df.iloc[start:end].to_parquet(fn)

            job_name = f"{self.run_name}_{i}"
            cmd_str = f"{base_cmd_str} --chunk-ix {i} {base_args}"
            print("cmd:", cmd_str)

            slurm_manager = SLURM()
            job_id, err = slurm_manager.sbatch(
                cmd=cmd_str,
                alloc=self.alloc,
                memory=self.memory,
                walltime=self.walltime,
                feature=self.feature,
                name=job_name,
                stdout_path=self.stdout_path,
            )

            if job_id:
                jobs.append(job_id)
                print(f"Kicked off job: {job_name}, with SLURM {job_id=} on Eagle.")
            else:
                print(
                    f"{job_name=} was unable to be kicked off due to the following error:\n{err}."
                )

        # Check on the job statuses until they're complete, then aggregate the results
        self.check_status(jobs)
        self.aggregate_outputs()
|