dwind 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dwind/__init__.py +1 -1
- dwind/btm_sizing.py +1 -2
- dwind/cli/__init__.py +0 -0
- dwind/cli/collect.py +114 -0
- dwind/cli/debug.py +137 -0
- dwind/cli/run.py +288 -0
- dwind/cli/utils.py +166 -0
- dwind/config.py +147 -6
- dwind/main.py +20 -0
- dwind/model.py +128 -63
- dwind/mp.py +30 -35
- dwind/resource.py +120 -41
- dwind/scenarios.py +73 -36
- dwind/utils/array.py +16 -89
- dwind/utils/hpc.py +44 -2
- dwind/utils/loader.py +63 -0
- dwind/utils/progress.py +60 -0
- dwind/valuation.py +368 -239
- {dwind-0.3.1.dist-info → dwind-0.3.2.dist-info}/METADATA +2 -1
- dwind-0.3.2.dist-info/RECORD +28 -0
- dwind-0.3.2.dist-info/entry_points.txt +2 -0
- dwind-0.3.1.dist-info/RECORD +0 -20
- dwind-0.3.1.dist-info/entry_points.txt +0 -2
- {dwind-0.3.1.dist-info → dwind-0.3.2.dist-info}/WHEEL +0 -0
- {dwind-0.3.1.dist-info → dwind-0.3.2.dist-info}/licenses/LICENSE.txt +0 -0
- {dwind-0.3.1.dist-info → dwind-0.3.2.dist-info}/top_level.txt +0 -0
dwind/model.py
CHANGED
@@ -1,3 +1,7 @@
+"""Provides the primary ``Agent`` and ``Model`` classes for loading, preparing, and analyzing
+parcel data.
+"""
+
 from __future__ import annotations
 
 import logging
@@ -7,8 +11,8 @@ from pathlib import Path
 import numpy as np
 import pandas as pd
 
-from dwind import
-from dwind.
+from dwind import resource, scenarios, valuation, btm_sizing
+from dwind.config import Year, Sector, CRBModel, Scenario, Configuration
 
 
 # POTENTIALLY DANGEROUS!
@@ -21,18 +25,6 @@ class Agents:
     Agents are the modified parcels that have been truncated to the largest circle able
     to be contained in the parcel, and contain all of the relevant tax lot and
     geographic variables that would be found in a parcel.
-
-    Parameters
-    ---------
-    agent_file : str | pathlib.Path
-        Either a parquet file (.pqt or .parquet) or pickle file (.pkl or .pickle)
-        containing the previously generated agent data.
-
-    Raises:
-    ------
-    ValueError
-        Raised if the :py:attr:`agent_file` does not have a valid file extension for
-        either a pickle file (.pkl or .pickle) or a parquet file (.pqt or .parquet).
     """
 
     def __init__(
@@ -43,8 +35,23 @@ class Agents:
         *,
         resource_year: int = 2018,
     ):
+        """Initialize an instance of the Agent class.
+
+        Args:
+            agent_file (str | pathlib.Path): Either a parquet file (.pqt or .parquet), pickle
+                file (.pkl or .pickle), or CSV file (.csv) containing the previously generated
+                agent data.
+            sector (str | None): One of "fom" (front-of-meter), "btm" (behind-the-meter), or None.
+                Only use None if the agent data do not need to have the :py:meth:`prepare` method
+                be run.
+            model_config (str | Path, optional): Full file path to the overall model configuration
+                TOML file containing SQL connections, data locations, and etc.
+            resource_year (int, optional): The reV resource year basis. This should either be 2012
+                or 2018, but it is assumed the data were created using the 2012 reV lookups.
+                Defaults to 2018.
+        """
         self.agent_file = Path(agent_file).resolve()
-        self.sector = sector
+        self.sector = sector if sector is None else Sector(sector)
         self.config = model_config
         self.resource_year = resource_year
         self._load_agents()
@@ -63,6 +70,9 @@ class Agents:
 
         Args:
             agent_file (str | Path): The full file path of the agent parquet, CSV, or pickle data.
+            sector (str): One of "fom" (front-of-meter) or "btm" (behind-the-meter).
+            model_config (str | Path, optional): Full file path to the overall model configuration
+                TOML file containing SQL connections, data locations, and etc.
             save_results (bool, optional): True to save any updates to the data. Defaults to False.
             file_name (str | Path | None, optional): The file path and name for where to save the
                 prepared data, if not overwriting the existing agent data. Defaults to None.
@@ -103,7 +113,7 @@ class Agents:
                 f"File types ending in {suffix} can't be read as pickle, parquet, or CSV"
             )
 
-        self.agents = file_reader(self.agent_file)
+        self.agents = file_reader(self.agent_file, dtype_backend="pyarrow")
         if suffix == ".csv":
             self.agents = self.agents.reset_index(drop=True)
 
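Note: nearly every file read in 0.3.2 now passes ``dtype_backend="pyarrow"``, as above. A minimal sketch of the pattern, assuming pandas >= 2.0 with pyarrow installed ("agents.pqt" is a hypothetical stand-in for the agent file):

    # Sketch of the Arrow-backed read pattern adopted throughout 0.3.2.
    # Requires pandas >= 2.0 and the pyarrow package.
    import pandas as pd

    agents = pd.read_parquet("agents.pqt", dtype_backend="pyarrow")
    # Columns now use ArrowDtype (e.g., "int64[pyarrow]"), which preserves
    # nullability and typically lowers memory use versus NumPy object dtypes.
    print(agents.dtypes)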
@@ -128,6 +138,15 @@ class Agents:
         self.merge_generation()
 
     def save_agents(self, file_name: str | Path | None = None):
+        """Save the prepared agent data to the path provided in :py:attr:`file_name`.
+
+        Args:
+            file_name (str | Path, optional): Full file path and name for where to save the agent
+                data. Must end in a valid pickle, parquet, or csv extension. Defaults to None.
+
+        Raises:
+            ValueError: _description_
+        """
         if file_name is None:
             file_name = self.agent_file
 
@@ -146,10 +165,14 @@ class Agents:
         file_saver(file_name)
 
     def merge_census_data(self):
+        """Merges the census tract identifies based on the agent "gid" and census "pgid" identifier
+        columns.
+        """
         census_tracts = pd.read_csv(
             "/projects/dwind/configs/sizing/wind/lkup_block_to_pgid_2020.csv",
             usecols=["pgid", "fips_block"],
             dtype=str,
+            dtype_backend="pyarrow",
         ).drop_duplicates()
         census_tracts["census_tract_id"] = [el[:11] for el in census_tracts["fips_block"]]
         self.agents = (
@@ -158,14 +181,18 @@ class Agents:
             .reset_index(drop=True)
         )
 
-    def update_rev_id(self
+    def update_rev_id(self):
         """Update 2012 rev index to 2018 index."""
-        if resource_year !=
+        if self.resource_year != 2018:
             return
 
         index_file = "/projects/dwind/configs/rev/wind/lkup_rev_index_2012_to_2018.csv"
         rev_index_map = (
-            pd.read_csv(
+            pd.read_csv(
+                index_file,
+                usecols=["rev_index_wind_2012", "rev_index_wind_2018"],
+                dtype_backend="pyarrow",
+            )
             .rename(columns={"rev_index_wind_2012": "rev_index_wind"})
             .set_index("rev_index_wind")
         )
@@ -190,21 +217,27 @@ class Agents:
         )
 
     def merge_generation(self):
-
+        """Load and merge the resource potential data for the 2018 reV basis only. See
+        :py:class:`dwind.resource.ResourcePotential` for more information.
+        """
+        if self.resource_year != 2018:
             return
 
         # update 2012 rev cf/naep/aep to 2018 values
-        # self.agents = self.agents.drop(columns=["wind_naep", "wind_cf", "wind_aep"])
         resource_potential = resource.ResourcePotential(
             parcels=self.agents,
-
+            sector=self.sector,
             year=self.resource_year,
-            model_config=self.
+            model_config=self.config,
         )
         self.agents = resource_potential.match_rev_summary_to_agents()
 
 
 class Model:
+    """Primary dwind model that is responsible for gathering and connecting the various data points
+    by parcel.
+    """
+
     def __init__(
         self,
         agents: pd.DataFrame,
@@ -216,33 +249,51 @@ class Model:
         model_config: str | Path,
         chunk_ix: int | None = None,
     ):
+        """Initializes a :py:class:`Model` instance.
+
+        Args:
+            agents (pd.DataFrame): Prepared agent data with correct census and reV resource data
+                already attached.
+            location (str): Priority class or "<state>_<county>" string.
+            sector (str): One of "fom" (front-of-meter) or "btm" (behind-the-meter).
+            scenario (str): Currently only accepts "baseline" as an input.
+            year (int): One of 2022, 2025, 2035, or 2040 for the analysis year.
+            out_path (str | Path): Path for where to save any logging or output data.
+            model_config (str | Path): The overarching model configuration TOML file containing
+                key SQL connectors, data locations, and general model settings.
+            chunk_ix (int | None, optional): Integer representation of the chunk being run, if whole
+                agent file is being run, then None. Defaults to None.
+        """
         if chunk_ix is None:
             chunk_ix = 0
         self.agents = agents
         self.out_path = Path(out_path).resolve()
 
         self.full_scenario = f"{location}_{sector}_{scenario}_{year}"
-        self.run_name = f"{self.full_scenario}
+        self.run_name = f"{self.full_scenario}"
+        if chunk_ix is not None:
+            self.run_name += f"_{chunk_ix}"
         self.location = location
-        self.sector = sector
-        self.scenario = scenario
-        self.year = year
+        self.sector = Sector(sector)
+        self.scenario = Scenario(scenario)
+        self.year = Year(year)
         self.config = Configuration(model_config)
 
-        self.
+        self._init_logging()
 
         t_dict = self.config.rev.turbine_class_dict
-        if self.sector
+        if self.sector is Sector.FOM:
             apps = ["BTM, FOM", "BTM, FOM, Utility", "FOM, Utility"]
             self.agents["turbine_class"] = self.agents["wind_size_kw_fom"].map(t_dict)
-
+        elif self.sector is Sector.BTM:
             apps = ["BTM", "BTM, FOM", "BTM, FOM, Utility"]
             self.agents["turbine_class"] = self.agents["wind_size_kw"].map(t_dict)
 
         # filter by sector
         self.agents = self.agents[self.agents["application"].isin(apps)]
 
-    def
+    def _init_logging(self):
+        """Initializing the logging to :py:attr:`out_path` / logs / dwfs.txt."""
         log_dir = self.out_path / "logs"
         if not log_dir.exists():
             log_dir.mkdir()
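Note: raw ``sector``, ``scenario``, and ``year`` strings are now coerced into enums from ``dwind.config``, so downstream checks can use identity comparisons (``self.sector is Sector.FOM``). A minimal sketch of the assumed pattern; the member names and values here are illustrative, and the real definitions live in dwind/config.py:

    # Illustrative sketch only: the real Sector/Scenario/Year enums are
    # defined in dwind/config.py and may differ in members and values.
    from enum import Enum

    class Sector(str, Enum):
        FOM = "fom"  # front-of-meter
        BTM = "btm"  # behind-the-meter

    sector = Sector("fom")       # coerce user input; raises ValueError on bad input
    assert sector is Sector.FOM  # enum members are singletons, so "is" checks work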
@@ -258,13 +309,16 @@ class Model:
 
         self.log = logging.getLogger("dwfs")
 
-    def
+    def _get_rates(self):
+        """Retrieves the tariff rates and merges them based on the "rate_id_alias" column."""
         self.agents = self.agents[~self.agents["rate_id_alias"].isna()]
         self.agents["rate_id_alias"] = self.agents["rate_id_alias"].astype(int)
         rate_ids = np.unique(self.agents.rate_id_alias.values)
 
         tariff = (
-            pd.read_parquet(
+            pd.read_parquet(
+                "/projects/dwind/data/tariffs/2025_tariffs.pqt", dtype_backend="pyarrow"
+            )
             .loc[rate_ids]
             .reset_index(drop=False)  # , names="rate_id_alias")
         )
@@ -274,25 +328,27 @@ class Model:
 
         self.agents = self.agents.merge(tariff, how="left", on="rate_id_alias")
 
-    def
-
-
-
-
-
-
-
-        self.agents = self.agents.merge(
-            consumption_hourly, how="left", on=["crb_model", "hdf_index"]
-        )
+    def _get_load(self):
+        """Retrieves the energy demand data, combines it with the agent data based on
+        the agent "land_use", "bldg_type", and "state_fips" columns, then scales the
+        consumption data based on the load.
+        """
+        self.agents["crb_model_index"] = self.agents.crb_model.replace(
+            CRBModel.str_model_map()
+        ).astype("uint8[pyarrow]")
 
         # update load based on scaling factors from 2024 consumption data
         f = "/projects/dwind/data/parcel_landuse_load_application_mapping.csv"
-        bldg_types = pd.read_csv(f
+        bldg_types = pd.read_csv(f, usecols=["land_use", "bldg_type"], dtype_backend="pyarrow")
         self.agents = self.agents.merge(bldg_types, on="land_use", how="left")
 
         f = "/projects/dwind/data/consumption/2024/load_scaling_factors.csv"
-        sfs = pd.read_csv(
+        sfs = pd.read_csv(
+            f,
+            dtype={"state_fips": str},
+            usecols=["state_fips", "bldg_type", "load_sf_2024"],
+            dtype_backend="pyarrow",
+        )
         self.agents = self.agents.merge(sfs, on=["state_fips", "bldg_type"], how="left")
         self.agents["load_kwh"] *= self.agents["load_sf_2024"]
         self.agents["max_demand_kw"] *= self.agents["load_sf_2024"]
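Note: the new ``_get_load`` also encodes the CRB model label as a compact Arrow-backed integer. A sketch of the map-then-cast pattern; the mapping contents below are assumed, and the real one comes from ``CRBModel.str_model_map()`` in dwind/config.py:

    # Sketch of mapping string labels to a compact Arrow-backed integer code.
    # The {"reference": 0, ...} mapping is illustrative, not dwind's actual map.
    import pandas as pd

    crb = pd.Series(["reference", "large_office", "reference"])
    codes = crb.replace({"reference": 0, "large_office": 1}).astype("uint8[pyarrow]")
    print(codes.dtype)  # uint8[pyarrow]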
@@ -302,13 +358,15 @@ class Model:
         # get county_id to nerc_region_abbr lkup
         # from diffusion_shared.county_nerc_join (dgen_db_fy23q4_ss23)
         f = "/projects/dwind/data/county_nerc_join.csv"
-        nerc_regions = pd.read_csv(
+        nerc_regions = pd.read_csv(
+            f, usecols=["county_id", "nerc_region_abbr"], dtype_backend="pyarrow"
+        )
         self.agents = self.agents.merge(nerc_regions, on=["county_id"], how="left")
 
         # get load growth projects from AEO
         # from diffusion_shared.aeo_load_growth_projections_nerc_2023_updt (dgen_db_fy23q4_ss23)
         f = "/projects/dwind/data/consumption/aeo_load_growth_projections_nerc_2023_updt.csv"
-        load_growth = pd.read_csv(f)
+        load_growth = pd.read_csv(f, dtype_backend="pyarrow")
         load_growth = load_growth.loc[
             load_growth["scenario"].eq("AEO2023 Reference case")
             & load_growth["year"].eq(self.year),
@@ -323,20 +381,24 @@ class Model:
         self.agents["max_demand_kw"] *= self.agents["load_multiplier"]
         self.agents = self.agents.drop(columns=["load_multiplier", "nerc_region_abbr"])
 
-
-
-
-
+    def _get_nem(self):
+        """Retrieves the NEM data and merges with the agent data based on the "state_abbr" and
+        "sector_abbr" columns.
+        """
+        if self.scenario is Scenario.METERING:
             self.agents["compensation_style"] = "net metering"
             self.agents["nem_system_kw_limit"] = 1000000000
-        elif self.scenario
+        elif self.scenario is Scenario.BILLING:
             self.agents["compensation_style"] = "net billing"
             self.agents["nem_system_kw_limit"] = 1000000000
         else:
             cols = ["state_abbr", "sector_abbr", "compensation_style", "nem_system_kw_limit"]
             nem_scenario_csv = scenarios.config_nem(self.scenario, self.year)
             nem_df = (
-                pd.read_csv(
+                pd.read_csv(
+                    self.config.project.DIR / f"data/nem/{nem_scenario_csv}",
+                    dtype_backend="pyarrow",
+                )
                 .rename(columns={"max_pv_kw_limit": "nem_system_kw_limit"})
                 .loc[:, cols]
             )
@@ -356,14 +418,15 @@ class Model:
         ] = "net billing"
 
     def prepare_agents(self):
-
+        """Prepare the :py:attr:`tech`- and :py:attr:`sector`-specific agent data."""
+        if self.sector is Sector.BTM:
             # map tariffs
             self.log.info("....running with pre-processed tariffs")
-            self.
+            self._get_rates()
 
             # get hourly consumption
             self.log.info("....fetching hourly consumption")
-            self.
+            self._get_load()
 
             if self.config.project.settings.SIZE_SYSTEMS:
                 # size btm systems
@@ -372,9 +435,9 @@ class Model:
 
             # map nem policies
             self.log.info("....processing NEM for BTM systems")
-            self.
+            self._get_nem()
 
-        if self.sector
+        if self.sector is Sector.FOM:
             if self.config.project.settings.SIZE_SYSTEMS:
                 # for fom agents, take largest wind turbine
                 self.agents.sort_values(
@@ -414,16 +477,17 @@ class Model:
         )
 
     def run_valuation(self):
+        """Runs the valuation model to create the PySAM financial results."""
         valuer = valuation.ValueFunctions(self.scenario, self.year, self.config)
 
-        if self.sector
+        if self.sector is Sector.BTM:
             self.agents["application"] = "BTM"
 
             if len(self.agents) > 0:
                 self.log.info("\n")
                 self.log.info(f"starting valuation for {len(self.agents)} BTM agents")
 
-                self.agents = valuer.
+                self.agents = valuer.run(agents=self.agents, sector=self.sector)
 
                 self.log.info("null counts:")
                 self.log.info(self.agents.isnull().sum().sort_values())
@@ -443,14 +507,14 @@ class Model:
         else:
             self.agents = pd.DataFrame()
 
-        if self.sector
+        if self.sector is Sector.FOM:
             self.agents["application"] = "FOM"
 
             if len(self.agents) > 0:
                 self.log.info("\n")
                 self.log.info(f"starting valuation for {len(self.agents)} FOM agents")
 
-                self.agents = valuer.
+                self.agents = valuer.run(agents=self.agents, sector=self.sector)
 
                 self.log.info("null counts:")
                 self.log.info(self.agents.isnull().sum().sort_values())
@@ -468,5 +532,6 @@ class Model:
             self.agents = pd.DataFrame()
 
     def run(self):
+        """Runs the whole model."""
         self.prepare_agents()
         self.run_valuation()
dwind/mp.py
CHANGED
@@ -1,3 +1,8 @@
+"""Provides the :py:class:`MultiProcess` class for running a model on `NREL's Kestrel HPC system`_.
+
+.. NREL's Kestrel HPC system: https://nrel.github.io/HPC/Documentation/Systems/Kestrel/
+"""
+
 from __future__ import annotations
 
 import time
@@ -118,7 +123,7 @@ class MultiProcess:
 
         # Create the output directory if it doesn't already exist
         self.dir_out = Path.cwd() if dir_out is None else Path(self.dir_out).resolve()
-        self.out_path = self.dir_out /
+        self.out_path = self.dir_out / "chunk_files"
         if not self.out_path.exists():
             self.out_path.mkdir()
 
@@ -149,57 +154,46 @@ class MultiProcess:
             }
             for j in job_ids
         }
-        table, complete = hpc.
+        table, complete = hpc.generate_run_status_table(job_status)
         with Live(table, refresh_per_second=1) as live:
             while not complete:
                 time.sleep(5)
                 job_status |= hpc.update_status(job_status)
-                table, complete = hpc.
+                table, complete = hpc.generate_run_status_table(job_status)
                 live.update(table)
 
-    def
-        """Collect the chunked results files, combine them into a single output parquet file, and
-        delete the chunked results files.
-        """
-        result_files = [f for f in self.out_path.iterdir() if f.suffix == (".pqt")]
-
-        if len(result_files) > 0:
-            result_agents = pd.concat([pd.read_parquet(f) for f in result_files])
-            f_out = self.dir_out / f"run_{self.run_name}.pqt"
-            result_agents.to_parquet(f_out)
-            print(f"Aggregated results saved to: {f_out}")
-
-            for f in result_files:
-                f.unlink()
-
-    def run_jobs(self, agent_df: pd.DataFrame) -> None:
+    def run_jobs(self, agent_df: pd.DataFrame) -> dict[str, int]:
         """Run :py:attr:`n_jobs` number of jobs for the :py:attr:`agent_df`.
 
-
-
-
-
+        Args:
+            agent_df (pandas.DataFrame): The agent DataFrame to be chunked and analyzed.
+
+        Returns:
+            dict[str, int]: Dictionary mapping of each SLURM job id to the chunk run in that job.
         """
         agent_df = agent_df.reset_index(drop=True)
         # chunks = np.array_split(agent_df, self.n_nodes)
         starts, ends = split_by_index(agent_df, self.n_nodes)
-
+        job_chunk_map = {}
 
-        base_cmd_str = f"module load conda; conda activate {self.env};
-        base_cmd_str += "dwind run
+        base_cmd_str = f"module load conda; conda activate {self.env};"
+        base_cmd_str += " dwind run chunk"
 
-        base_args = f" {self.location}
-        base_args += f" {self.sector}
-        base_args += f" {self.scenario}
-        base_args += f" {self.year}
+        base_args = f" {self.location}"
+        base_args += f" {self.sector}"
+        base_args += f" {self.scenario}"
+        base_args += f" {self.year}"
         base_args += f" {self.out_path}"
-        base_args += f" {self.repository}
-        base_args += f" {self.model_config}
+        base_args += f" {self.repository}"
+        base_args += f" {self.model_config}"
+
+        if not (agent_path := self.out_path / "agent_chunks").is_dir():
+            agent_path.mkdir()
 
         start_time = time.perf_counter()
         # for i, (start, end) in enumerate(zip(starts, ends, strict=True)):
         for i, (start, end) in enumerate(zip(starts, ends)):  # noqa: B905
-            fn = self.out_path / f"agents_{i}.pqt"
+            fn = self.out_path / "agent_chunks" / f"agents_{i}.pqt"
             agent_df.iloc[start:end].to_parquet(fn)
 
             job_name = f"{self.run_name}_{i}"
@@ -218,7 +212,7 @@ class MultiProcess:
             )
 
             if job_id:
-
+                job_chunk_map[job_id] = i
                 print(f"Kicked off job: {job_name}, with SLURM {job_id=} on Eagle.")
             else:
                 print(
@@ -226,5 +220,6 @@ class MultiProcess:
                 )
 
         # Check on the job statuses until they're complete, then aggregate the results
+        jobs = [*job_chunk_map]
         self.check_status(jobs, start_time)
-
+        return job_chunk_map