dwind-0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dwind/model.py ADDED
@@ -0,0 +1,371 @@
+ from __future__ import annotations
+
+ import logging
+ import warnings
+ from pathlib import Path
+
+ import numpy as np
+ import pandas as pd
+
+ from dwind import Configuration, helper, resource, scenarios, valuation, btm_sizing
+
+
+ # POTENTIALLY DANGEROUS!
+ warnings.filterwarnings("ignore")
+
+
+ class Agents:
+     """Responsible for reading in the agent data and storing it for the ``Model`` class.
+
+     Agents are the modified parcels that have been truncated to the largest circle able
+     to be contained in the parcel, and contain all of the relevant tax lot and
+     geographic variables that would be found in a parcel.
+
+     Parameters
+     ----------
+     agent_file : str | pathlib.Path
+         A parquet file (.pqt or .parquet), pickle file (.pkl or .pickle), or CSV file
+         (.csv) containing the previously generated agent data.
+
+     Raises
+     ------
+     ValueError
+         Raised if :py:attr:`agent_file` does not have a valid file extension for a
+         pickle (.pkl or .pickle), parquet (.pqt or .parquet), or CSV (.csv) file.
+     """
+
+     def __init__(self, agent_file: str | Path):
+         self.agent_file = Path(agent_file).resolve()
+         self.load_agents()
+
+     def load_agents(self):
+         """Loads in the agent file and drops any indices."""
+         suffix = self.agent_file.suffix
+         if suffix in (".pqt", ".parquet"):
+             file_reader = pd.read_parquet
+         elif suffix in (".pkl", ".pickle"):
+             file_reader = pd.read_pickle
+         elif suffix == ".csv":
+             file_reader = pd.read_csv
+         else:
+             raise ValueError(
+                 f"File types ending in {suffix} can't be read as pickle, parquet, or CSV"
+             )
+
+         self.agents = file_reader(self.agent_file)
+         if suffix == ".csv":
+             self.agents = self.agents.reset_index(drop=True)
+
+         if "state_fips" not in self.agents.columns:
+             self.agents["state_fips"] = self.agents["fips_code"].str[:2]
+
+         if "census_tract_id" not in self.agents.columns:
+             census_tracts = pd.read_csv(
+                 "/projects/dwind/configs/sizing/wind/lkup_block_to_pgid_2020.csv",
+                 dtype={"fips_block": str, "pgid": str},
+             )
+             census_tracts["census_tract_id"] = census_tracts["fips_block"].str[:11]
+             census_tracts = census_tracts[["pgid", "census_tract_id"]]
+             census_tracts = census_tracts.drop_duplicates()
+             self.agents = self.agents.merge(census_tracts, how="left", on="pgid")
+             self.agents = self.agents.drop_duplicates(subset=["gid"])
+             self.agents = self.agents.reset_index(drop=True)
+
+
+ class Model:
+     def __init__(
+         self,
+         agents: pd.DataFrame,
+         location: str,
+         sector: str,
+         scenario: str,
+         year: int,
+         out_path: str | Path,
+         model_config: str | Path,
+         chunk_ix: int | None = None,
+     ):
+         if chunk_ix is None:
+             chunk_ix = 0
+         self.agents = agents
+         self.out_path = Path(out_path).resolve()
+
+         self.full_scenario = f"{location}_{sector}_{scenario}_{year}"
+         self.run_name = f"{self.full_scenario}_{chunk_ix}"
+         self.location = location
+         self.sector = sector
+         self.scenario = scenario
+         self.year = year
+         self.config = Configuration(model_config)
+
+         self.init_logging()
+
+         t_dict = self.config.rev.turbine_class_dict
+         if self.sector == "fom":
+             apps = ["BTM, FOM", "BTM, FOM, Utility", "FOM, Utility"]
+             self.agents["turbine_class"] = self.agents["wind_size_kw_fom"].map(t_dict)
+         else:
+             apps = ["BTM", "BTM, FOM", "BTM, FOM, Utility"]
+             self.agents["turbine_class"] = self.agents["wind_size_kw"].map(t_dict)
+
+         # filter by sector
+         self.agents = self.agents[self.agents["application"].isin(apps)]
+
+     def init_logging(self):
+         log_dir = self.out_path / "logs"
+         if not log_dir.exists():
+             log_dir.mkdir()
+
+         logging.basicConfig(
+             level=logging.INFO,
+             format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s",
+             handlers=[
+                 logging.FileHandler(log_dir / "dwfs.txt"),
+                 logging.StreamHandler(),
+             ],
+         )
+
+         self.log = logging.getLogger("dwfs")
+
+     def get_gen(self, resource_year="2018"):
+         if resource_year != "2018":
+             return
+
+         # update 2012 rev index to 2018 index
+         f = "/projects/dwind/configs/rev/wind/lkup_rev_index_2012_to_2018.csv"
+         lkup = pd.read_csv(f)[["rev_index_wind_2012", "rev_index_wind_2018"]]
+
+         self.agents = (
+             self.agents.merge(
+                 lkup, left_on="rev_index_wind", right_on="rev_index_wind_2012", how="left"
+             )
+             .drop(columns=["rev_index_wind", "rev_index_wind_2012"])
+             .rename(columns={"rev_index_wind_2018": "rev_index_wind"})
+             .dropna(subset="rev_index_wind")
+         )
+
+         # update 2012 rev cf/naep/aep to 2018 values
+         # self.agents = self.agents.drop(columns=["wind_naep", "wind_cf", "wind_aep"])
+         resource_potential = resource.ResourcePotential(
+             parcels=self.agents,
+             application=self.sector,
+             year=resource_year,
+             model_config=self.config,
+         )
+         self.agents = resource_potential.match_rev_summary_to_agents()
+
+     def get_rates(self):
+         self.agents = self.agents[~self.agents["rate_id_alias"].isna()]
+         self.agents["rate_id_alias"] = self.agents["rate_id_alias"].astype(int)
+         rate_ids = np.unique(self.agents.rate_id_alias.values)
+
+         tariff = (
+             pd.read_parquet("/projects/dwind/data/tariffs/2025_tariffs.pqt")
+             .loc[rate_ids]
+             .reset_index(drop=False)  # , names="rate_id_alias")
+         )
+         self.tariff_columns = [
+             c for c in tariff.columns if c not in ("rate_id_alias", "tariff_name")
+         ]
+
+         self.agents = self.agents.merge(tariff, how="left", on="rate_id_alias")
+
+     def get_load(self):
+         consumption_hourly = pd.read_parquet("/projects/dwind/data/crb_consumption_hourly.pqt")
+
+         consumption_hourly["scale_offset"] = 1e8
+         consumption_hourly = helper.scale_array_precision(
+             consumption_hourly, "consumption_hourly", "scale_offset"
+         )
+
+         self.agents = self.agents.merge(
+             consumption_hourly, how="left", on=["crb_model", "hdf_index"]
+         )
+
+         # update load based on scaling factors from 2024 consumption data
+         f = "/projects/dwind/data/parcel_landuse_load_application_mapping.csv"
+         bldg_types = pd.read_csv(f)[["land_use", "bldg_type"]]
+         self.agents = self.agents.merge(bldg_types, on="land_use", how="left")
+
+         f = "/projects/dwind/data/consumption/2024/load_scaling_factors.csv"
+         sfs = pd.read_csv(f, dtype={"state_fips": str})[["state_fips", "bldg_type", "load_sf_2024"]]
+         self.agents = self.agents.merge(sfs, on=["state_fips", "bldg_type"], how="left")
+         self.agents["load_kwh"] *= self.agents["load_sf_2024"]
+         self.agents["max_demand_kw"] *= self.agents["load_sf_2024"]
+         self.agents = self.agents.drop(columns=["load_sf_2024"])
+
+         if self.year > 2025:
+             # get county_id to nerc_region_abbr lkup
+             # from diffusion_shared.county_nerc_join (dgen_db_fy23q4_ss23)
+             f = "/projects/dwind/data/county_nerc_join.csv"
+             nerc_regions = pd.read_csv(f)[["county_id", "nerc_region_abbr"]]
+             self.agents = self.agents.merge(nerc_regions, on=["county_id"], how="left")
+
+             # get load growth projections from AEO
+             # from diffusion_shared.aeo_load_growth_projections_nerc_2023_updt (dgen_db_fy23q4_ss23)
+             f = "/projects/dwind/data/consumption/aeo_load_growth_projections_nerc_2023_updt.csv"
+             load_growth = pd.read_csv(f)
+             load_growth = load_growth.loc[
+                 load_growth["scenario"].eq("AEO2023 Reference case")
+                 & load_growth["year"].eq(self.year),
+                 ["nerc_region_abbr", "sector_abbr", "load_multiplier"],
+             ]
+
+             # merge load growth projections
+             self.agents = self.agents.merge(
+                 load_growth, on=["nerc_region_abbr", "sector_abbr"], how="left"
+             )
+             self.agents["load_kwh"] *= self.agents["load_multiplier"]
+             self.agents["max_demand_kw"] *= self.agents["load_multiplier"]
+             self.agents = self.agents.drop(columns=["load_multiplier", "nerc_region_abbr"])
+
+         self.agents = helper.scale_array_sum(self.agents, "consumption_hourly", "load_kwh")
+
+     def get_nem(self):
+         if self.scenario == "metering":
+             self.agents["compensation_style"] = "net metering"
+             self.agents["nem_system_kw_limit"] = 1000000000
+         elif self.scenario == "billing":
+             self.agents["compensation_style"] = "net billing"
+             self.agents["nem_system_kw_limit"] = 1000000000
+         else:
+             cols = ["state_abbr", "sector_abbr", "compensation_style", "nem_system_kw_limit"]
+             nem_scenario_csv = scenarios.config_nem(self.scenario, self.year)
+             nem_df = (
+                 pd.read_csv(self.config.project.DIR / f"data/nem/{nem_scenario_csv}")
+                 .rename(columns={"max_pv_kw_limit": "nem_system_kw_limit"})
+                 .loc[:, cols]
+             )
+
+             self.agents = self.agents.merge(nem_df, how="left", on=["state_abbr", "sector_abbr"])
+
+             self.agents["compensation_style"] = self.agents["compensation_style"].fillna(
+                 "net billing"
+             )
+             self.agents["nem_system_kw_limit"] = self.agents["nem_system_kw_limit"].fillna(0.0)
+
+         # check if the selected system size by tech violates nem_system_kw_limit
+         for tech in self.config.project.settings.TECHS:
+             col = f"{tech}_size_kw_btm"
+             self.agents.loc[
+                 (self.agents[col] > self.agents["nem_system_kw_limit"]), "compensation_style"
+             ] = "net billing"
+
+     def prepare_agents(self):
+         # get generation data
+         self.log.info("....fetching resource information")
+         self.get_gen()
+
+         if self.sector == "btm":
+             # map tariffs
+             self.log.info("....running with pre-processed tariffs")
+             self.get_rates()
+
+             # get hourly consumption
+             self.log.info("....fetching hourly consumption")
+             self.get_load()
+
+             if self.config.project.settings.SIZE_SYSTEMS:
+                 # size btm systems
+                 self.log.info("....sizing BTM systems")
+                 self.agents = btm_sizing.sizer(self.agents, self.config)
+
+                 # map nem policies
+                 self.log.info("....processing NEM for BTM systems")
+                 self.get_nem()
+
+         if self.sector == "fom":
+             if self.config.project.settings.SIZE_SYSTEMS:
+                 # for fom agents, take largest wind turbine
+                 self.agents.sort_values(
+                     by=["wind_turbine_kw", "turbine_height_m"],
+                     ascending=[False, False],
+                     inplace=True,
+                 )
+                 self.agents.drop_duplicates(subset=["gid"], inplace=True)
+
+                 # track FOM techpot sizes
+                 self.agents["solar_size_kw_techpot"] = self.agents["solar_size_kw_fom"]
+                 self.agents["wind_size_kw_techpot"] = self.agents["wind_size_kw_fom"]
+
+                 # handle FOM max system sizes
+                 if "solar" in self.config.project.settings.TECHS:
+                     mask = (
+                         self.agents["solar_size_kw_fom"]
+                         > self.config.siting["solar"]["max_fom_size_kw"]
+                     )
+                     self.agents.loc[mask, "solar_size_kw_fom"] = self.config.siting["solar"][
+                         "max_fom_size_kw"
+                     ]
+                     self.agents["solar_aep_fom"] = (
+                         self.agents["solar_naep"] * self.agents["solar_size_kw_fom"]
+                     )
+
+                 if "wind" in self.config.project.settings.TECHS:
+                     mask = (
+                         self.agents["wind_size_kw_fom"]
+                         > self.config.siting["wind"]["max_fom_size_kw"]
+                     )
+                     self.agents.loc[mask, "wind_size_kw_fom"] = self.config.siting["wind"][
+                         "max_fom_size_kw"
+                     ]
+                     self.agents["wind_aep_fom"] = (
+                         self.agents["wind_naep"] * self.agents["wind_size_kw_fom"]
+                     )
+
+     def run_valuation(self):
+         valuer = valuation.ValueFunctions(self.scenario, self.year, self.config)
+
+         if self.sector == "btm":
+             self.agents["application"] = "BTM"
+
+             if len(self.agents) > 0:
+                 self.log.info("\n")
+                 self.log.info(f"starting valuation for {len(self.agents)} BTM agents")
+
+                 self.agents = valuer.run_multiprocessing(self.agents, sector="btm")
+
+                 self.log.info("null counts:")
+                 self.log.info(self.agents.isnull().sum().sort_values())
+
+                 # save parquet
+                 if self.config.project.settings.SAVE_APP_PARQUET:
+                     if "wind_cf_hourly" in self.agents.columns:
+                         self.agents.drop(columns=["wind_cf_hourly"], inplace=True, errors="ignore")
+
+                     if "solar_cf_hourly" in self.agents.columns:
+                         self.agents.drop(columns=["solar_cf_hourly"], inplace=True, errors="ignore")
+
+                     self.agents.drop(columns=self.tariff_columns, inplace=True, errors="ignore")
+
+                     f_out = self.out_path / f"{self.run_name}.pqt"
+                     self.agents.to_parquet(f_out)
+             else:
+                 self.agents = pd.DataFrame()
+
+         if self.sector == "fom":
+             self.agents["application"] = "FOM"
+
+             if len(self.agents) > 0:
+                 self.log.info("\n")
+                 self.log.info(f"starting valuation for {len(self.agents)} FOM agents")
+
+                 self.agents = valuer.run_multiprocessing(self.agents, configuration="fom")
+
+                 self.log.info("null counts:")
+                 self.log.info(self.agents.isnull().sum().sort_values())
+
+                 # --- save sector parquet ---
+                 if self.config.project.settings.SAVE_APP_PARQUET:
+                     if "wind_cf_hourly" in self.agents.columns:
+                         self.agents.drop(columns=["wind_cf_hourly"], inplace=True, errors="ignore")
+                     if "solar_cf_hourly" in self.agents.columns:
+                         self.agents.drop(columns=["solar_cf_hourly"], inplace=True, errors="ignore")
+
+                     f_out = self.out_path / f"{self.run_name}.pqt"
+                     self.agents.to_parquet(f_out)
+             else:
+                 self.agents = pd.DataFrame()
+
+     def run(self):
+         self.prepare_agents()
+         self.run_valuation()
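
For orientation, a minimal sketch of how the ``Agents`` and ``Model`` classes above might be driven is shown below. The file paths, location, and configuration file name are hypothetical placeholders (they are not shipped with the package), and the sketch assumes only the interfaces visible in this file.

    from dwind.model import Agents, Model

    # Hypothetical inputs; substitute your own agent file, output directory, and model config.
    agents = Agents("/path/to/agents.pqt")  # loads the pre-generated agent parcels
    model = Model(
        agents=agents.agents,
        location="colorado",
        sector="btm",
        scenario="metering",
        year=2025,
        out_path="/path/to/outputs",
        model_config="/path/to/model_config.toml",
    )
    model.run()  # prepare_agents(), then run_valuation(); writes <run_name>.pqt when SAVE_APP_PARQUET is set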
dwind/mp.py ADDED
@@ -0,0 +1,225 @@
+ from __future__ import annotations
+
+ import time
+ from pathlib import Path
+
+ import pandas as pd
+ from rex.utilities.hpc import SLURM
+
+ from dwind.helper import split_by_index
+
+
+ class MultiProcess:
+     """Multiprocessing interface for running batch jobs via ``SLURM``.
+
+     Parameters
+     ----------
+     location : str
+         The state name, or an underscore-separated string of "state_county".
+     sector : str
+         One of "fom" (front-of-meter) or "btm" (behind-the-meter).
+     scenario : str
+         An underscore-separated string for the scenario to be run.
+     year : int
+         The year-basis for the scenario.
+     env : str | Path
+         The path to the ``dwind`` Python environment that should be used to run the model.
+     n_nodes : int
+         Number of nodes to request from the HPC when running an ``sbatch`` job.
+     memory : int
+         Node memory, in GB.
+     walltime : int
+         Node walltime request, in hours.
+     allocation : str
+         The HPC project (allocation) handle that will be charged for running the analysis.
+     feature : str
+         Additional flags for the SLURM job, using formatting such as ``--qos=high`` or
+         ``--depend=[state:job_id]``.
+     model_config : str
+         The full file path and name of where the model configuration file is located.
+     stdout_path : str | Path | None, optional
+         The path where all the stdout logs should be written to, by default None. When None,
+         ":py:attr:`dir_out` / logs" is used.
+     dir_out : str | Path | None, optional
+         The path to save the chunked results files, by default Path.cwd() (current working
+         directory).
+     """
+
+     def __init__(
+         self,
+         location: str,
+         sector: str,
+         scenario: str,
+         year: int,
+         env: str | Path,
+         n_nodes: int,
+         memory: int,
+         walltime: int,
+         allocation: str,
+         feature: str,
+         repository: str | Path,
+         model_config: str,
+         stdout_path: str | Path | None = None,
+         dir_out: str | Path | None = None,
+     ):
+         """Initialize the ``SLURM`` interface.
+
+         Parameters
+         ----------
+         location : str
+             The state name, or an underscore-separated string of "state_county".
+         sector : str
+             One of "fom" (front-of-meter) or "btm" (behind-the-meter).
+         scenario : str
+             An underscore-separated string for the scenario to be run, such as "baseline_2022".
+         year : int
+             The year-basis for the scenario.
+         env : str | Path
+             The path to the ``dwind`` Python environment that should be used to run the model.
+         n_nodes : int
+             Number of nodes to request from the HPC when running an ``sbatch`` job.
+         memory : int
+             Node memory, in GB.
+         walltime : int
+             Node walltime request, in hours.
+         allocation : str
+             The HPC project (allocation) handle that will be charged for running the analysis.
+         feature : str
+             Additional flags for the SLURM job, using formatting such as ``--qos=high`` or
+             ``--depend=[state:job_id]``.
+         repository : str | Path
+             The path to the dwind repository to use for analysis.
+         model_config : str
+             The full file path and name of where the model configuration file is located.
+         stdout_path : str | Path | None, optional
+             The path where all the stdout logs should be written to, by default None. When None,
+             ":py:attr:`dir_out` / logs" is used.
+         dir_out : str | Path, optional
+             The path to save the chunked results files, by default Path.cwd() (current working
+             directory).
+         """
+         self.run_name = f"{location}_{sector}_{scenario}_{year}"
+         self.location = location
+         self.sector = sector
+         self.scenario = scenario
+         self.year = year
+         self.env = env
+         self.n_nodes = n_nodes
+         self.memory = memory
+         self.walltime = walltime
+         self.alloc = allocation
+         self.feature = feature
+         self.stdout_path = stdout_path
+         self.dir_out = dir_out
+         self.repository = repository
+         self.model_config = model_config
+
+         # Create the output directory if it doesn't already exist
+         self.dir_out = Path.cwd() if dir_out is None else Path(self.dir_out).resolve()
+         self.out_path = self.dir_out / f"chunk_files_{self.run_name}"
+         if not self.out_path.exists():
+             self.out_path.mkdir()
+
+         # Create a new path in the output directory for the logs if a path is not provided
+         if self.stdout_path is None:
+             log_dir = self.out_path / "logs"
+             if not log_dir.exists():
+                 log_dir.mkdir()
+             self.stdout_path = log_dir
+
+     def check_status(self, job_ids: list[int]):
+         """Prints the status of all submitted ``job_ids`` until they have completed.
+
+         Parameters
+         ----------
+         job_ids : list[int]
+             The list of HPC ``job_id``s to check on.
+         """
+         hpc = SLURM()
+         print(f"{len(job_ids)} job(s) started")
+
+         jobs_status = {j: hpc.check_status(job_id=j) for j in job_ids}
+         n_remaining = len([s for s in jobs_status.values() if s in ("PD", "R")])
+         print(f"{n_remaining} job(s) remaining: {jobs_status}")
+         time.sleep(30)
+
+         while n_remaining > 0:
+             hpc = SLURM()
+             for job, status in jobs_status.items():
+                 if status in ("GC", "None"):
+                     continue
+                 jobs_status.update({job: hpc.check_status(job_id=job)})
+
+             n_remaining = len([s for s in jobs_status.values() if s in ("PD", "R")])
+             print(f"{n_remaining} job(s) remaining: {jobs_status}")
+             if n_remaining > 0:
+                 time.sleep(30)
+
+     def aggregate_outputs(self):
+         """Collect the chunked results files, combine them into a single output parquet file, and
+         delete the chunked results files.
+         """
+         result_files = [f for f in self.out_path.iterdir() if f.suffix in (".pickle", ".pkl")]
+
+         if len(result_files) > 0:
+             result_agents = pd.concat([pd.read_pickle(f) for f in result_files])
+             f_out = self.dir_out / f"run_{self.run_name}.pqt"
+             result_agents.to_parquet(f_out)
+
+             for f in result_files:
+                 f.unlink()
+
+     def run_jobs(self, agent_df: pd.DataFrame) -> None:
+         """Run one SLURM job for each of the :py:attr:`n_nodes` chunks of ``agent_df``.
+
+         Parameters
+         ----------
+         agent_df : pandas.DataFrame
+             The agent DataFrame to be chunked and analyzed.
+         """
+         agent_df = agent_df.reset_index(drop=True)
+         # chunks = np.array_split(agent_df, self.n_nodes)
+         starts, ends = split_by_index(agent_df, self.n_nodes)
+         jobs = []
+
+         base_cmd_str = f"module load conda; conda activate {self.env}; "
+         base_cmd_str += "dwind run-chunk "
+
+         base_args = f"--location {self.location} "
+         base_args += f"--sector {self.sector} "
+         base_args += f"--scenario {self.scenario} "
+         base_args += f"--year {self.year} "
+         base_args += f"--repository {self.repository} "
+         base_args += f"--model-config {self.model_config} "
+         base_args += f"--out-path {self.out_path}"
+
+         for i, (start, end) in enumerate(zip(starts, ends, strict=True)):
+             fn = self.out_path / f"agents_{i}.pqt"
+             agent_df.iloc[start:end].to_parquet(fn)
+
+             job_name = f"{self.run_name}_{i}"
+             cmd_str = f"{base_cmd_str} --chunk-ix {i} {base_args}"
+             print("cmd:", cmd_str)
+
+             slurm_manager = SLURM()
+             job_id, err = slurm_manager.sbatch(
+                 cmd=cmd_str,
+                 alloc=self.alloc,
+                 memory=self.memory,
+                 walltime=self.walltime,
+                 feature=self.feature,
+                 name=job_name,
+                 stdout_path=self.stdout_path,
+             )
+
+             if job_id:
+                 jobs.append(job_id)
+                 print(f"Kicked off job: {job_name}, with SLURM {job_id=} on Eagle.")
+             else:
+                 print(
+                     f"{job_name=} was unable to be kicked off due to the following error:\n{err}."
+                 )
+
+         # Check on the job statuses until they're complete, then aggregate the results
+         self.check_status(jobs)
+         self.aggregate_outputs()
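
As a rough, illustrative sketch (not part of the packaged module), the ``MultiProcess`` interface above could be driven as follows; the allocation handle, environment path, repository path, file paths, and resource requests are placeholder values.

    import pandas as pd

    from dwind.mp import MultiProcess

    agent_df = pd.read_parquet("/path/to/agents.pqt")  # pre-generated agents to be chunked

    mp = MultiProcess(
        location="colorado",
        sector="fom",
        scenario="baseline",
        year=2025,
        env="/path/to/conda/envs/dwind",
        n_nodes=4,
        memory=90,
        walltime=4,
        allocation="dwind",
        feature="--qos=normal",
        repository="/path/to/dwind",
        model_config="/path/to/model_config.toml",
    )
    # Writes agents_<i>.pqt chunks, submits one SLURM job per chunk, polls job status,
    # then aggregates the chunk results into run_<run_name>.pqt.
    mp.run_jobs(agent_df)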