dwind 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dwind/model.py CHANGED
@@ -1,3 +1,7 @@
+ """Provides the primary ``Agents`` and ``Model`` classes for loading, preparing, and analyzing
+ parcel data.
+ """
+
  from __future__ import annotations

  import logging
@@ -7,8 +11,8 @@ from pathlib import Path
  import numpy as np
  import pandas as pd

- from dwind import Configuration, resource, scenarios, valuation, btm_sizing
- from dwind.utils import array
+ from dwind import resource, scenarios, valuation, btm_sizing
+ from dwind.config import Year, Sector, CRBModel, Scenario, Configuration


  # POTENTIALLY DANGEROUS!
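The import change above swaps the top-level `Configuration` import for typed helpers from `dwind.config`. Based on how they are used later in this diff (`Sector("fom")`, `self.sector is Sector.FOM`, `Scenario.METERING`), these behave like value-backed enums; a minimal sketch of that pattern, with member names assumed from this diff only (the real definitions live in `dwind/config.py` and may differ):

```python
# Illustrative only: not the actual dwind.config source.
from enum import Enum


class Sector(str, Enum):
    """Hypothetical string-backed sector enum matching the usage in this diff."""

    FOM = "fom"  # front-of-meter
    BTM = "btm"  # behind-the-meter


# Value lookup and identity checks explain the patterns used below:
assert Sector("fom") is Sector.FOM  # coercion from a plain string argument
assert Sector.FOM.value == "fom"    # str-backed, so it still formats cleanly
```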
@@ -21,18 +25,6 @@ class Agents:
      Agents are the modified parcels that have been truncated to the largest circle able
      to be contained in the parcel, and contain all of the relevant tax lot and
      geographic variables that would be found in a parcel.
-
-     Parameters
-     ---------
-     agent_file : str | pathlib.Path
-         Either a parquet file (.pqt or .parquet) or pickle file (.pkl or .pickle)
-         containing the previously generated agent data.
-
-     Raises:
-     ------
-     ValueError
-         Raised if the :py:attr:`agent_file` does not have a valid file extension for
-         either a pickle file (.pkl or .pickle) or a parquet file (.pqt or .parquet).
      """

      def __init__(
@@ -43,8 +35,23 @@ class Agents:
          *,
          resource_year: int = 2018,
      ):
+         """Initialize an instance of the Agents class.
+
+         Args:
+             agent_file (str | pathlib.Path): Either a parquet file (.pqt or .parquet), pickle
+                 file (.pkl or .pickle), or CSV file (.csv) containing the previously generated
+                 agent data.
+             sector (str | None): One of "fom" (front-of-meter), "btm" (behind-the-meter), or None.
+                 Use None only if the :py:meth:`prepare` method does not need to be run on the
+                 agent data.
+             model_config (str | Path, optional): Full file path to the overall model configuration
+                 TOML file containing SQL connections, data locations, etc.
+             resource_year (int, optional): The reV resource year basis. This should be either 2012
+                 or 2018, but it is assumed the data were created using the 2012 reV lookups.
+                 Defaults to 2018.
+         """
          self.agent_file = Path(agent_file).resolve()
-         self.sector = sector
+         self.sector = sector if sector is None else Sector(sector)
          self.config = model_config
          self.resource_year = resource_year
          self._load_agents()
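A hypothetical construction of `Agents` under the new signature; the paths and sector value are illustrative only:

```python
from dwind.model import Agents

# Example only: any parquet, pickle, or CSV agent file works; sector may be None
# when the data do not need to be prepared.
agents = Agents(
    "/projects/dwind/agents/agents_colorado.pqt",
    sector="btm",  # stored as Sector.BTM via the enum coercion above
    model_config="/projects/dwind/configs/model.toml",
    resource_year=2018,
)
```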
@@ -63,6 +70,9 @@ class Agents:

          Args:
              agent_file (str | Path): The full file path of the agent parquet, CSV, or pickle data.
+             sector (str): One of "fom" (front-of-meter) or "btm" (behind-the-meter).
+             model_config (str | Path, optional): Full file path to the overall model configuration
+                 TOML file containing SQL connections, data locations, etc.
              save_results (bool, optional): True to save any updates to the data. Defaults to False.
              file_name (str | Path | None, optional): The file path and name for where to save the
                  prepared data, if not overwriting the existing agent data. Defaults to None.
@@ -103,7 +113,7 @@ class Agents:
                  f"File types ending in {suffix} can't be read as pickle, parquet, or CSV"
              )

-         self.agents = file_reader(self.agent_file)
+         self.agents = file_reader(self.agent_file, dtype_backend="pyarrow")
          if suffix == ".csv":
              self.agents = self.agents.reset_index(drop=True)

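A recurring change in this release is threading `dtype_backend="pyarrow"` through the pandas readers. The keyword has been available on `pandas.read_csv` and `pandas.read_parquet` since pandas 2.0 and returns Arrow-backed nullable columns; note that `pandas.read_pickle` takes no such keyword, which suggests the `file_reader` call above resolves to the CSV or parquet readers when the keyword applies. A small self-contained demonstration:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, None, 3], "b": ["x", "y", None]})
df.to_parquet("example.pqt")

# pandas >= 2.0: integer columns with nulls stay integral instead of upcasting to float64.
arrow_df = pd.read_parquet("example.pqt", dtype_backend="pyarrow")
print(arrow_df.dtypes)  # e.g. int64[pyarrow], string[pyarrow]
```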
@@ -128,6 +138,15 @@ class Agents:
          self.merge_generation()

      def save_agents(self, file_name: str | Path | None = None):
+         """Save the prepared agent data to the path provided in :py:attr:`file_name`.
+
+         Args:
+             file_name (str | Path, optional): Full file path and name for where to save the agent
+                 data. Must end in a valid pickle, parquet, or csv extension. Defaults to None.
+
+         Raises:
+             ValueError: Raised if :py:attr:`file_name` lacks a valid pickle, parquet, or CSV extension.
+         """
          if file_name is None:
              file_name = self.agent_file

@@ -146,10 +165,14 @@ class Agents:
          file_saver(file_name)

      def merge_census_data(self):
+         """Merges the census tract identifiers based on the agent "gid" and census "pgid"
+         identifier columns.
+         """
          census_tracts = pd.read_csv(
              "/projects/dwind/configs/sizing/wind/lkup_block_to_pgid_2020.csv",
              usecols=["pgid", "fips_block"],
              dtype=str,
+             dtype_backend="pyarrow",
          ).drop_duplicates()
          census_tracts["census_tract_id"] = [el[:11] for el in census_tracts["fips_block"]]
          self.agents = (
@@ -158,14 +181,18 @@ class Agents:
              .reset_index(drop=True)
          )

-     def update_rev_id(self, resource_year="2018"):
+     def update_rev_id(self):
          """Update 2012 rev index to 2018 index."""
-         if resource_year != "2018":
+         if self.resource_year != 2018:
              return

          index_file = "/projects/dwind/configs/rev/wind/lkup_rev_index_2012_to_2018.csv"
          rev_index_map = (
-             pd.read_csv(index_file, usecols=["rev_index_wind_2012", "rev_index_wind_2018"])
+             pd.read_csv(
+                 index_file,
+                 usecols=["rev_index_wind_2012", "rev_index_wind_2018"],
+                 dtype_backend="pyarrow",
+             )
              .rename(columns={"rev_index_wind_2012": "rev_index_wind"})
              .set_index("rev_index_wind")
          )
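The reshaped `read_csv` call feeds the same index-remapping pattern as before; a minimal sketch of that pattern with invented values (the actual remap step sits below this hunk's cutoff, so the `.map` line is an assumed continuation):

```python
import pandas as pd

# Stand-in for the lookup file: maps each 2012 reV index to its 2018 counterpart.
rev_index_map = (
    pd.DataFrame({"rev_index_wind_2012": [10, 11], "rev_index_wind_2018": [107, 113]})
    .rename(columns={"rev_index_wind_2012": "rev_index_wind"})
    .set_index("rev_index_wind")
)

agents = pd.DataFrame({"rev_index_wind": [11, 10, 11]})
# Assumed continuation: replace the 2012 indices with their 2018 values.
agents["rev_index_wind"] = agents["rev_index_wind"].map(rev_index_map["rev_index_wind_2018"])
```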
@@ -190,21 +217,27 @@ class Agents:
          )

      def merge_generation(self):
-         if self.resource_year != "2018":
+         """Load and merge the resource potential data for the 2018 reV basis only. See
+         :py:class:`dwind.resource.ResourcePotential` for more information.
+         """
+         if self.resource_year != 2018:
              return

          # update 2012 rev cf/naep/aep to 2018 values
-         # self.agents = self.agents.drop(columns=["wind_naep", "wind_cf", "wind_aep"])
          resource_potential = resource.ResourcePotential(
              parcels=self.agents,
-             application=self.sector,
+             sector=self.sector,
              year=self.resource_year,
-             model_config=self.model_config,
+             model_config=self.config,
          )
          self.agents = resource_potential.match_rev_summary_to_agents()


  class Model:
+     """Primary dwind model that is responsible for gathering and connecting the various data points
+     by parcel.
+     """
+
      def __init__(
          self,
          agents: pd.DataFrame,
@@ -216,33 +249,51 @@ class Model:
          model_config: str | Path,
          chunk_ix: int | None = None,
      ):
+         """Initializes a :py:class:`Model` instance.
+
+         Args:
+             agents (pd.DataFrame): Prepared agent data with correct census and reV resource data
+                 already attached.
+             location (str): Priority class or "<state>_<county>" string.
+             sector (str): One of "fom" (front-of-meter) or "btm" (behind-the-meter).
+             scenario (str): Currently only accepts "baseline" as an input.
+             year (int): One of 2022, 2025, 2035, or 2040 for the analysis year.
+             out_path (str | Path): Path for where to save any logging or output data.
+             model_config (str | Path): The overarching model configuration TOML file containing
+                 key SQL connectors, data locations, and general model settings.
+             chunk_ix (int | None, optional): Index of the chunk being run, or None if the whole
+                 agent file is being run. Defaults to None.
+         """
          if chunk_ix is None:
              chunk_ix = 0
          self.agents = agents
          self.out_path = Path(out_path).resolve()

          self.full_scenario = f"{location}_{sector}_{scenario}_{year}"
-         self.run_name = f"{self.full_scenario}_{chunk_ix}"
+         self.run_name = f"{self.full_scenario}"
+         if chunk_ix is not None:
+             self.run_name += f"_{chunk_ix}"
          self.location = location
-         self.sector = sector
-         self.scenario = scenario
-         self.year = year
+         self.sector = Sector(sector)
+         self.scenario = Scenario(scenario)
+         self.year = Year(year)
          self.config = Configuration(model_config)

-         self.init_logging()
+         self._init_logging()

          t_dict = self.config.rev.turbine_class_dict
-         if self.sector == "fom":
+         if self.sector is Sector.FOM:
              apps = ["BTM, FOM", "BTM, FOM, Utility", "FOM, Utility"]
              self.agents["turbine_class"] = self.agents["wind_size_kw_fom"].map(t_dict)
-         else:
+         elif self.sector is Sector.BTM:
              apps = ["BTM", "BTM, FOM", "BTM, FOM, Utility"]
              self.agents["turbine_class"] = self.agents["wind_size_kw"].map(t_dict)

          # filter by sector
          self.agents = self.agents[self.agents["application"].isin(apps)]

-     def init_logging(self):
+     def _init_logging(self):
+         """Initializes logging to :py:attr:`out_path` / logs / dwfs.txt."""
          log_dir = self.out_path / "logs"
          if not log_dir.exists():
              log_dir.mkdir()
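A hypothetical `Model` construction under the new enum-coercing signature; the agent frame, location string, and paths are placeholders:

```python
from dwind.model import Model

model = Model(
    agents=prepared_agents,  # placeholder: a DataFrame produced by Agents(...)
    location="colorado_larimer",
    sector="fom",         # stored as Sector.FOM
    scenario="baseline",  # stored as Scenario.BASELINE
    year=2025,            # stored as Year(2025)
    out_path="/scratch/dwind/runs",
    model_config="/projects/dwind/configs/model.toml",
    chunk_ix=3,           # run_name becomes "colorado_larimer_fom_baseline_2025_3"
)
```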
@@ -258,13 +309,16 @@ class Model:

          self.log = logging.getLogger("dwfs")

-     def get_rates(self):
+     def _get_rates(self):
+         """Retrieves the tariff rates and merges them based on the "rate_id_alias" column."""
          self.agents = self.agents[~self.agents["rate_id_alias"].isna()]
          self.agents["rate_id_alias"] = self.agents["rate_id_alias"].astype(int)
          rate_ids = np.unique(self.agents.rate_id_alias.values)

          tariff = (
-             pd.read_parquet("/projects/dwind/data/tariffs/2025_tariffs.pqt")
+             pd.read_parquet(
+                 "/projects/dwind/data/tariffs/2025_tariffs.pqt", dtype_backend="pyarrow"
+             )
              .loc[rate_ids]
              .reset_index(drop=False)  # , names="rate_id_alias")
          )
@@ -274,25 +328,27 @@ class Model:

          self.agents = self.agents.merge(tariff, how="left", on="rate_id_alias")

-     def get_load(self):
-         consumption_hourly = pd.read_parquet("/projects/dwind/data/crb_consumption_hourly.pqt")
-
-         consumption_hourly["scale_offset"] = 1e8
-         consumption_hourly = array.scale_array_precision(
-             consumption_hourly, "consumption_hourly", "scale_offset"
-         )
-
-         self.agents = self.agents.merge(
-             consumption_hourly, how="left", on=["crb_model", "hdf_index"]
-         )
+     def _get_load(self):
+         """Retrieves the energy demand data, combines it with the agent data based on
+         the agent "land_use", "bldg_type", and "state_fips" columns, then scales the
+         consumption data based on the load.
+         """
+         self.agents["crb_model_index"] = self.agents.crb_model.replace(
+             CRBModel.str_model_map()
+         ).astype("uint8[pyarrow]")

          # update load based on scaling factors from 2024 consumption data
          f = "/projects/dwind/data/parcel_landuse_load_application_mapping.csv"
-         bldg_types = pd.read_csv(f)[["land_use", "bldg_type"]]
+         bldg_types = pd.read_csv(f, usecols=["land_use", "bldg_type"], dtype_backend="pyarrow")
          self.agents = self.agents.merge(bldg_types, on="land_use", how="left")

          f = "/projects/dwind/data/consumption/2024/load_scaling_factors.csv"
-         sfs = pd.read_csv(f, dtype={"state_fips": str})[["state_fips", "bldg_type", "load_sf_2024"]]
+         sfs = pd.read_csv(
+             f,
+             dtype={"state_fips": str},
+             usecols=["state_fips", "bldg_type", "load_sf_2024"],
+             dtype_backend="pyarrow",
+         )
          self.agents = self.agents.merge(sfs, on=["state_fips", "bldg_type"], how="left")
          self.agents["load_kwh"] *= self.agents["load_sf_2024"]
          self.agents["max_demand_kw"] *= self.agents["load_sf_2024"]
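The new `_get_load` body condenses the CRB model handling to a single replace-then-cast step; a sketch of that pattern, with the model names and mapping values invented since `CRBModel.str_model_map()` is internal to `dwind.config`:

```python
import pandas as pd

crb_model = pd.Series(["reference", "vintage", "reference"])
str_model_map = {"reference": 0, "vintage": 1}  # stand-in for CRBModel.str_model_map()

# Map the model names to small integers, then store them compactly as pyarrow uint8.
crb_model_index = crb_model.replace(str_model_map).astype("uint8[pyarrow]")
print(crb_model_index.dtype)  # uint8[pyarrow]
```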
@@ -302,13 +358,15 @@ class Model:
          # get county_id to nerc_region_abbr lkup
          # from diffusion_shared.county_nerc_join (dgen_db_fy23q4_ss23)
          f = "/projects/dwind/data/county_nerc_join.csv"
-         nerc_regions = pd.read_csv(f)[["county_id", "nerc_region_abbr"]]
+         nerc_regions = pd.read_csv(
+             f, usecols=["county_id", "nerc_region_abbr"], dtype_backend="pyarrow"
+         )
          self.agents = self.agents.merge(nerc_regions, on=["county_id"], how="left")

          # get load growth projects from AEO
          # from diffusion_shared.aeo_load_growth_projections_nerc_2023_updt (dgen_db_fy23q4_ss23)
          f = "/projects/dwind/data/consumption/aeo_load_growth_projections_nerc_2023_updt.csv"
-         load_growth = pd.read_csv(f)
+         load_growth = pd.read_csv(f, dtype_backend="pyarrow")
          load_growth = load_growth.loc[
              load_growth["scenario"].eq("AEO2023 Reference case")
              & load_growth["year"].eq(self.year),
@@ -323,20 +381,24 @@ class Model:
          self.agents["max_demand_kw"] *= self.agents["load_multiplier"]
          self.agents = self.agents.drop(columns=["load_multiplier", "nerc_region_abbr"])

-         self.agents = array.scale_array_sum(self.agents, "consumption_hourly", "load_kwh")
-
-     def get_nem(self):
-         if self.scenario == "metering":
+     def _get_nem(self):
+         """Retrieves the NEM data and merges with the agent data based on the "state_abbr" and
+         "sector_abbr" columns.
+         """
+         if self.scenario is Scenario.METERING:
              self.agents["compensation_style"] = "net metering"
              self.agents["nem_system_kw_limit"] = 1000000000
-         elif self.scenario == "billing":
+         elif self.scenario is Scenario.BILLING:
              self.agents["compensation_style"] = "net billing"
              self.agents["nem_system_kw_limit"] = 1000000000
          else:
              cols = ["state_abbr", "sector_abbr", "compensation_style", "nem_system_kw_limit"]
              nem_scenario_csv = scenarios.config_nem(self.scenario, self.year)
              nem_df = (
-                 pd.read_csv(self.config.project.DIR / f"data/nem/{nem_scenario_csv}")
+                 pd.read_csv(
+                     self.config.project.DIR / f"data/nem/{nem_scenario_csv}",
+                     dtype_backend="pyarrow",
+                 )
                  .rename(columns={"max_pv_kw_limit": "nem_system_kw_limit"})
                  .loc[:, cols]
              )
@@ -356,14 +418,15 @@ class Model:
          ] = "net billing"

      def prepare_agents(self):
-         if self.sector == "btm":
+         """Prepare the :py:attr:`tech`- and :py:attr:`sector`-specific agent data."""
+         if self.sector is Sector.BTM:
              # map tariffs
              self.log.info("....running with pre-processed tariffs")
-             self.get_rates()
+             self._get_rates()

              # get hourly consumption
              self.log.info("....fetching hourly consumption")
-             self.get_load()
+             self._get_load()

              if self.config.project.settings.SIZE_SYSTEMS:
                  # size btm systems
@@ -372,9 +435,9 @@ class Model:

              # map nem policies
              self.log.info("....processing NEM for BTM systems")
-             self.get_nem()
+             self._get_nem()

-         if self.sector == "fom":
+         if self.sector is Sector.FOM:
              if self.config.project.settings.SIZE_SYSTEMS:
                  # for fom agents, take largest wind turbine
                  self.agents.sort_values(
@@ -414,16 +477,17 @@ class Model:
          )

      def run_valuation(self):
+         """Runs the valuation model to create the PySAM financial results."""
          valuer = valuation.ValueFunctions(self.scenario, self.year, self.config)

-         if self.sector == "btm":
+         if self.sector is Sector.BTM:
              self.agents["application"] = "BTM"

              if len(self.agents) > 0:
                  self.log.info("\n")
                  self.log.info(f"starting valuation for {len(self.agents)} BTM agents")

-                 self.agents = valuer.run_multiprocessing(self.agents, sector="btm")
+                 self.agents = valuer.run(agents=self.agents, sector=self.sector)

                  self.log.info("null counts:")
                  self.log.info(self.agents.isnull().sum().sort_values())
@@ -443,14 +507,14 @@ class Model:
          else:
              self.agents = pd.DataFrame()

-         if self.sector == "fom":
+         if self.sector is Sector.FOM:
              self.agents["application"] = "FOM"

              if len(self.agents) > 0:
                  self.log.info("\n")
                  self.log.info(f"starting valuation for {len(self.agents)} FOM agents")

-                 self.agents = valuer.run_multiprocessing(self.agents, "fom")
+                 self.agents = valuer.run(agents=self.agents, sector=self.sector)

                  self.log.info("null counts:")
                  self.log.info(self.agents.isnull().sum().sort_values())
@@ -468,5 +532,6 @@ class Model:
              self.agents = pd.DataFrame()

      def run(self):
+         """Runs the whole model."""
          self.prepare_agents()
          self.run_valuation()
dwind/mp.py CHANGED
@@ -1,3 +1,8 @@
+ """Provides the :py:class:`MultiProcess` class for running a model on `NREL's Kestrel HPC system`_.
+
+ .. _NREL's Kestrel HPC system: https://nrel.github.io/HPC/Documentation/Systems/Kestrel/
+ """
+
  from __future__ import annotations

  import time
@@ -118,7 +123,7 @@ class MultiProcess:

          # Create the output directory if it doesn't already exist
          self.dir_out = Path.cwd() if dir_out is None else Path(self.dir_out).resolve()
-         self.out_path = self.dir_out / f"chunk_files_{self.run_name}"
+         self.out_path = self.dir_out / "chunk_files"
          if not self.out_path.exists():
              self.out_path.mkdir()

@@ -149,57 +154,46 @@ class MultiProcess:
              }
              for j in job_ids
          }
-         table, complete = hpc.generate_table(job_status)
+         table, complete = hpc.generate_run_status_table(job_status)
          with Live(table, refresh_per_second=1) as live:
              while not complete:
                  time.sleep(5)
                  job_status |= hpc.update_status(job_status)
-                 table, complete = hpc.generate_table(job_status)
+                 table, complete = hpc.generate_run_status_table(job_status)
                  live.update(table)

-     def aggregate_outputs(self):
-         """Collect the chunked results files, combine them into a single output parquet file, and
-         delete the chunked results files.
-         """
-         result_files = [f for f in self.out_path.iterdir() if f.suffix == (".pqt")]
-
-         if len(result_files) > 0:
-             result_agents = pd.concat([pd.read_parquet(f) for f in result_files])
-             f_out = self.dir_out / f"run_{self.run_name}.pqt"
-             result_agents.to_parquet(f_out)
-             print(f"Aggregated results saved to: {f_out}")
-
-             for f in result_files:
-                 f.unlink()
-
-     def run_jobs(self, agent_df: pd.DataFrame) -> None:
+     def run_jobs(self, agent_df: pd.DataFrame) -> dict[str, int]:
          """Run :py:attr:`n_jobs` number of jobs for the :py:attr:`agent_df`.

-         Parameters
-         ----------
-         agent_df : pandas.DataFrame
-             The agent DataFrame to be chunked and analyzed.
+         Args:
+             agent_df (pandas.DataFrame): The agent DataFrame to be chunked and analyzed.
+
+         Returns:
+             dict[str, int]: Dictionary mapping each SLURM job id to the chunk run in that job.
          """
          agent_df = agent_df.reset_index(drop=True)
          # chunks = np.array_split(agent_df, self.n_nodes)
          starts, ends = split_by_index(agent_df, self.n_nodes)
-         jobs = []
+         job_chunk_map = {}

-         base_cmd_str = f"module load conda; conda activate {self.env}; "
-         base_cmd_str += "dwind run-chunk "
+         base_cmd_str = f"module load conda; conda activate {self.env};"
+         base_cmd_str += " dwind run chunk"

-         base_args = f" {self.location} "
-         base_args += f" {self.sector} "
-         base_args += f" {self.scenario} "
-         base_args += f" {self.year} "
+         base_args = f" {self.location}"
+         base_args += f" {self.sector}"
+         base_args += f" {self.scenario}"
+         base_args += f" {self.year}"
          base_args += f" {self.out_path}"
-         base_args += f" {self.repository} "
-         base_args += f" {self.model_config} "
+         base_args += f" {self.repository}"
+         base_args += f" {self.model_config}"
+
+         if not (agent_path := self.out_path / "agent_chunks").is_dir():
+             agent_path.mkdir()

          start_time = time.perf_counter()
          # for i, (start, end) in enumerate(zip(starts, ends, strict=True)):
          for i, (start, end) in enumerate(zip(starts, ends)):  # noqa: B905
-             fn = self.out_path / f"agents_{i}.pqt"
+             fn = self.out_path / "agent_chunks" / f"agents_{i}.pqt"
              agent_df.iloc[start:end].to_parquet(fn)

              job_name = f"{self.run_name}_{i}"
@@ -218,7 +212,7 @@ class MultiProcess:
              )

              if job_id:
-                 jobs.append(job_id)
+                 job_chunk_map[job_id] = i
                  print(f"Kicked off job: {job_name}, with SLURM {job_id=} on Eagle.")
              else:
                  print(
@@ -226,5 +220,6 @@ class MultiProcess:
              )

          # Check on the job statuses until they're complete, then aggregate the results
+         jobs = [*job_chunk_map]
          self.check_status(jobs, start_time)
-         self.aggregate_outputs()
+         return job_chunk_map
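With `aggregate_outputs()` removed and `run_jobs` now returning the job-to-chunk mapping, aggregation becomes the caller's responsibility. A hypothetical caller-side sketch that mirrors the deleted behavior (`mp` is a configured `MultiProcess` instance; the result-file layout is assumed from the removed method):

```python
import pandas as pd

job_chunk_map = mp.run_jobs(agent_df)  # e.g. {"1234567": 0, "1234568": 1}

# Mirror the removed aggregate_outputs(): concatenate the per-chunk parquet results.
result_files = [f for f in mp.out_path.iterdir() if f.suffix == ".pqt"]
if result_files:
    results = pd.concat([pd.read_parquet(f) for f in result_files])
    results.to_parquet(mp.dir_out / f"run_{mp.run_name}.pqt")
```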