dwind 0.3__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dwind/model.py CHANGED
@@ -1,3 +1,7 @@
1
+ """Provides the primary ``Agent`` and ``Model`` classes for loading, preparing, and analyzing
2
+ parcel data.
3
+ """
4
+
1
5
  from __future__ import annotations
2
6
 
3
7
  import logging
@@ -7,7 +11,8 @@ from pathlib import Path
7
11
  import numpy as np
8
12
  import pandas as pd
9
13
 
10
- from dwind import Configuration, helper, resource, scenarios, valuation, btm_sizing
14
+ from dwind import resource, scenarios, valuation, btm_sizing
15
+ from dwind.config import Year, Sector, CRBModel, Scenario, Configuration
11
16
 
12
17
 
13
18
  # POTENTIALLY DANGEROUS!
@@ -20,25 +25,81 @@ class Agents:
20
25
  Agents are the modified parcels that have been truncated to the largest circle able
21
26
  to be contained in the parcel, and contain all of the relevant tax lot and
22
27
  geographic variables that would be found in a parcel.
23
-
24
- Parameters
25
- ---------
26
- agent_file : str | pathlib.Path
27
- Either a parquet file (.pqt or .parquet) or pickle file (.pkl or .pickle)
28
- containing the previously generated agent data.
29
-
30
- Raises:
31
- ------
32
- ValueError
33
- Raised if the :py:attr:`agent_file` does not have a valid file extension for
34
- either a pickle file (.pkl or .pickle) or a parquet file (.pqt or .parquet).
35
28
  """
36
29
 
37
- def __init__(self, agent_file: str | Path):
30
+ def __init__(
31
+ self,
32
+ agent_file: str | Path,
33
+ sector: str | None = None,
34
+ model_config: str | Path | None = None,
35
+ *,
36
+ resource_year: int = 2018,
37
+ ):
38
+ """Initialize an instance of the Agents class.
39
+
40
+ Args:
41
+ agent_file (str | pathlib.Path): Either a parquet file (.pqt or .parquet), pickle
42
+ file (.pkl or .pickle), or CSV file (.csv) containing the previously generated
43
+ agent data.
44
+ sector (str | None): One of "fom" (front-of-meter), "btm" (behind-the-meter), or None.
45
+ Only use None if the agent data do not need to have the :py:meth:`prepare` method
46
+ be run.
47
+ model_config (str | Path, optional): Full file path to the overall model configuration
48
+ TOML file containing SQL connections, data locations, etc.
49
+ resource_year (int, optional): The reV resource year basis. This should either be 2012
50
+ or 2018, but it is assumed the data were created using the 2012 reV lookups.
51
+ Defaults to 2018.
52
+ """
38
53
  self.agent_file = Path(agent_file).resolve()
39
- self.load_agents()
40
-
41
- def load_agents(self):
54
+ self.sector = sector if sector is None else Sector(sector)
55
+ self.config = model_config
56
+ self.resource_year = resource_year
57
+ self._load_agents()
58
+
59
+ @classmethod
60
+ def load_and_prepare_agents(
61
+ cls,
62
+ agent_file: str | Path,
63
+ sector: str,
64
+ model_config: str | Path,
65
+ *,
66
+ save_results: bool = False,
67
+ file_name: str | Path | None = None,
68
+ ) -> pd.DataFrame:
69
+ """Load and prepare the agent files to run through ``Model``.
70
+
71
+ Args:
72
+ agent_file (str | Path): The full file path of the agent parquet, CSV, or pickle data.
73
+ sector (str): One of "fom" (front-of-meter) or "btm" (behind-the-meter).
74
+ model_config (str | Path): Full file path to the overall model configuration
75
+ TOML file containing SQL connections, data locations, etc.
76
+ save_results (bool, optional): True to save any updates to the data. Defaults to False.
77
+ file_name (str | Path | None, optional): The file path and name for where to save the
78
+ prepared data, if not overwriting the existing agent data. Defaults to None.
79
+
80
+ Returns:
81
+ pd.DataFrame: The prepared agent data.
82
+ """
83
+ agents = cls(agent_file, sector, model_config)
84
+ agents.prepare()
85
+ if save_results:
86
+ agents.save_agents(file_name=file_name)
87
+ return agents.agents
88
+
89
+ @classmethod
90
+ def load_agents(cls, agent_file: str | Path) -> pd.DataFrame:
91
+ """Load the agent data without making any additional modifications.
92
+
93
+ Args:
94
+ agent_file (str | Path): The full file path of the agent parquet, pickle, or CSV data.
95
+
96
+ Returns:
97
+ pd.DataFrame: The agent data.
98
+ """
99
+ agents = cls(agent_file)
100
+ return agents.agents
101
+
102
+ def _load_agents(self):
42
103
  """Loads in the agent file and drops any indices."""
43
104
  suffix = self.agent_file.suffix
44
105
  if suffix in (".pqt", ".parquet"):
@@ -52,27 +113,131 @@ class Agents:
52
113
  f"File types ending in {suffix} can't be read as pickle, parquet, or CSV"
53
114
  )
54
115
 
55
- self.agents = file_reader(self.agent_file)
116
+ self.agents = file_reader(self.agent_file, dtype_backend="pyarrow")
56
117
  if suffix == ".csv":
57
118
  self.agents = self.agents.reset_index(drop=True)
58
119
 
120
+ def prepare(self):
121
+ """Prepares the agent data so that it has the necessary columns required for modeling.
122
+
123
+ Steps:
124
+ 1. Extract `state_fips` from the `fips_code` column.
125
+ 2. If `census_tract_id` is missing, load and merge the 2020 census tracts
126
+ based on the `pgid` column.
127
+ 3. Convert the 2012 rev ID to the 2018 rev ID in `rev_index_wind`.
128
+ 4. Attach the universal resource generation data.
129
+ """
130
+ self.config = Configuration(self.config)
59
131
  if "state_fips" not in self.agents.columns:
60
- self.agents["state_fips"] = self.agents["fips_code"].str[:2]
132
+ self.agents["state_fips"] = [el[:2] for el in self.agents["fips_code"]]
61
133
 
62
134
  if "census_tract_id" not in self.agents.columns:
63
- census_tracts = pd.read_csv(
64
- "/projects/dwind/configs/sizing/wind/lkup_block_to_pgid_2020.csv",
65
- dtype={"fips_block": str, "pgid": str},
135
+ self.merge_census_data()
136
+
137
+ self.update_rev_id()
138
+ self.merge_generation()
139
+
140
+ def save_agents(self, file_name: str | Path | None = None):
141
+ """Save the prepared agent data to the path provided in :py:attr:`file_name`.
142
+
143
+ Args:
144
+ file_name (str | Path, optional): Full file path and name for where to save the agent
145
+ data. Must end in a valid pickle, parquet, or csv extension. Defaults to None.
146
+
147
+ Raises:
148
+ ValueError: Raised if ``file_name`` does not end in a valid pickle, parquet, or CSV extension.
149
+ """
150
+ if file_name is None:
151
+ file_name = self.agent_file
152
+
153
+ suffix = file_name.suffix
154
+ if suffix in (".pqt", ".parquet"):
155
+ file_saver = self.agents.to_parquet
156
+ elif suffix in (".pkl", ".pickle"):
157
+ file_saver = self.agents.to_pickle
158
+ elif suffix == ".csv":
159
+ file_saver = self.agents.to_csv
160
+ else:
161
+ raise ValueError(
162
+ f"File types ending in {suffix} can't be read as pickle, parquet, or CSV"
66
163
  )
67
- census_tracts["census_tract_id"] = census_tracts["fips_block"].str[:11]
68
- census_tracts = census_tracts[["pgid", "census_tract_id"]]
69
- census_tracts = census_tracts.drop_duplicates()
70
- self.agents = self.agents.merge(census_tracts, how="left", on="pgid")
71
- self.agents = self.agents.drop_duplicates(subset=["gid"])
72
- self.agents = self.agents.reset_index(drop=True)
164
+
165
+ file_saver(file_name)
166
+
167
+ def merge_census_data(self):
168
+ """Merges the census tract identifies based on the agent "gid" and census "pgid" identifier
169
+ columns.
170
+ """
171
+ census_tracts = pd.read_csv(
172
+ "/projects/dwind/configs/sizing/wind/lkup_block_to_pgid_2020.csv",
173
+ usecols=["pgid", "fips_block"],
174
+ dtype=str,
175
+ dtype_backend="pyarrow",
176
+ ).drop_duplicates()
177
+ census_tracts["census_tract_id"] = [el[:11] for el in census_tracts["fips_block"]]
178
+ self.agents = (
179
+ self.agents.merge(census_tracts, how="left", on="pgid")
180
+ .drop_duplicates(subset=["gid"])
181
+ .reset_index(drop=True)
182
+ )
183
+
184
+ def update_rev_id(self):
185
+ """Update 2012 rev index to 2018 index."""
186
+ if self.resource_year != 2018:
187
+ return
188
+
189
+ index_file = "/projects/dwind/configs/rev/wind/lkup_rev_index_2012_to_2018.csv"
190
+ rev_index_map = (
191
+ pd.read_csv(
192
+ index_file,
193
+ usecols=["rev_index_wind_2012", "rev_index_wind_2018"],
194
+ dtype_backend="pyarrow",
195
+ )
196
+ .rename(columns={"rev_index_wind_2012": "rev_index_wind"})
197
+ .set_index("rev_index_wind")
198
+ )
199
+
200
+ ix_original = self.agents.index.name
201
+ if ix_original is None:
202
+ self.agents = (
203
+ self.agents.set_index("rev_index_wind", drop=True)
204
+ .join(rev_index_map, how="left")
205
+ .reset_index(drop=True)
206
+ .rename(columns={"rev_index_wind_2018": "rev_index_wind"})
207
+ .dropna(subset="rev_index_wind")
208
+ )
209
+ else:
210
+ self.agents = (
211
+ self.agents.reset_index(drop=False)
212
+ .set_index("rev_index_wind")
213
+ .join(rev_index_map, how="left")
214
+ .set_index(ix_original, drop=True)
215
+ .rename(columns={"rev_index_wind_2018": "rev_index_wind"})
216
+ .dropna(subset="rev_index_wind")
217
+ )
218
+
219
+ def merge_generation(self):
220
+ """Load and merge the resource potential data for the 2018 reV basis only. See
221
+ :py:class:`dwind.resource.ResourcePotential` for more information.
222
+ """
223
+ if self.resource_year != 2018:
224
+ return
225
+
226
+ # update 2012 rev cf/naep/aep to 2018 values
227
+ resource_potential = resource.ResourcePotential(
228
+ parcels=self.agents,
229
+ sector=self.sector,
230
+ year=self.resource_year,
231
+ model_config=self.config,
232
+ )
233
+ self.agents = resource_potential.match_rev_summary_to_agents()
73
234
 
74
235
 
75
236
  class Model:
237
+ """Primary dwind model that is responsible for gathering and connecting the various data points
238
+ by parcel.
239
+ """
240
+
76
241
  def __init__(
77
242
  self,
78
243
  agents: pd.DataFrame,
@@ -84,33 +249,51 @@ class Model:
84
249
  model_config: str | Path,
85
250
  chunk_ix: int | None = None,
86
251
  ):
252
+ """Initializes a :py:class:`Model` instance.
253
+
254
+ Args:
255
+ agents (pd.DataFrame): Prepared agent data with correct census and reV resource data
256
+ already attached.
257
+ location (str): Priority class or "<state>_<county>" string.
258
+ sector (str): One of "fom" (front-of-meter) or "btm" (behind-the-meter).
259
+ scenario (str): Currently only accepts "baseline" as an input.
260
+ year (int): One of 2022, 2025, 2035, or 2040 for the analysis year.
261
+ out_path (str | Path): Path for where to save any logging or output data.
262
+ model_config (str | Path): The overarching model configuration TOML file containing
263
+ key SQL connectors, data locations, and general model settings.
264
+ chunk_ix (int | None, optional): Integer representation of the chunk being run, if whole
265
+ agent file is being run, then None. Defaults to None.
266
+ """
87
267
  if chunk_ix is None:
88
268
  chunk_ix = 0
89
269
  self.agents = agents
90
270
  self.out_path = Path(out_path).resolve()
91
271
 
92
272
  self.full_scenario = f"{location}_{sector}_{scenario}_{year}"
93
- self.run_name = f"{self.full_scenario}_{chunk_ix}"
273
+ self.run_name = f"{self.full_scenario}"
274
+ if chunk_ix is not None:
275
+ self.run_name += f"_{chunk_ix}"
94
276
  self.location = location
95
- self.sector = sector
96
- self.scenario = scenario
97
- self.year = year
277
+ self.sector = Sector(sector)
278
+ self.scenario = Scenario(scenario)
279
+ self.year = Year(year)
98
280
  self.config = Configuration(model_config)
99
281
 
100
- self.init_logging()
282
+ self._init_logging()
101
283
 
102
284
  t_dict = self.config.rev.turbine_class_dict
103
- if self.sector == "fom":
285
+ if self.sector is Sector.FOM:
104
286
  apps = ["BTM, FOM", "BTM, FOM, Utility", "FOM, Utility"]
105
287
  self.agents["turbine_class"] = self.agents["wind_size_kw_fom"].map(t_dict)
106
- else:
288
+ elif self.sector is Sector.BTM:
107
289
  apps = ["BTM", "BTM, FOM", "BTM, FOM, Utility"]
108
290
  self.agents["turbine_class"] = self.agents["wind_size_kw"].map(t_dict)
109
291
 
110
292
  # filter by sector
111
293
  self.agents = self.agents[self.agents["application"].isin(apps)]
112
294
 
113
- def init_logging(self):
295
+ def _init_logging(self):
296
+ """Initializing the logging to :py:attr:`out_path` / logs / dwfs.txt."""
114
297
  log_dir = self.out_path / "logs"
115
298
  if not log_dir.exists():
116
299
  log_dir.mkdir()
@@ -126,40 +309,16 @@ class Model:
126
309
 
127
310
  self.log = logging.getLogger("dwfs")
128
311
 
129
- def get_gen(self, resource_year="2018"):
130
- if resource_year != "2018":
131
- return
132
-
133
- # update 2012 rev index to 2018 index
134
- f = "/projects/dwind/configs/rev/wind/lkup_rev_index_2012_to_2018.csv"
135
- lkup = pd.read_csv(f)[["rev_index_wind_2012", "rev_index_wind_2018"]]
136
-
137
- self.agents = (
138
- self.agents.merge(
139
- lkup, left_on="rev_index_wind", right_on="rev_index_wind_2012", how="left"
140
- )
141
- .drop(columns=["rev_index_wind", "rev_index_wind_2012"])
142
- .rename(columns={"rev_index_wind_2018": "rev_index_wind"})
143
- .dropna(subset="rev_index_wind")
144
- )
145
-
146
- # update 2012 rev cf/naep/aep to 2018 values
147
- # self.agents = self.agents.drop(columns=["wind_naep", "wind_cf", "wind_aep"])
148
- resource_potential = resource.ResourcePotential(
149
- parcels=self.agents,
150
- application=self.sector,
151
- year=resource_year,
152
- model_config=self.config,
153
- )
154
- self.agents = resource_potential.match_rev_summary_to_agents()
155
-
156
- def get_rates(self):
312
+ def _get_rates(self):
313
+ """Retrieves the tariff rates and merges them based on the "rate_id_alias" column."""
157
314
  self.agents = self.agents[~self.agents["rate_id_alias"].isna()]
158
315
  self.agents["rate_id_alias"] = self.agents["rate_id_alias"].astype(int)
159
316
  rate_ids = np.unique(self.agents.rate_id_alias.values)
160
317
 
161
318
  tariff = (
162
- pd.read_parquet("/projects/dwind/data/tariffs/2025_tariffs.pqt")
319
+ pd.read_parquet(
320
+ "/projects/dwind/data/tariffs/2025_tariffs.pqt", dtype_backend="pyarrow"
321
+ )
163
322
  .loc[rate_ids]
164
323
  .reset_index(drop=False) # , names="rate_id_alias")
165
324
  )
@@ -169,25 +328,27 @@ class Model:
169
328
 
170
329
  self.agents = self.agents.merge(tariff, how="left", on="rate_id_alias")
171
330
 
172
- def get_load(self):
173
- consumption_hourly = pd.read_parquet("/projects/dwind/data/crb_consumption_hourly.pqt")
174
-
175
- consumption_hourly["scale_offset"] = 1e8
176
- consumption_hourly = helper.scale_array_precision(
177
- consumption_hourly, "consumption_hourly", "scale_offset"
178
- )
179
-
180
- self.agents = self.agents.merge(
181
- consumption_hourly, how="left", on=["crb_model", "hdf_index"]
182
- )
331
+ def _get_load(self):
332
+ """Retrieves the energy demand data, combines it with the agent data based on
333
+ the agent "land_use", "bldg_type", and "state_fips" columns, then scales the
334
+ consumption data based on the load.
335
+ """
336
+ self.agents["crb_model_index"] = self.agents.crb_model.replace(
337
+ CRBModel.str_model_map()
338
+ ).astype("uint8[pyarrow]")
183
339
 
184
340
  # update load based on scaling factors from 2024 consumption data
185
341
  f = "/projects/dwind/data/parcel_landuse_load_application_mapping.csv"
186
- bldg_types = pd.read_csv(f)[["land_use", "bldg_type"]]
342
+ bldg_types = pd.read_csv(f, usecols=["land_use", "bldg_type"], dtype_backend="pyarrow")
187
343
  self.agents = self.agents.merge(bldg_types, on="land_use", how="left")
188
344
 
189
345
  f = "/projects/dwind/data/consumption/2024/load_scaling_factors.csv"
190
- sfs = pd.read_csv(f, dtype={"state_fips": str})[["state_fips", "bldg_type", "load_sf_2024"]]
346
+ sfs = pd.read_csv(
347
+ f,
348
+ dtype={"state_fips": str},
349
+ usecols=["state_fips", "bldg_type", "load_sf_2024"],
350
+ dtype_backend="pyarrow",
351
+ )
191
352
  self.agents = self.agents.merge(sfs, on=["state_fips", "bldg_type"], how="left")
192
353
  self.agents["load_kwh"] *= self.agents["load_sf_2024"]
193
354
  self.agents["max_demand_kw"] *= self.agents["load_sf_2024"]
@@ -197,13 +358,15 @@ class Model:
197
358
  # get county_id to nerc_region_abbr lkup
198
359
  # from diffusion_shared.county_nerc_join (dgen_db_fy23q4_ss23)
199
360
  f = "/projects/dwind/data/county_nerc_join.csv"
200
- nerc_regions = pd.read_csv(f)[["county_id", "nerc_region_abbr"]]
361
+ nerc_regions = pd.read_csv(
362
+ f, usecols=["county_id", "nerc_region_abbr"], dtype_backend="pyarrow"
363
+ )
201
364
  self.agents = self.agents.merge(nerc_regions, on=["county_id"], how="left")
202
365
 
203
366
  # get load growth projects from AEO
204
367
  # from diffusion_shared.aeo_load_growth_projections_nerc_2023_updt (dgen_db_fy23q4_ss23)
205
368
  f = "/projects/dwind/data/consumption/aeo_load_growth_projections_nerc_2023_updt.csv"
206
- load_growth = pd.read_csv(f)
369
+ load_growth = pd.read_csv(f, dtype_backend="pyarrow")
207
370
  load_growth = load_growth.loc[
208
371
  load_growth["scenario"].eq("AEO2023 Reference case")
209
372
  & load_growth["year"].eq(self.year),
@@ -218,20 +381,24 @@ class Model:
218
381
  self.agents["max_demand_kw"] *= self.agents["load_multiplier"]
219
382
  self.agents = self.agents.drop(columns=["load_multiplier", "nerc_region_abbr"])
220
383
 
221
- self.agents = helper.scale_array_sum(self.agents, "consumption_hourly", "load_kwh")
222
-
223
- def get_nem(self):
224
- if self.scenario == "metering":
384
+ def _get_nem(self):
385
+ """Retrieves the NEM data and merges with the agent data based on the "state_abbr" and
386
+ "sector_abbr" columns.
387
+ """
388
+ if self.scenario is Scenario.METERING:
225
389
  self.agents["compensation_style"] = "net metering"
226
390
  self.agents["nem_system_kw_limit"] = 1000000000
227
- elif self.scenario == "billing":
391
+ elif self.scenario is Scenario.BILLING:
228
392
  self.agents["compensation_style"] = "net billing"
229
393
  self.agents["nem_system_kw_limit"] = 1000000000
230
394
  else:
231
395
  cols = ["state_abbr", "sector_abbr", "compensation_style", "nem_system_kw_limit"]
232
396
  nem_scenario_csv = scenarios.config_nem(self.scenario, self.year)
233
397
  nem_df = (
234
- pd.read_csv(self.config.project.DIR / f"data/nem/{nem_scenario_csv}")
398
+ pd.read_csv(
399
+ self.config.project.DIR / f"data/nem/{nem_scenario_csv}",
400
+ dtype_backend="pyarrow",
401
+ )
235
402
  .rename(columns={"max_pv_kw_limit": "nem_system_kw_limit"})
236
403
  .loc[:, cols]
237
404
  )
@@ -251,18 +418,15 @@ class Model:
251
418
  ] = "net billing"
252
419
 
253
420
  def prepare_agents(self):
254
- # get generation data
255
- self.log.info("....fetching resource information")
256
- self.get_gen()
257
-
258
- if self.sector == "btm":
421
+ """Prepare the :py:attr:`tech`- and :py:attr:`sector`-specific agent data."""
422
+ if self.sector is Sector.BTM:
259
423
  # map tariffs
260
424
  self.log.info("....running with pre-processed tariffs")
261
- self.get_rates()
425
+ self._get_rates()
262
426
 
263
427
  # get hourly consumption
264
428
  self.log.info("....fetching hourly consumption")
265
- self.get_load()
429
+ self._get_load()
266
430
 
267
431
  if self.config.project.settings.SIZE_SYSTEMS:
268
432
  # size btm systems
@@ -271,9 +435,9 @@ class Model:
271
435
 
272
436
  # map nem policies
273
437
  self.log.info("....processing NEM for BTM systems")
274
- self.get_nem()
438
+ self._get_nem()
275
439
 
276
- if self.sector == "fom":
440
+ if self.sector is Sector.FOM:
277
441
  if self.config.project.settings.SIZE_SYSTEMS:
278
442
  # for fom agents, take largest wind turbine
279
443
  self.agents.sort_values(
@@ -313,16 +477,17 @@ class Model:
313
477
  )
314
478
 
315
479
  def run_valuation(self):
480
+ """Runs the valuation model to create the PySAM financial results."""
316
481
  valuer = valuation.ValueFunctions(self.scenario, self.year, self.config)
317
482
 
318
- if self.sector == "btm":
483
+ if self.sector is Sector.BTM:
319
484
  self.agents["application"] = "BTM"
320
485
 
321
486
  if len(self.agents) > 0:
322
487
  self.log.info("\n")
323
488
  self.log.info(f"starting valuation for {len(self.agents)} BTM agents")
324
489
 
325
- self.agents = valuer.run_multiprocessing(self.agents, sector="btm")
490
+ self.agents = valuer.run(agents=self.agents, sector=self.sector)
326
491
 
327
492
  self.log.info("null counts:")
328
493
  self.log.info(self.agents.isnull().sum().sort_values())
@@ -342,14 +507,14 @@ class Model:
342
507
  else:
343
508
  self.agents = pd.DataFrame()
344
509
 
345
- if self.sector == "fom":
510
+ if self.sector is Sector.FOM:
346
511
  self.agents["application"] = "FOM"
347
512
 
348
513
  if len(self.agents) > 0:
349
514
  self.log.info("\n")
350
515
  self.log.info(f"starting valuation for {len(self.agents)} FOM agents")
351
516
 
352
- self.agents = valuer.run_multiprocessing(self.agents, configuration="fom")
517
+ self.agents = valuer.run(agents=self.agents, sector=self.sector)
353
518
 
354
519
  self.log.info("null counts:")
355
520
  self.log.info(self.agents.isnull().sum().sort_values())
@@ -367,5 +532,6 @@ class Model:
367
532
  self.agents = pd.DataFrame()
368
533
 
369
534
  def run(self):
535
+ """Runs the whole model."""
370
536
  self.prepare_agents()
371
537
  self.run_valuation()