dwind 0.3.1 → 0.3.2 (py3-none-any.whl)

dwind/resource.py CHANGED
@@ -1,23 +1,75 @@
+"""Provides the :py:class:`ResourcePotential` class for gathering pre-calculated reV generation
+data.
+"""
+
 import h5py as h5
 import pandas as pd
 
-from dwind import Configuration
+from dwind.config import Sector, Technology, Configuration
 
 
 class ResourcePotential:
+    """Helper class designed to retrieve pre-calculated energy generation data from reV."""
+
     def __init__(
-        self, parcels, model_config: Configuration, tech="wind", application="fom", year="2018"
+        self,
+        parcels: pd.DataFrame,
+        model_config: Configuration,
+        sector: Sector,
+        tech: str = "wind",
+        year: int = 2018,
     ):
+        """Initializes the :py:class:`ResourcePotential` instance.
+
+        Args:
+            parcels (pd.DataFrame): The agent DataFrame containing at least the following columns:
+                "gid", "rev_gid_{tech}", "solar_az_tilt" (solar only), "azimuth_{sector}"
+                (solar only), "tilt_{tech}" (solar only), "turbine_class" (wind only),
+                "wind_turbine_kw" (wind only), and "turbine_height_m" (wind only).
+            model_config (Configuration): The pre-loaded model configuration data object containing
+                the requisite SQL, file, and configuration data.
+            sector (dwind.config.Sector): A valid sector instance.
+            tech (str, optional): One of "solar" or "wind". Defaults to "wind".
+            year (int, optional): Resource year for the reV lookup. Defaults to 2018.
+
+        Raises:
+            ValueError: Raised if :py:attr:`parcels` is missing any of the required columns.
+        """
         self.df = parcels
-        self.tech = tech
-        self.application = application
+        self.tech = Technology(tech)
+        self.sector = sector
         self.year = year
         self.config = model_config
 
-        if self.tech not in ("wind", "solar"):
-            raise ValueError("`tech` must be one of 'solar' or 'wind'.")
-
-    def create_rev_gid_to_summary_lkup(self, configs, save_csv=True):
+        solar_cols = ("solar_az_tilt", f"azimuth_{self.sector.value}", f"tilt_{self.tech.value}")
+        # wind_cols = ("turbine_class", "wind_turbine_kw", "turbine_height_m")
+        wind_cols = ("wind_turbine_kw", "turbine_height_m")
+
+        if self.tech is Technology.WIND:
+            cols = wind_cols
+        elif self.tech is Technology.SOLAR:
+            cols = solar_cols
+
+        missing = set(cols).difference(self.df.columns.tolist())
+        if missing:
+            raise ValueError(f"`parcels` is missing the following columns: {', '.join(missing)}")
+
+    def create_rev_gid_to_summary_lkup(
+        self, configs: list[str], *, save_csv: bool = True
+    ) -> pd.DataFrame:
+        """Creates the reV summary tables based on the "gid" mappings in :py:attr:`parcels`.
+
+        Args:
+            configs (list[str]): The list of technology-specific configurations for which the
+                generation data should be retrieved.
+            save_csv (bool, optional): If True, save the resulting lookup calculated from reV to
+                the reV folder defined in ``Configuration.rev.generation.{tech}_DIR``. Defaults to
+                True.
+
+        Returns:
+            pd.DataFrame: reV generation lookup table for the technology-specific configurations in
+                :py:attr:`configs`.
+        """
         config_dfs = []
         for c in configs:
             file_str = self.config.rev.DIR / f"rev_{c}_generation_{self.year}.h5"
@@ -30,10 +82,10 @@ class ResourcePotential:
 
         config_df = pd.concat([rev_index, gids, annual_energy, cf_mean], axis=1)
         config_df.columns = [
-            f"rev_index_{self.tech}",
-            f"rev_gid_{self.tech}",
-            f"{self.tech}_naep",
-            f"{self.tech}_cf",
+            f"rev_index_{self.tech.value}",
+            f"rev_gid_{self.tech.value}",
+            f"{self.tech.value}_naep",
+            f"{self.tech.value}_cf",
         ]
 
         config_df["config"] = c
@@ -43,87 +95,107 @@ class ResourcePotential:
 
         if save_csv:
             save_name = (
-                self.config.rev.generation[f"{self.tech}_DIR"]
-                / f"lkup_rev_gid_to_summary_{self.tech}_{self.year}.csv"
+                self.config.rev.generation[f"{self.tech.value}_DIR"]
+                / f"lkup_rev_gid_to_summary_{self.tech.value}_{self.year}.csv"
             )
             summary_df.to_csv(save_name, index=False)
 
         return summary_df
 
     def find_rev_summary_table(self):
-        if self.tech == "solar":
+        """Creates the generation summary data for each of the :py:attr:`tech`-specific
+        configurations specified in :py:attr:`config.rev.settings.{tech}`, then maps it to the
+        agent data (:py:attr:`parcels`), overwriting any previously computed data.
+        """
+        if self.tech is Technology.SOLAR:
             configs = self.config.rev.settings.solar
             config_col = "solar_az_tilt"
-            col_list = ["gid", f"rev_gid_{self.tech}", config_col]
-            self.df[config_col] = self.df[f"azimuth_{self.application}"].map(
+            col_list = ["gid", f"rev_gid_{self.tech.value}", config_col]
+            self.df[config_col] = self.df[f"azimuth_{self.sector.value}"].map(
                 self.config.rev.settings.azimuth_direction_to_degree
             )
             self.df[config_col] = (
-                self.df[config_col].astype(str) + "_" + self.df[f"tilt_{self.tech}"].astype(str)
+                self.df[config_col].astype(str)
+                + "_"
+                + self.df[f"tilt_{self.tech.value}"].astype(str)
             )
-        elif self.tech == "wind":
+        elif self.tech is Technology.WIND:
             configs = self.config.rev.settings.wind
             config_col = "turbine_class"
             col_list = [
                 "gid",
-                f"rev_gid_{self.tech}",
+                f"rev_gid_{self.tech.value}",
                 config_col,
                 "turbine_height_m",
                 "wind_turbine_kw",
             ]
             self.df[config_col] = self.df["wind_turbine_kw"].map(self.config.rev.turbine_class_dict)
 
-        out_cols = [*col_list, f"rev_index_{self.tech}", f"{self.tech}_naep", f"{self.tech}_cf"]
-
-        drop_cols = [f"rev_gid_{self.tech}", f"{self.tech}_naep", f"{self.tech}_cf"]
+        out_cols = [
+            *col_list,
+            f"rev_index_{self.tech.value}",
+            f"{self.tech.value}_naep",
+            f"{self.tech.value}_cf",
+        ]
+
+        drop_cols = [
+            f"rev_gid_{self.tech.value}",
+            f"{self.tech.value}_naep",
+            f"{self.tech.value}_cf",
+        ]
         self.df = self.df.drop(columns=[c for c in drop_cols if c in self.df])
 
         f_gen = (
-            self.config.rev.generation[f"{self.tech}_DIR"]
-            / f"lkup_rev_gid_to_summary_{self.tech}_{self.year}.csv"
+            self.config.rev.generation[f"{self.tech.value}_DIR"]
+            / f"lkup_rev_gid_to_summary_{self.tech.value}_{self.year}.csv"
         )
 
         if f_gen.exists():
-            generation_summary = pd.read_csv(f_gen)
+            generation_summary = pd.read_csv(f_gen, dtype_backend="pyarrow")
         else:
             generation_summary = self.create_rev_gid_to_summary_lkup(configs)
 
         generation_summary = (
             generation_summary.reset_index(drop=True)
-            .drop_duplicates(subset=[f"rev_index_{self.tech}", "config"])
+            .drop_duplicates(subset=[f"rev_index_{self.tech.value}", "config"])
             .rename(columns={"config": config_col})
         )
         agents = self.df.merge(
-            generation_summary, how="left", on=[f"rev_index_{self.tech}", config_col]
+            generation_summary, how="left", on=[f"rev_index_{self.tech.value}", config_col]
         )
         return agents[out_cols]
 
     def prepare_agents_for_gen(self):
-        # create lookup column based on each tech
-        if self.tech == "wind":
+        """Create lookup column based on each technology."""
+        if self.tech is Technology.WIND:
             # drop wind turbine size duplicates
             # SINCE WE ASSUME ANY TURBINE IN A GIVEN CLASS HAS THE SAME POWER CURVE
             self.df.drop_duplicates(subset=["gid", "wind_size_kw"], keep="last", inplace=True)
-            # if running FOM application, only consider a single (largest) turbine size
-            if self.application == "fom":
+            # if running FOM sector, only consider a single (largest) turbine size
+            if self.sector is Sector.FOM:
                 self.df = self.df.loc[self.df["wind_size_kw"] == self.df["wind_size_kw_fom"]]
 
             self.df["turbine_class"] = self.df["wind_turbine_kw"].map(
                 self.config.rev.turbine_class_dict
             )
 
-        if self.tech == "solar":
-            # NOTE: tilt and azimuth are application-specific
-            self.df["solar_az_tilt"] = self.df[f"azimuth_{self.application}"].map(
+        if self.tech is Technology.SOLAR:
+            # NOTE: tilt and azimuth are sector-specific
+            self.df["solar_az_tilt"] = self.df[f"azimuth_{self.sector.value}"].map(
                 self.config.rev.settings.azimuth_direction_to_degree
             )
             self.df["solar_az_tilt"] = self.df["solar_az_tilt"].astype(str)
             self.df["solar_az_tilt"] = (
-                self.df["solar_az_tilt"] + "_" + self.df[f"tilt_{self.application}"].astype(str)
+                self.df["solar_az_tilt"] + "_" + self.df[f"tilt_{self.sector.value}"].astype(str)
             )
 
-    def merge_gen_to_agents(self, tech_agents):
-        if self.tech == "wind":
+    def merge_gen_to_agents(self, tech_agents: pd.DataFrame):
+        """Merges :py:attr:`tech_agents` to the parcel data :py:attr:`df`.
+
+        Args:
+            tech_agents (pd.DataFrame): The technology-specific energy generation data.
+        """
+        if self.tech is Technology.WIND:
             cols = ["turbine_height_m", "wind_turbine_kw", "turbine_class"]
         else:
             # NOTE: need to drop duplicates in solar agents
@@ -133,16 +205,23 @@ class ResourcePotential:
             )
             cols = ["solar_az_tilt"]
 
-        cols.extend(["gid", f"rev_index_{self.tech}"])
+        cols.extend(["gid", f"rev_index_{self.tech.value}"])
 
         self.df = self.df.merge(tech_agents, how="left", on=cols)
 
     def match_rev_summary_to_agents(self):
+        """Runs the energy generation gathering and merging steps, and returns the updated
+        :py:attr:`df` agent/parcel data.
+
+        Returns:
+            pd.DataFrame: Updated agent/parcel data with recalculated "wind_aep" or "solar_aep"
+                information for each agent.
+        """
         self.prepare_agents_for_gen()
         tech_agents = self.find_rev_summary_table()
         self.merge_gen_to_agents(tech_agents)
 
-        if self.tech == "wind":
+        if self.tech is Technology.WIND:
             # fill nan generation values
             self.df = self.df.loc[
                 ~((self.df["wind_naep"].isnull()) & (self.df["turbine_class"] != "none"))
@@ -153,7 +232,7 @@ class ResourcePotential:
             # calculate annual energy production (aep)
             self.df["wind_aep"] = self.df["wind_naep"] * self.df["wind_turbine_kw"]
             # self.df = self.df.drop(columns="turbine_class")
-        else:
+        elif self.tech is Technology.SOLAR:
             # fill nan generation values
             self.df = self.df.loc[~(self.df["solar_naep"].isnull())]
            # size groundmount system to equal wind aep
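Usage note: the practical effect of this change is that `ResourcePotential` now validates its inputs at construction time and keys its behavior off the `Sector` and `Technology` enums rather than free-form strings. A minimal sketch of the new validation, assuming only what the diff shows (the parcel values and the elided `Configuration` setup are illustrative, and a fully populated frame is still required by the downstream reV lookup):

```python
# Sketch of the 0.3.2 constructor validation; parcel values are hypothetical.
import pandas as pd

from dwind.config import Sector
from dwind.resource import ResourcePotential

config = ...  # a loaded dwind.config.Configuration (construction not shown here)

parcels = pd.DataFrame({"gid": [1], "wind_turbine_kw": [1500.0]})  # no "turbine_height_m"
try:
    ResourcePotential(parcels, model_config=config, sector=Sector.FOM, tech="wind")
except ValueError as err:
    print(err)  # `parcels` is missing the following columns: turbine_height_m

parcels["turbine_height_m"] = 80.0
# tech is still passed as a string, but is coerced to Technology internally.
potential = ResourcePotential(parcels, model_config=config, sector=Sector.FOM, tech="wind")
agents = potential.match_rev_summary_to_agents()  # also needs the reV lookup columns
```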
dwind/scenarios.py CHANGED
@@ -1,48 +1,80 @@
+"""Provides the scenario-specific mapping for varying financial and model configuration data."""
+
 import json
 from pathlib import Path
 
 import pandas as pd
 
+from dwind.config import Year, Scenario
 
-def config_nem(scenario, year):
-    # NEM_SCENARIO_CSV
-    nem_opt_scens = ["highrecost", "lowrecost", "re100"]
-    # nem_opt_scens = ['der_value_HighREcost', 'der_value_LowREcost', 're_100']
-    if scenario in nem_opt_scens:
-        nem_scenario_csv = "nem_optimistic_der_value_2035.csv"
-    elif scenario == "baseline" and year in (2022, 2025, 2035):
-        nem_scenario_csv = f"nem_baseline_{year}.csv"
-    else:
-        nem_scenario_csv = "nem_baseline_2035.csv"
 
-    return nem_scenario_csv
+def config_nem(scenario: Scenario, year: Year) -> str:
+    """Provides the NEM configuration based on :py:attr:`scenario` and :py:attr:`year`.
 
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+        year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
 
-def config_cambium(scenario):
-    # CAMBIUM_SCENARIO
-    if scenario == "highrecost" or scenario == "re100":
-        cambium_scenario = "StdScen20_HighRECost"
-    elif scenario == "lowrecost":
-        cambium_scenario = "StdScen20_LowRECost"
-    else:
-        # cambium_scenario = "StdScen20_MidCase"
-        cambium_scenario = "Cambium23_MidCase"
+    Returns:
+        str: Name of the NEM scenario file to use.
+    """
+    if scenario in (Scenario.HIGHRECOST, Scenario.LOWRECOST, Scenario.RE100):
+        return "nem_optimistic_der_value_2035.csv"
+
+    if scenario is Scenario.BASELINE and year in (Year._2022, Year._2025, Year._2035):
+        return f"nem_baseline_{year.value}.csv"
+
+    return "nem_baseline_2035.csv"
+
+
+def config_cambium(scenario: Scenario) -> str:
+    """Loads the Cambium configuration name based on :py:attr:`scenario`.
+
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
 
-    return cambium_scenario
+    Returns:
+        str: Name of the Cambium scenario to use.
+    """
+    if scenario in (Scenario.HIGHRECOST, Scenario.RE100):
+        return "StdScen20_HighRECost"
 
+    if scenario is Scenario.LOWRECOST:
+        return "StdScen20_LowRECost"
 
-def config_costs(scenario, year):
-    # COST_INPUTS
-    f = Path(f"/projects/dwind/configs/costs/atb24/ATB24_costs_{scenario}_{year}.json").resolve()
+    return "Cambium23_MidCase"
+
+
+def config_costs(scenario: Scenario, year: Year) -> dict:
+    """Loads the cost configuration based on the ATB analysis.
+
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+        year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
+
+    Returns:
+        dict: Dictionary of ATB assumptions to be used for PySAM's cost inputs.
+    """
+    f = Path(
+        f"/projects/dwind/configs/costs/atb24/ATB24_costs_{scenario.value}_{year.value}.json"
+    ).resolve()
     with f.open("r") as f_in:
         cost_inputs = json.load(f_in)
 
     return cost_inputs
 
 
-def config_performance(scenario, year):
-    # PERFORMANCE_INPUTS
-    if scenario == "baseline" and year == 2022:
+def config_performance(scenario: Scenario, year: Year) -> pd.DataFrame:
+    """Loads the technology performance configurations.
+
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+        year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
+
+    Returns:
+        pd.DataFrame: Performance data based on the scale of each technology.
+    """
+    if scenario is Scenario.BASELINE and year is Year._2022:
         performance_inputs = {
             "solar": pd.DataFrame(
                 [
@@ -108,16 +140,21 @@ def config_performance(scenario, year):
     return performance_inputs
 
 
-def config_financial(scenario, year):
-    # FINANCIAL_INPUTS
-    scenarios = ("baseline", "metering", "billing")
-    if scenario in scenarios and year == 2025:
+def config_financial(scenario: Scenario, year: Year) -> dict:
+    """Loads the financial configuration based on the ATB analysis.
+
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+        year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
+
+    Returns:
+        dict: Dictionary of ATB assumptions to be used for configuring PySAM.
+    """
+    if year is Year._2025:
         f = f"/projects/dwind/configs/costs/atb24/ATB24_financing_baseline_{year}.json"
-        i = Path("/projects/dwind/data/incentives/2025_incentives.json").resolve()
-        with i.open("r") as i_in:
-            incentives = pd.DataFrame.from_dict(json.load(i_in)).T
-        incentives.index.name = "census_tract_id"
-    elif scenario in scenarios and year in (2035, 2040):
+        i = Path("/projects/dwind/data/incentives/2025_incentives.pqt").resolve()
+        incentives = pd.read_parquet(i, dtype_backend="pyarrow")
+    elif year in (Year._2035, Year._2040):
         f = "/projects/dwind/configs/costs/atb24/ATB24_financing_baseline_2035.json"
     else:
         # use old assumptions
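Usage note: because the scenario and year lookups are now keyed on enum members, a mistyped string fails at enum construction instead of silently falling through to the `else` branch. A small sketch, assuming `Year._2025.value == 2025` and that the `Scenario`/`Year` members match those referenced in the diff:

```python
# Sketch of the enum-based scenario lookups; assumes Year._2025.value == 2025.
from dwind.config import Year, Scenario
from dwind.scenarios import config_nem, config_cambium

print(config_nem(Scenario.BASELINE, Year._2025))  # "nem_baseline_2025.csv"
print(config_nem(Scenario.RE100, Year._2040))     # "nem_optimistic_der_value_2035.csv"
print(config_cambium(Scenario.LOWRECOST))         # "StdScen20_LowRECost"
```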
dwind/utils/array.py CHANGED
@@ -1,11 +1,25 @@
+"""Provides a series of generic NumPy and Pandas utility functions."""
+
 from __future__ import annotations
 
 import numpy as np
 import pandas as pd
 
 
-def memory_downcaster(df):
-    assert isinstance(df, pd.DataFrame) | isinstance(df, pd.Series)
+def memory_downcaster(df: pd.DataFrame | pd.Series) -> pd.DataFrame | pd.Series:
+    """Downcasts ``int`` and ``float`` columns to the lowest-memory alternative possible. For
+    integers, this means converting to either signed or unsigned 8-, 16-, 32-, or 64-bit integers,
+    and for floats, converting to ``np.float32``.
+
+    Args:
+        df (pd.DataFrame | pd.Series): DataFrame or Series to have its memory footprint reduced.
+
+    Returns:
+        pd.DataFrame | pd.Series: Reduced-footprint version of the passed :py:attr:`df`.
+    """
+    # if not isinstance(df, pd.DataFrame | pd.Series):
+    if not isinstance(df, (pd.DataFrame, pd.Series)):  # noqa
+        raise TypeError("Input value must be a Pandas DataFrame or Series.")
 
     NAlist = []
     for col in df.select_dtypes(include=[np.number]).columns:
@@ -56,93 +70,6 @@ def memory_downcaster(df):
     return df
 
 
-def interpolate_array(row, col_1, col_2, col_in, col_out):
-    if row[col_in] != 0:
-        interpolated = row[col_in] * (row[col_2] - row[col_1]) + row[col_1]
-    else:
-        interpolated = row[col_1]
-
-    row[col_out] = interpolated
-
-    return row
-
-
-def scale_array_precision(df: pd.DataFrame, hourly_col: str, prec_offset_col: str):
-    """Scales the precision of :py:attr:`hourly_col` by the :py:attr:`prec_offset_col`.
-
-    Args:
-        df (pd.DataFrame): A Pandas DataFrame containing :py:att:`hourly_col` and
-            :py:att:`prec_offset_col`.
-        hourly_col (str) The column to adjust the precision.
-        prec_offset_col (str): The column for scaling the precison of :py:attr:`hourly_col`.
-
-    Returns:
-        pd.DataFrame: The input :py:attr:`df` with the precision of :py:attr:`hourly_col` scaled.
-    """
-    df[hourly_col] = (
-        np.array(df[hourly_col].values.tolist(), dtype="float64")
-        / df[prec_offset_col].values.reshape(-1, 1)
-    ).tolist()
-    return df
-
-
-def scale_array_deprecision(df: pd.DataFrame, col: str | list[str]) -> pd.DataFrame:
-    """Rounds the column(s) :py:attr:`col` to the nearest 2nd decimal and converts to NumPy's
-    float32.
-
-    Args:
-        df (pd.DataFrame): A Pandas DataFrame containing :py:att:`col`.
-        col (str | list[str]): The column(s) to have reduced precision.
-
-    Returns:
-        pd.DataFrame: The input :py:attr:`df` with the precision of :py:attr:`col` lowered.
-    """
-    df[col] = np.round(np.round(df[col], 2).astype(np.float32), 2)
-    return df
-
-
-def scale_array_sum(df: pd.DataFrame, hourly_col: str, scale_col: str) -> pd.DataFrame:
-    """Scales the :py:attr:`hourly_col` by its sum and multiples by the :py:attr:`scale_col`.
-
-    Args:
-        df (pd.DataFrame): Pandas DataFrame containing the :py:attr:`hourly_col` and
-            :py:attr:`scale_col`.
-        hourly_col (str): The name of the column to be scaled whose values are lists.
-        scale_col (str): The column to scale the :py:attr:`hourly_col`.
-
-    Returns:
-        pandas.DataFrame: The input dataframe, but with the values of the :py:attr:`hourly_col`
-            scaled appropriately.
-    """
-    hourly_array = np.array(df[hourly_col].values.tolist())
-    df[hourly_col] = (
-        hourly_array / hourly_array.sum(axis=1).reshape(-1, 1) * df[scale_col].values.reshape(-1, 1)
-    ).tolist()
-    return df
-
-
-def scale_array_multiplier(
-    df: pd.DataFrame, hourly_col: str, multiplier_col: str, col_out: str
-) -> pd.DataFrame:
-    """Scales the :py:attr:`hourly_col` values by the :py:attr:`multiplier_col`, and places it in
-    the :py:attr:`col_out`.
-
-    Args:
-        df (pd.DataFrame): The Pandas DataFrame containing the :py:attr:`hourly_col` and
-            :py:attr:`multiplier_col`.
-        hourly_col (str): A column of hourly values as a list of floats in each cell.
-        multiplier_col (str): The column used to scale the :py:attr:`hourly_col`.
-        col_out (str): A new column that will contain the scaled data.
-
-    Returns:
-        pd.DataFrame: A new copy of the original data (:py:attr:`df`) containing the
-            :py:attr:`col_out` column.
-    """
-    hourly_array = np.array(df[hourly_col].values.tolist())
-    df[col_out] = (hourly_array * df[multiplier_col].values.reshape(-1, 1)).tolist()
-    return df
-
-
 def split_by_index(
     arr: pd.DataFrame | np.ndarray | pd.Series, n_splits: int
 ) -> tuple[np.ndarray, np.ndarray]:
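Usage note: the only behavioral change in `memory_downcaster` is the guard, where the `assert` (which disappears under `python -O`) becomes an explicit `TypeError`. A quick behavior sketch; the exact downcast dtypes follow the docstring rather than code shown in this hunk:

```python
# Behavior sketch for the assert -> TypeError change in memory_downcaster.
import numpy as np
import pandas as pd

from dwind.utils.array import memory_downcaster

df = pd.DataFrame({"a": np.arange(5, dtype="int64"), "b": np.linspace(0.0, 1.0, 5)})
df = memory_downcaster(df)  # small ints shrink toward 8-bit, floats to float32

try:
    memory_downcaster([1, 2, 3])  # raised an AssertionError in 0.3.1
except TypeError as err:
    print(err)  # Input value must be a Pandas DataFrame or Series.
```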
dwind/utils/hpc.py CHANGED
@@ -1,5 +1,14 @@
+"""Provides the live timing table functionalities for the Kestrel :py:class:`MultiProcess` class."""
+
+from __future__ import annotations
+
+import io
+import re
 import time
+import subprocess
+from copy import deepcopy
 
+import pandas as pd
 from rich.table import Table
 from rex.utilities.hpc import SLURM
 
@@ -68,7 +77,7 @@ def update_status(job_status: dict) -> dict:
     return update
 
 
-def generate_table(job_status: dict) -> tuple[Table, bool]:
+def generate_run_status_table(job_status: dict) -> tuple[Table, bool]:
     """Generate the job status run time statistics table.
 
     Args:
@@ -92,5 +101,38 @@ def generate_table(job_status: dict) -> tuple[Table, bool]:
         table.add_row(
             job, status, convert_seconds_for_print(_wait), convert_seconds_for_print(_run)
         )
-    done = all(el["status"] in ("CG", None) for el in job_status.values())
+    done = all(el["status"] in ("CG", "CF", None) for el in job_status.values())
     return table, done
+
+
+def get_finished_run_status(jobs: int | str | list[int | str]) -> dict[str, str]:
+    """Extracts a dictionary of job IDs and statuses from the ``sacct`` output for a single
+    job or series of jobs.
+
+    Args:
+        jobs (int | str | list[int | str]): Single job ID or list of job IDs that have finished
+            running.
+
+    Returns:
+        dict[str, str]: Dictionary of {job_id_1: status_1, ..., job_id_N: status_N}.
+    """
+    if isinstance(jobs, (int, str)):  # noqa
+        jobs = [jobs]
+    jobs = [str(j) for j in jobs]
+
+    # Format the command to be in the form of [sacct, -j, job_id_1, ..., -j, job_id_N]
+    command = deepcopy(jobs)
+    for i in range(len(command) - 1, -1, -1):
+        command.insert(i, "-j")
+    command.insert(0, "sacct")
+    results = subprocess.check_output(command)
+
+    # Convert the sacct string output to be table-like
+    buffer = io.StringIO(results.decode("utf8", "ignore"))
+    lines = [re.split(" +", line) for line in buffer.readlines() if not line.startswith("-")]
+
+    # Create a dataframe, and export a dictionary of the form job_id: job_status
+    df = pd.DataFrame(lines[1:], columns=lines[0])
+    df = df.loc[df.JobID.isin(jobs), ["JobID", "State"]]
+    df.JobID = df.JobID.astype(int)
+    return dict(df.values.tolist())
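Usage note: `get_finished_run_status` builds the `sacct` query by walking the job list backwards and inserting a `-j` flag before each ID, so earlier inserts never shift the indices still to be visited. This standalone snippet reproduces just that command assembly:

```python
# Standalone trace of the command assembly in get_finished_run_status.
from copy import deepcopy

jobs = ["1001", "1002", "1003"]
command = deepcopy(jobs)
# Iterate backwards so each insert lands before the corresponding job ID.
for i in range(len(command) - 1, -1, -1):
    command.insert(i, "-j")
command.insert(0, "sacct")
print(command)  # ['sacct', '-j', '1001', '-j', '1002', '-j', '1003']
```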
dwind/utils/loader.py ADDED
@@ -0,0 +1,63 @@
+"""Provides the core data loading methods for importing scenario data from flat files or SQL."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pandas as pd
+from sqlalchemy import create_engine
+
+from dwind.config import Year
+
+
+def load_df(file_or_table: str | Path, year: Year | None, sql_constructor: str | None = None):
+    """Loads data from either a SQL table or file to a pandas ``DataFrame``.
+
+    Args:
+        file_or_table (str | Path): File name or path object, or SQL table where the data are
+            located.
+        year (:py:class:`dwind.config.Year`, optional): If used, only extracts the single year from
+            a column called "year". Defaults to None.
+        sql_constructor (str | None, optional): The SQL engine constructor string. Required if
+            extracting from SQL. Defaults to None.
+    """
+    valid_extensions = (".csv", ".pqt", ".parquet", ".pkl", ".pickle")
+    if str(file_or_table).endswith(valid_extensions):
+        return _load_from_file(filename=file_or_table, year=year)
+
+    return _load_from_sql(table=file_or_table, sql_constructor=sql_constructor, year=year)
+
+
+def _load_from_file(filename: str | Path, year: Year | None) -> pd.DataFrame:
+    """Loads tabular data from a file to a ``pandas.DataFrame``."""
+    if isinstance(filename, str):
+        filename = Path(filename).resolve()
+    if not isinstance(filename, Path):
+        raise TypeError(f"`filename` must be a valid path, not {filename=}")
+
+    if filename.suffix == ".csv":
+        df = pd.read_csv(filename, dtype_backend="pyarrow")
+    elif filename.suffix in (".parquet", ".pqt"):
+        df = pd.read_parquet(filename, dtype_backend="pyarrow")
+    elif filename.suffix in (".pickle", ".pkl"):
+        df = pd.read_pickle(filename, dtype_backend="pyarrow")
+    else:
+        raise ValueError(f"Only CSV, Parquet, and Pickle files allowed, not {filename=}")
+
+    if year is not None:
+        df = df.loc[df.year == year]
+
+    return df
+
+
+def _load_from_sql(table: str, sql_constructor: str, year: Year | None) -> pd.DataFrame:
+    """Load tabular data from SQL."""
+    where = f"where year = {year}" if year is not None else ""
+    sql = f"""select * from diffusion_shared."{table}" {where};"""
+    atlas_engine = create_engine(sql_constructor)
+
+    with atlas_engine.connect() as conn:
+        df = pd.read_sql(sql, con=conn.connection, dtype_backend="pyarrow")
+
+    atlas_engine.dispose()
+    return df
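Usage note: `load_df` dispatches on the file extension, so anything ending in a recognized suffix goes through the pandas readers, and everything else is treated as a table in the `diffusion_shared` schema. A sketch with hypothetical paths, table name, and connection string; note that the year filter compares the "year" column against the enum member, which assumes the `Year` values compare equal to the stored integers:

```python
# Usage sketch for the new loader; the path, table name, and connection
# string below are hypothetical.
from dwind.config import Year
from dwind.utils.loader import load_df

# Recognized extension -> _load_from_file, optionally filtered to one year.
agents = load_df("/projects/dwind/data/agents.pqt", year=Year._2025)

# No recognized extension -> _load_from_sql against diffusion_shared."<table>".
rates = load_df(
    "urdb_rates",
    year=None,
    sql_constructor="postgresql+psycopg2://user:password@host:5432/dgen_db",
)
```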