dwind 0.3-py3-none-any.whl → 0.3.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dwind/scenarios.py CHANGED
@@ -1,48 +1,80 @@
+ """Provides the scenario-specific mapping for varying financial and model configuration data."""
+
  import json
  from pathlib import Path

  import pandas as pd

+ from dwind.config import Year, Scenario

- def config_nem(scenario, year):
-     # NEM_SCENARIO_CSV
-     nem_opt_scens = ["highrecost", "lowrecost", "re100"]
-     # nem_opt_scens = ['der_value_HighREcost', 'der_value_LowREcost', 're_100']
-     if scenario in nem_opt_scens:
-         nem_scenario_csv = "nem_optimistic_der_value_2035.csv"
-     elif scenario == "baseline" and year in (2022, 2025, 2035):
-         nem_scenario_csv = f"nem_baseline_{year}.csv"
-     else:
-         nem_scenario_csv = "nem_baseline_2035.csv"

-     return nem_scenario_csv
+ def config_nem(scenario: Scenario, year: Year) -> str:
+     """Provides NEM configuration based on :py:attr:`scenario` and :py:attr:`year`.

+     Args:
+         scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+         year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.

- def config_cambium(scenario):
-     # CAMBIUM_SCENARIO
-     if scenario == "highrecost" or scenario == "re100":
-         cambium_scenario = "StdScen20_HighRECost"
-     elif scenario == "lowrecost":
-         cambium_scenario = "StdScen20_LowRECost"
-     else:
-         # cambium_scenario = "StdScen20_MidCase"
-         cambium_scenario = "Cambium23_MidCase"
+     Returns:
+         str: Name of the NEM scenario file to use.
+     """
+     if scenario in (Scenario.HIGHRECOST, Scenario.LOWRECOST, Scenario.RE100):
+         return "nem_optimistic_der_value_2035.csv"
+
+     if scenario is Scenario.BASELINE and year in (Year._2022, Year._2025, Year._2035):
+         return f"nem_baseline_{year.value}.csv"
+
+     return "nem_baseline_2035.csv"
+
+
+ def config_cambium(scenario: Scenario) -> str:
+     """Loads the cambium configuration name based on :py:attr:`scenario`.
+
+     Args:
+         scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+
+     Returns:
+         str: Name of the Cambium scenario to use.
+     """
+     if scenario in (Scenario.HIGHRECOST, Scenario.RE100):
+         return "StdScen20_HighRECost"

-     return cambium_scenario
+     if scenario is Scenario.LOWRECOST:
+         return "StdScen20_LowRECost"

+     return "Cambium23_MidCase"

- def config_costs(scenario, year):
-     # COST_INPUTS
-     f = Path(f"/projects/dwind/configs/costs/atb24/ATB24_costs_{scenario}_{year}.json").resolve()
+
+ def config_costs(scenario: Scenario, year: Year) -> dict:
+     """Loads the cost configuration based on the ATB analysis.
+
+     Args:
+         scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+         year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
+
+     Returns:
+         dict: Dictionary of ATB assumptions to be used for PySAM's cost inputs.
+     """
+     f = Path(
+         f"/projects/dwind/configs/costs/atb24/ATB24_costs_{scenario.value}_{year.value}.json"
+     ).resolve()
      with f.open("r") as f_in:
          cost_inputs = json.load(f_in)

      return cost_inputs


- def config_performance(scenario, year):
-     # PERFORMANCE_INPUTS
-     if scenario == "baseline" and year == 2022:
+ def config_performance(scenario: Scenario, year: Year) -> pd.DataFrame:
+     """Loads the technology performance configurations.
+
+     Args:
+         scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+         year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
+
+     Returns:
+         pd.DataFrame: Performance data based on the scale of each technology.
+     """
+     if scenario is Scenario.BASELINE and year is Year._2022:
          performance_inputs = {
              "solar": pd.DataFrame(
                  [
@@ -108,15 +140,21 @@ def config_performance(scenario, year):
      return performance_inputs


- def config_financial(scenario, year):
-     # FINANCIAL_INPUTS
-     scenarios = ("baseline", "metering", "billing")
-     if scenario in scenarios and year == 2025:
+ def config_financial(scenario: Scenario, year: Year) -> dict:
+     """Loads the financial configuration based on the ATB analysis.
+
+     Args:
+         scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+         year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
+
+     Returns:
+         dict: Dictionary of ATB assumptions to be used for configuration PySAM.
+     """
+     if year is Year._2025:
          f = f"/projects/dwind/configs/costs/atb24/ATB24_financing_baseline_{year}.json"
-         i = Path("/projects/dwind/data/incentives/2025_incentives.json").resolve()
-         with i.open("r") as i_in:
-             incentives = json.load(i_in)
-     elif scenario in scenarios and year in (2035, 2040):
+         i = Path("/projects/dwind/data/incentives/2025_incentives.pqt").resolve()
+         incentives = pd.read_parquet(i, dtype_backend="pyarrow")
+     elif year in (Year._2035, Year._2040):
          f = "/projects/dwind/configs/costs/atb24/ATB24_financing_baseline_2035.json"
      else:
          # use old assumptions
@@ -125,6 +163,8 @@ def config_financial(scenario, year):

      with f.open("r") as f_in:
          financials = json.load(f_in)
+
+     # TODO: determine if shared settings is applicable going forward, or separate should be reserved
      if year == 2025:
          financials["BTM"]["itc_fraction_of_capex"] = incentives
          financials["FOM"]["itc_fraction_of_capex"] = incentives
File without changes
dwind/utils/array.py ADDED
@@ -0,0 +1,99 @@
+ """Provides a series of generic NumPy and Pandas utility functions."""
+
+ from __future__ import annotations
+
+ import numpy as np
+ import pandas as pd
+
+
+ def memory_downcaster(df: pd.DataFrame | pd.Series) -> pd.DataFrame | pd.Series:
+     """Downcasts ``int`` and ``float`` columns to the lowest memory alternative possible. For
+     integers this means converting to either signed or unsigned 8-, 16-, 32-, or 64-bit integers,
+     and for floats, converting to ``np.float32``.
+
+     Args:
+         df (pd.DataFrame | pd.Series): DataFrame or Series to have its memory footprint reduced.
+
+     Returns:
+         pd.DataFrame | pd.Series: Reduced footprint version of the passed :py:attr:`df`.
+     """
+     # if not isinstance(df, pd.DataFrame | pd.Series):
+     if not isinstance(df, (pd.DataFrame, pd.Series)): # noqa
+         raise TypeError("Input value must be a Pandas DataFrame or Series.")
+
+     NAlist = []
+     for col in df.select_dtypes(include=[np.number]).columns:
+         IsInt = False
+         mx = df[col].max()
+         mn = df[col].min()
+
+         # integer does not support na; fill na
+         if not np.isfinite(df[col]).all():
+             NAlist.append(col)
+             df[col].fillna(mn - 1, inplace=True)
+
+         # test if column can be converted to an integer
+         asint = df[col].fillna(0).astype(np.int64)
+         result = df[col] - asint
+         result = result.sum()
+         if result > -0.01 and result < 0.01:
+             IsInt = True
+
+         # make integer/unsigned integer datatypes
+         if IsInt:
+             try:
+                 if mn >= 0:
+                     if mx < 255:
+                         df[col] = df[col].astype(np.uint8)
+                     elif mx < 65535:
+                         df[col] = df[col].astype(np.uint16)
+                     elif mx < 4294967295:
+                         df[col] = df[col].astype(np.uint32)
+                     else:
+                         df[col] = df[col].astype(np.uint64)
+                 else:
+                     if mn > np.iinfo(np.int8).min and mx < np.iinfo(np.int8).max:
+                         df[col] = df[col].astype(np.int8)
+                     elif mn > np.iinfo(np.int16).min and mx < np.iinfo(np.int16).max:
+                         df[col] = df[col].astype(np.int16)
+                     elif mn > np.iinfo(np.int32).min and mx < np.iinfo(np.int32).max:
+                         df[col] = df[col].astype(np.int32)
+                     elif mn > np.iinfo(np.int64).min and mx < np.iinfo(np.int64).max:
+                         df[col] = df[col].astype(np.int64)
+             except: # noqa: E722
+                 df[col] = df[col].astype(np.float32)
+
+         # make float datatypes 32 bit
+         else:
+             df[col] = df[col].astype(np.float32)
+
+     return df
+
+
+ def split_by_index(
+     arr: pd.DataFrame | np.ndarray | pd.Series, n_splits: int
+ ) -> tuple[np.ndarray, np.ndarray]:
+     """Split a DataFrame, Series, or array like with np.array_split, but only return the start and
+     stop indices, rather than chunks. For Pandas objects, this are equivalent to
+     ``arr.iloc[start: end]`` and for NumPy: ``arr[start: end]``. Splits are done according
+     to the 0th dimension.
+
+     Args:
+         arr(pd.DataFrame | pd.Series | np.ndarray): The array, data frame, or series to split.
+         n_splits(:obj:`int`): The number of near equal or equal splits.
+
+     Returns:
+         tuple[np.ndarray, np.ndarray]
+     """
+     size = arr.shape[0]
+     base = np.arange(n_splits)
+     split_size = size // n_splits
+     extra = size % n_splits
+
+     starts = base * split_size
+     ends = starts + split_size
+
+     for i in range(extra):
+         ends[i:] += 1
+         starts[i + 1 :] += 1
+     return starts, ends
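
Usage note (not part of the diff): a minimal sketch of the two helpers above, assuming dwind is importable; the frame and its values are made up for illustration.

    import numpy as np
    import pandas as pd

    from dwind.utils.array import memory_downcaster, split_by_index

    df = pd.DataFrame({"a": np.arange(10), "b": np.linspace(0.0, 1.0, 10)})

    # Three near-equal splits over the 0th dimension: chunk sizes 4, 3, and 3.
    starts, ends = split_by_index(df, n_splits=3)
    chunks = [df.iloc[s:e] for s, e in zip(starts, ends)]

    # The int64 column fits in uint8 and the float64 column is downcast to float32.
    small = memory_downcaster(df.copy())
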
dwind/utils/hpc.py ADDED
@@ -0,0 +1,138 @@
+ """Provides the live timing table functionalities for the Kestrel :py:class:`MultiProcess` class."""
+
+ from __future__ import annotations
+
+ import io
+ import re
+ import time
+ import subprocess
+ from copy import deepcopy
+
+ import pandas as pd
+ from rich.table import Table
+ from rex.utilities.hpc import SLURM
+
+
+ def convert_seconds_for_print(time: float) -> str:
+     """Convert number of seconds to number of hours, minutes, and seconds."""
+     div = ((60, "seconds"), (60, "minutes"), (24, "hours"))
+
+     result = []
+     value = time
+     for divisor, label in div:
+         if not divisor:
+             remainder = value
+             if not remainder:
+                 break
+         else:
+             value, remainder = divmod(value, divisor)
+             if not value and not remainder:
+                 break
+         if remainder == 1:
+             label = label[:-1]
+
+         # 0.2 second precision for seconds, and no decimals otherwise
+         if result:
+             result.append(f"{remainder:,.0f} {label}")
+         else:
+             result.append(f"{remainder:.1f} {label}")
+     if result:
+         return ", ".join(reversed(result))
+     return "0"
+
+
+ def update_status(job_status: dict) -> dict:
+     """Get an updated status and timing statistics for all running jobs on the HPC.
+
+     Args:
+         job_status (dict): Dictionary of job id (primary key) with sub keys of "status",
+             "start_time" (initial or start of run status), "wait", and "run".
+
+     Returns:
+         dict: Dictionary of updated statuses and timing statistics for all current queued and
+             running jobs.
+     """
+     slurm = SLURM()
+     update = {}
+     for job, vals in job_status.items():
+         original_status = vals["status"]
+         if original_status in ("CG", "CF", "None", None):
+             continue
+         new_status = slurm.check_status(job_id=job)
+         if new_status == "PD":
+             update[job] = vals | {"status": new_status, "wait": time.perf_counter() - vals["start"]}
+         elif new_status == "R":
+             if original_status != "R":
+                 update[job] = vals | {
+                     "status": new_status,
+                     "wait": time.perf_counter() - vals["start"],
+                     "start": time.perf_counter(),
+                 }
+             else:
+                 update[job] = vals | {"run": time.perf_counter() - vals["start"]}
+         elif new_status in ("CG", "CF", "None", None):
+             update[job] = vals | {"status": new_status, "run": time.perf_counter() - vals["start"]}
+         else:
+             raise ValueError(f"Unaccounted for status code: {new_status}")
+     return update
+
+
+ def generate_run_status_table(job_status: dict) -> tuple[Table, bool]:
+     """Generate the job status run time statistics table.
+
+     Args:
+         job_status (dict): Dictionary of job id (primary key) with sub keys of "status",
+             "start_time" (initial or start of run status), "wait", and "run".
+
+     Returns:
+         Table: ``rich.Table`` of human readable statistics.
+         bool: True if all jobs are complete, otherwise False.
+     """
+     table = Table()
+     table.add_column("Job ID")
+     table.add_column("Status")
+     table.add_column("Wait time")
+     table.add_column("Run time")
+
+     for job, vals in job_status.items():
+         status = vals["status"]
+         _wait = vals["wait"]
+         _run = vals["run"]
+         table.add_row(
+             job, status, convert_seconds_for_print(_wait), convert_seconds_for_print(_run)
+         )
+     done = all(el["status"] in ("CG", "CF", None) for el in job_status.values())
+     return table, done
+
+
+ def get_finished_run_status(jobs: int | str | list[int | str]) -> dict[str, str]:
+     """Extracts a dictionary of job_id and status from the ``sacct`` output for a single
+     job or series of jobs.
+
+     Args:
+         jobs (int | str | list[int | str]): Single job ID or list of job IDs that have finished
+             running.
+
+     Returns:
+         dict[str, str]: Dictionary of {job_id_1: status_1, ..., job_id_N: status_N}.
+     """
+     if isinstance(jobs, (int, str)): # noqa
+         jobs = [jobs]
+     jobs = [str(j) for j in jobs]
+
+     # Format the command to be in the form of [sacct, -j, job_id_1, ..., -j job_id_N]
+     command = deepcopy(jobs)
+     for i in range(len(command) - 1, -1, -1):
+         command.insert(i, "-j")
+     command.insert(0, "sacct")
+     results = subprocess.check_output(command)
+
+     # Convert the sacct string output to be table-like
+     buffer = io.StringIO(results.decode("utf8", "ignore"))
+     lines = [re.split(" +", line) for line in buffer.readlines() if not line.startswith("-")]
+
+     # Create a dataframe, and export a dictionary of the form job_id: job_status
+     df = pd.DataFrame(lines[1:], columns=lines[0])
+     df = df.loc[df.JobID.isin(jobs), ["JobID", "State"]]
+     df.JobID = df.JobID.astype(int)
+     return dict(df.values.tolist())
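
Usage note (not part of the diff): update_status and get_finished_run_status need a live SLURM scheduler (sacct and job-status queries via rex), but the formatting helpers can be exercised anywhere. A small sketch with a hypothetical job-status mapping mirroring the keys the module expects:

    from rich.console import Console

    from dwind.utils.hpc import convert_seconds_for_print, generate_run_status_table

    print(convert_seconds_for_print(3735.5))  # "1 hour, 2 minutes, 15.5 seconds"

    # Hypothetical snapshot; times are in seconds, as tracked with time.perf_counter().
    job_status = {
        "1234567": {"status": "R", "start": 0.0, "wait": 42.0, "run": 615.2},
        "1234568": {"status": "PD", "start": 0.0, "wait": 300.0, "run": 0.0},
    }
    table, done = generate_run_status_table(job_status)
    Console().print(table)  # done stays False until every job reports CG/CF/None
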
dwind/utils/loader.py ADDED
@@ -0,0 +1,63 @@
+ """Provides the core data loading methods for importing scenario data from flat files or SQL."""
+
+ from __future__ import annotations
+
+ from pathlib import Path
+
+ import pandas as pd
+ from sqlalchemy import create_engine
+
+ from dwind.config import Year
+
+
+ def load_df(file_or_table: str | Path, year: Year | None, sql_constructor: str | None = None):
+     """Loads data from either a SQL table or file to a pandas ``DataFrame``.
+
+     Args:
+         file_or_table (str | Path): File name or path object, or SQL table where the data are
+             located.
+         year (:py:class:`dwind.config.Year`, optional): If used, only extracts the single year from
+             a column called "year". Defaults to None.
+         sql_constructor (str | None, optional): The SQL engine constructor string. Required if
+             extracting from SQL. Defaults to None.
+     """
+     valid_extenstions = (".csv", ".pqt", ".parquet", ".pkl", ".pickle")
+     if str(file_or_table).endswith(valid_extenstions):
+         return _load_from_file(filename=file_or_table, year=year)
+
+     return _load_from_sql(table=file_or_table, sql_constructor=sql_constructor, year=year)
+
+
+ def _load_from_file(filename: str | Path, year: Year | None) -> pd.DataFrame:
+     """Loads tabular data from a file to a ``pandas.DataFrame``."""
+     if isinstance(filename, str):
+         filename = Path(filename).resolve()
+     if not isinstance(filename, Path):
+         raise TypeError(f"`filename` must be a valid path, not {filename=}")
+
+     if filename.suffix == ".csv":
+         df = pd.read_csv(filename, dtype_backend="pyarrow")
+     elif filename.suffix in (".parquet", ".pqt"):
+         df = pd.read_parquet(filename, dtype_backend="pyarrow")
+     elif filename.suffix in (".pickle", ".pkl"):
+         df = pd.read_pickle(filename, dtype_backend="pyarrow")
+     else:
+         raise ValueError(f"Only CSV, Parquet, and Pickle files allowed, not {filename=}")
+
+     if year is not None:
+         df = df.loc[df.year == year]
+
+     return df
+
+
+ def _load_from_sql(table: str, sql_constructor: str, year: Year | None) -> pd.DataFrame:
+     """Load tabular data from SQL."""
+     where = f"where year = {year}" if year is not None else ""
+     sql = f"""select * from diffusion_shared."{table}" {where};"""
+     atlas_engine = create_engine(sql_constructor)
+
+     with atlas_engine.connect() as conn:
+         df = pd.read_sql(sql, con=conn.connection, dtype_backend="pyarrow")
+
+     atlas_engine.dispose()
+     return df
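
Usage note (not part of the diff): load_df dispatches on the file suffix and falls back to a SQL read for anything else. A sketch with a hypothetical file path, table name, and connection string:

    from dwind.config import Year
    from dwind.utils.loader import load_df

    # Flat file: the suffix (.csv/.pqt/.parquet/.pkl/.pickle) selects the pandas reader.
    agents = load_df("/projects/dwind/agents/example_agents.pqt", year=Year._2025)

    # No recognized suffix, so this is treated as a table in the diffusion_shared schema
    # and requires an SQLAlchemy constructor string.
    rates = load_df(
        "example_rates_table",
        year=None,
        sql_constructor="postgresql+psycopg2://user:password@host:5432/example_db",
    )
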
@@ -0,0 +1,60 @@
+ import numpy as np
+ from joblib import Parallel, delayed
+ from threading import Thread
+ from rich.progress import Progress, BarColumn, TimeRemainingColumn, TextColumn
+ from rich.console import Console
+ from rich.live import Live
+ import time
+
+ # Define the number of tasks and create a shared memory numpy array to hold their progress
+ num_tasks = 4
+ progress_array = np.memmap("progress.mmap2", dtype=np.float32, mode="w+", shape=N)
+
+ # Define a function that performs a task and updates the progress array
+ def perform_task(task_idx, progress_array):
+     for i in range(100):
+         # Do some work here
+         # ...
+
+         # Update the progress array
+         time.sleep(0.1)
+         progress_array[task_idx] = i / 100
+
+     # Update the progress array to 100% on completion
+     progress_array[task_idx] = 1
+
+ # Define a function to continuously update the Rich progress bar
+ def update_progress_bar(
+     progress_array=progress_array,
+     num_tasks=num_tasks,
+ ):
+     with Progress(
+         TextColumn("[bold blue]{task.fields[name]}"),
+         BarColumn(),
+         TextColumn("[bold green]{task.fields[status]}"),
+         TimeRemainingColumn(),
+         # console=console,
+     ) as progress:
+         tasks = [
+             progress.add_task(
+                 description=f"Task {i}",
+                 name=f"Task {i}",
+                 status="pending",
+                 total=100,
+             )
+             for i in range(num_tasks)
+         ]
+
+         while not all(progress_array == 1):
+             for i, task in enumerate(tasks):
+                 progress.update(task, completed=int(progress_array[i] * 100))
+             time.sleep(0.1 * 2 ** abs(*np.random.randn(1)))
+
+
+ # Launch the progress bar update function in a separate thread
+ Thread(target=update_progress_bar, args=[progress_array, num_tasks]).start()
+
+ # Launch the tasks in parallel using joblib and the perform_task function
+ Parallel(n_jobs=-2, backend="loky")(
+     delayed(perform_task)(i, progress_array) for i in range(num_tasks)
+ )
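
Note (not part of the diff): the new file above reads like a joblib/rich progress-bar prototype; progress_array is allocated with shape=N, but N is never defined (and Console/Live are imported without being used), so it would fail with a NameError as released. Presumably the memmap should be sized to the number of tasks, e.g.:

    progress_array = np.memmap("progress.mmap2", dtype=np.float32, mode="w+", shape=num_tasks)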