dwind-0.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dwind/__init__.py ADDED
@@ -0,0 +1,3 @@
+ from .config import Configuration
+
+ __version__ = "0.3"
dwind/btm_sizing.py ADDED
@@ -0,0 +1,129 @@
+ """Provides the behind the meter sizing methods."""
+
+ import logging
+
+ import numpy as np
+ import pandas as pd
+
+ from dwind import Configuration, helper
+
+
+ log = logging.getLogger("dwfs")
+
+
+ def sizer(agents: pd.DataFrame, config: Configuration):
+     """Finalize system sizes.
+
+     Calculates the BTM system sizes, evaluating wind first to downselect to a single row.
+     """
+
+     def find_optimal_size(row):
+         techpot = row["wind_size_kw_fom"]
+         height = row["turbine_height_m"]
+         load_kwh = row["load_kwh"]
+         naep = row["wind_naep"]
+         instances = row["turbine_instances"]
+
+         # set the target kwh according to NEM availability
+         target_kwh = load_kwh * config.btm.SYS_SIZE_TARGET_NO_NEM
+         # also set the oversize limit according to NEM availability
+         oversize_limit_kwh = load_kwh * config.btm.SYS_OVERSIZE_LIMIT_NO_NEM
+
+         # save techpot system sizes - equal to FOM size for wind
+         # NOTE: techpot sizes are NOT constrained by config-supplied system size limits
+         row["wind_size_kw_techpot"] = techpot
+
+         # find the optimal btm system size according to 'wind_size_kw_techpot'
+         sizes = [2.5, 5, 10, 20, 50, 100, 250, 500, 750, 1000, 1500]
+
+         aep_btm = {}
+         scoe_btm = {}
+
+         for size in sizes:
+             if size <= techpot:
+                 # calculate the system generation from naep and kw size
+                 aep = size * naep
+
+                 # calculate scoe
+                 scoe = np.absolute(aep - target_kwh)
+
+                 aep_btm[size] = aep
+                 scoe_btm[size] = scoe
+
+                 # handle special cases
+                 # buildings requiring more electricity than can be generated by the largest
+                 # turbine (1.5 MW) return a very low rank score and the optimal continuous
+                 # number of turbines
+                 if size == 1500 and aep < target_kwh:
+                     scoe_btm[size] = 0.0
+
+                 # identify oversized projects and zero production turbines
+                 # where either condition is true, set a high score and zero turbines
+                 if aep > oversize_limit_kwh or aep == 0:
+                     scoe_btm[size] = np.array([1e8]) + size * 100 + height
+
+         # for each agent, find the optimal turbine
+         optimal_size = min(scoe_btm, key=scoe_btm.get)
+         optimal_aep = aep_btm[optimal_size]
+
+         # handle BTM max system sizes
+         max_size = config.siting.wind.max_btm_size_kw
+         if optimal_size > max_size:
+             optimal_size = max_size
+             optimal_aep = naep * optimal_size
+
+         row["wind_size_kw_btm"] = optimal_size * instances
+         row["wind_turbine_kw_btm"] = optimal_size
+         row["wind_aep_btm"] = optimal_aep
+
+         return row
+
+     try:
+         if "wind" in config.project.settings.TECHS:
+             agents = agents.apply(find_optimal_size, axis=1)
+
+             # drop columns to avoid duplicates in full dataframe
+             # and get rid of unnecessary/intermediate columns
+             agents.drop(columns=["wind_turbine_kw", "wind_size_kw"], inplace=True, errors="ignore")
+             agents.drop_duplicates(subset=["gid"], inplace=True)
+
+         if "solar" in config.project.settings.TECHS:
+             # max system size is either load-limited or roof-limited
+             # (already calculated as 'solar_size_kw')
+             solar_map = config.siting.solar.capacity_density_kw_per_sqft
+             agents["pv_kw_per_sqft"] = agents["sector_abbr"].map(solar_map)
+             agents["solar_size_kw_btm"] = np.minimum(
+                 agents["load_kwh"] / agents["solar_naep"],
+                 agents["developable_roof_sqft"] * agents["pv_kw_per_sqft"],
+             )
+             agents["solar_aep_btm"] = agents["solar_size_kw_btm"] * agents["solar_naep"]
+
+             # handle BTM max system sizes
+             max_size = config.siting.solar.max_btm_size_kw
+             solar_size_limit_mask = agents["solar_size_kw_btm"] > max_size
+             agents.loc[solar_size_limit_mask, "solar_size_kw_btm"] = max_size
+             agents.loc[solar_size_limit_mask, "solar_aep_btm"] = (
+                 agents["solar_naep"] * agents["solar_size_kw_btm"]
+             )
+
+             # save techpot system sizes - equal to roof-constrained size for solar
+             # NOTE: techpot sizes are NOT constrained by config-supplied system size limits
+             agents["solar_size_kw_techpot"] = (
+                 agents["developable_roof_sqft"] * agents["pv_kw_per_sqft"]
+             )
+
+             # drop columns to avoid duplicates in full dataframe
+             # and get rid of unnecessary/intermediate columns
+             agents.drop(columns=["pv_kw_per_sqft"], inplace=True, errors="ignore")
+             agents.drop_duplicates(subset=["gid"], inplace=True)
+
+         # make small
+         agents = helper.memory_downcaster(agents)
+
+         return agents
+
+     except Exception as e:
+         log.exception(e)
+         log.info("\n")
+         log.info("BTM sizing failed")
+
+         return pd.DataFrame()
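
For orientation, a minimal usage sketch of `sizer` follows. The column names and configuration keys mirror those referenced in the function body above, but every literal value and the single-agent DataFrame are hypothetical, and only the wind branch is exercised:

    import pandas as pd

    from dwind import Configuration
    from dwind.btm_sizing import sizer

    # Hypothetical, minimal configuration; the values are illustrative only.
    config = Configuration(
        {
            "project": {"settings": {"TECHS": ["wind"]}},
            "btm": {"SYS_SIZE_TARGET_NO_NEM": 1.0, "SYS_OVERSIZE_LIMIT_NO_NEM": 1.15},
            "siting": {"wind": {"max_btm_size_kw": 1000}},
        }
    )

    # One hypothetical agent with the columns read by find_optimal_size.
    agents = pd.DataFrame(
        {
            "gid": [1],
            "wind_size_kw_fom": [100.0],
            "turbine_height_m": [30.0],
            "load_kwh": [250_000.0],
            "wind_naep": [1_800.0],
            "turbine_instances": [1],
        }
    )

    sized = sizer(agents, config)
    print(sized[["gid", "wind_turbine_kw_btm", "wind_size_kw_btm", "wind_aep_btm"]])
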
dwind/config.py ADDED
@@ -0,0 +1,118 @@
+ """Custom configuration data class to allow for dictionary style and dot notation calling of
+ attributes.
+ """
+
+ import re
+ import tomllib
+ from pathlib import Path
+
+
+ class Mapping(dict):
+     """Dict-like class that allows for the use of dictionary style attribute calls on class
+     attributes.
+     """
+
+     def __setitem__(self, key, item):
+         self.__dict__[key] = item
+
+     def __getitem__(self, key):
+         return self.__dict__[key]
+
+     def __repr__(self):
+         return repr(self.__dict__)
+
+     def __len__(self):
+         return len(self.__dict__)
+
+     def __delitem__(self, key):
+         del self.__dict__[key]
+
+     def clear(self):
+         return self.__dict__.clear()
+
+     def copy(self):
+         return self.__dict__.copy()
+
+     def update(self, *args, **kwargs):
+         return self.__dict__.update(*args, **kwargs)
+
+     def keys(self):
+         return self.__dict__.keys()
+
+     def values(self):
+         return self.__dict__.values()
+
+     def items(self):
+         return self.__dict__.items()
+
+     def pop(self, *args):
+         return self.__dict__.pop(*args)
+
+     def __cmp__(self, dict_):
+         return self.__cmp__(self.__dict__, dict_)
+
+     def __contains__(self, item):
+         return item in self.__dict__
+
+     def __iter__(self):
+         return iter(self.__dict__)
+
+
+ class Configuration(Mapping):
+     """Configuration class for reading and converting nested dictionaries to allow for both
+     namespace style and dot notation when collecting attributes.
+
+     Customizations of the input data:
+     - All fields containing "DIR" will be converted to a ``pathlib.Path`` object.
+     - All nested data will be able to be called with dot notation and dictionary-style calls.
+     - The ``rev.turbine_class_dict`` keys are converted to float automatically.
+     - All data in the ``[sql]`` section will get converted to proper constructor strings with the
+       associated username and password data auto-populated from the matching ``{USER}`` and
+       ``{PASSWORD}`` fields in the same configuration section.
+     """
+
+     def __init__(self, config: str | Path | dict, *, initial: bool = True):
+         """Create a hybrid dictionary and namespace object for a given :py:attr:`config`
+         where all keys (including nested) are accessible with dictionary-style and dot notation.
+
+         Args:
+             config (str | Path | dict): A configuration dictionary or filename of the dictionary
+                 to read and convert. If passing a filename, it must be a TOML file.
+             initial (bool, optional): Option to disable post-processing of configuration data.
+         """
+         if isinstance(config, str | Path):
+             config = Path(config).resolve()
+             with config.open("rb") as f:
+                 config = tomllib.load(f)
+
+         for key, value in config.items():
+             if isinstance(value, dict):
+                 self.__setattr__(key, Configuration(value, initial=False))
+             else:
+                 if "DIR" in key:
+                     self.__setattr__(key, Path(value).resolve())
+                 else:
+                     self.__setattr__(key, value)
+
+         if initial:
+             self._convert_sql()
+             self._convert_rev()
+
+     def _convert_sql(self):
+         """Replaces the "{USER}" and "{PASSWORD}" portions of the SQL constructor strings with
+         the actual user and password information for ease of configuration reuse between users.
+         """
+         if "sql" in self:
+             for key, value in self.sql.items():
+                 if key.startswith(("USER", "PASSWORD")):
+                     continue
+                 for target in re.findall(r"\{(.*?)\}", value):
+                     value = value.replace(target, self.sql[target])
+                 value = re.sub("[{}]", "", value)
+                 self.sql[key] = value
+
+     def _convert_rev(self):
+         if "rev" in self:
+             self.rev.turbine_class_dict = {
+                 float(k): v for k, v in self.rev.turbine_class_dict.items()
+             }
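
A short usage sketch of `Configuration` is below; the TOML contents are illustrative only and are not shipped with the package:

    from dwind import Configuration

    # Hypothetical config.toml:
    #
    #   [project]
    #   DATA_DIR = "/projects/dwind/data"
    #
    #   [sql]
    #   USER = "me"
    #   PASSWORD = "secret"
    #   atlas = "postgresql://{USER}:{PASSWORD}@host:5432/atlas"
    #
    #   [rev]
    #   turbine_class_dict = { "2.5" = "residential", "1500" = "large" }

    config = Configuration("config.toml")
    config.project.DATA_DIR        # pathlib.Path, because the key contains "DIR"
    config["sql"]["atlas"]         # "postgresql://me:secret@host:5432/atlas"
    config.rev.turbine_class_dict  # keys cast to float: {2.5: "residential", 1500.0: "large"}
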
dwind/helper.py ADDED
@@ -0,0 +1,172 @@
+ from __future__ import annotations
+
+ import numpy as np
+ import pandas as pd
+
+
+ def memory_downcaster(df):
+     """Downcasts numeric columns to the smallest safe integer dtype, or to 32-bit float."""
+     assert isinstance(df, pd.DataFrame) | isinstance(df, pd.Series)
+
+     NAlist = []
+     for col in df.select_dtypes(include=[np.number]).columns:
+         IsInt = False
+         mx = df[col].max()
+         mn = df[col].min()
+
+         # integer does not support na; fill na
+         if not np.isfinite(df[col]).all():
+             NAlist.append(col)
+             df[col].fillna(mn - 1, inplace=True)
+
+         # test if column can be converted to an integer
+         asint = df[col].fillna(0).astype(np.int64)
+         result = df[col] - asint
+         result = result.sum()
+         if result > -0.01 and result < 0.01:
+             IsInt = True
+
+         # make integer/unsigned integer datatypes
+         if IsInt:
+             try:
+                 if mn >= 0:
+                     if mx < 255:
+                         df[col] = df[col].astype(np.uint8)
+                     elif mx < 65535:
+                         df[col] = df[col].astype(np.uint16)
+                     elif mx < 4294967295:
+                         df[col] = df[col].astype(np.uint32)
+                     else:
+                         df[col] = df[col].astype(np.uint64)
+                 else:
+                     if mn > np.iinfo(np.int8).min and mx < np.iinfo(np.int8).max:
+                         df[col] = df[col].astype(np.int8)
+                     elif mn > np.iinfo(np.int16).min and mx < np.iinfo(np.int16).max:
+                         df[col] = df[col].astype(np.int16)
+                     elif mn > np.iinfo(np.int32).min and mx < np.iinfo(np.int32).max:
+                         df[col] = df[col].astype(np.int32)
+                     elif mn > np.iinfo(np.int64).min and mx < np.iinfo(np.int64).max:
+                         df[col] = df[col].astype(np.int64)
+             except:  # noqa: E722
+                 df[col] = df[col].astype(np.float32)
+
+         # make float datatypes 32 bit
+         else:
+             df[col] = df[col].astype(np.float32)
+
+     return df
+
+
+ def interpolate_array(row, col_1, col_2, col_in, col_out):
+     """Linearly interpolates between ``col_1`` and ``col_2`` using the fraction in ``col_in``."""
+     if row[col_in] != 0:
+         interpolated = row[col_in] * (row[col_2] - row[col_1]) + row[col_1]
+     else:
+         interpolated = row[col_1]
+
+     row[col_out] = interpolated
+
+     return row
+
+
+ def scale_array_precision(df: pd.DataFrame, hourly_col: str, prec_offset_col: str):
+     """Scales the precision of :py:attr:`hourly_col` by the :py:attr:`prec_offset_col`.
+
+     Args:
+         df (pd.DataFrame): A Pandas DataFrame containing :py:attr:`hourly_col` and
+             :py:attr:`prec_offset_col`.
+         hourly_col (str): The column whose precision will be adjusted.
+         prec_offset_col (str): The column for scaling the precision of :py:attr:`hourly_col`.
+
+     Returns:
+         pd.DataFrame: The input :py:attr:`df` with the precision of :py:attr:`hourly_col` scaled.
+     """
+     df[hourly_col] = (
+         np.array(df[hourly_col].values.tolist(), dtype="float64")
+         / df[prec_offset_col].values.reshape(-1, 1)
+     ).tolist()
+     return df
+
+
+ def scale_array_deprecision(df: pd.DataFrame, col: str | list[str]) -> pd.DataFrame:
+     """Rounds the column(s) :py:attr:`col` to the nearest 2nd decimal and converts to NumPy's
+     float32.
+
+     Args:
+         df (pd.DataFrame): A Pandas DataFrame containing :py:attr:`col`.
+         col (str | list[str]): The column(s) to have reduced precision.
+
+     Returns:
+         pd.DataFrame: The input :py:attr:`df` with the precision of :py:attr:`col` lowered.
+     """
+     df[col] = np.round(np.round(df[col], 2).astype(np.float32), 2)
+     return df
+
+
+ def scale_array_sum(df: pd.DataFrame, hourly_col: str, scale_col: str) -> pd.DataFrame:
+     """Scales the :py:attr:`hourly_col` by its sum and multiplies by the :py:attr:`scale_col`.
+
+     Args:
+         df (pd.DataFrame): Pandas DataFrame containing the :py:attr:`hourly_col` and
+             :py:attr:`scale_col`.
+         hourly_col (str): The name of the column to be scaled whose values are lists.
+         scale_col (str): The column used to scale the :py:attr:`hourly_col`.
+
+     Returns:
+         pandas.DataFrame: The input dataframe, but with the values of the :py:attr:`hourly_col`
+             scaled appropriately.
+     """
+     hourly_array = np.array(df[hourly_col].values.tolist())
+     df[hourly_col] = (
+         hourly_array / hourly_array.sum(axis=1).reshape(-1, 1) * df[scale_col].values.reshape(-1, 1)
+     ).tolist()
+     return df
+
+
+ def scale_array_multiplier(
+     df: pd.DataFrame, hourly_col: str, multiplier_col: str, col_out: str
+ ) -> pd.DataFrame:
+     """Scales the :py:attr:`hourly_col` values by the :py:attr:`multiplier_col`, and places the
+     result in the :py:attr:`col_out`.
+
+     Args:
+         df (pd.DataFrame): The Pandas DataFrame containing the :py:attr:`hourly_col` and
+             :py:attr:`multiplier_col`.
+         hourly_col (str): A column of hourly values as a list of floats in each cell.
+         multiplier_col (str): The column used to scale the :py:attr:`hourly_col`.
+         col_out (str): A new column that will contain the scaled data.
+
+     Returns:
+         pd.DataFrame: A new copy of the original data (:py:attr:`df`) containing the
+             :py:attr:`col_out` column.
+     """
+     hourly_array = np.array(df[hourly_col].values.tolist())
+     df[col_out] = (hourly_array * df[multiplier_col].values.reshape(-1, 1)).tolist()
+     return df
+
+
+ def split_by_index(
+     arr: pd.DataFrame | np.ndarray | pd.Series, n_splits: int
+ ) -> tuple[np.ndarray, np.ndarray]:
+     """Split a DataFrame, Series, or array-like with ``np.array_split``, but only return the
+     start and stop indices, rather than chunks. For Pandas objects, these are equivalent to
+     ``arr.iloc[start: end]`` and for NumPy: ``arr[start: end]``. Splits are done according
+     to the 0th dimension.
+
+     Args:
+         arr (pd.DataFrame | pd.Series | np.ndarray): The array, data frame, or series to split.
+         n_splits (:obj:`int`): The number of equal or near-equal splits.
+
+     Returns:
+         tuple[np.ndarray, np.ndarray]
+     """
+     size = arr.shape[0]
+     base = np.arange(n_splits)
+     split_size = size // n_splits
+     extra = size % n_splits
+
+     starts = base * split_size
+     ends = starts + split_size
+
+     for i in range(extra):
+         ends[i:] += 1
+         starts[i + 1 :] += 1
+     return starts, ends
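
As a quick, worked illustration of `split_by_index` (the data are arbitrary), it returns the same chunk boundaries that `np.array_split` would produce:

    import numpy as np
    import pandas as pd

    from dwind.helper import split_by_index

    df = pd.DataFrame({"x": range(10)})
    starts, ends = split_by_index(df, n_splits=3)
    # starts -> array([0, 4, 7]); ends -> array([4, 7, 10])
    chunks = [df.iloc[s:e] for s, e in zip(starts, ends)]  # chunk lengths 4, 3, 3
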
dwind/loader.py ADDED
@@ -0,0 +1,59 @@
+ from pathlib import Path
+
+ import pandas as pd
+ from sqlalchemy import create_engine
+
+
+ def load_df(
+     file_or_table: str | Path, year: str | int | None = None, sql_constructor: str | None = None
+ ) -> pd.DataFrame:
+     """Loads data from either a SQL table or file to a pandas ``DataFrame``.
+
+     Args:
+         file_or_table (str | Path): File name or path object, or SQL table where the data are
+             located.
+         year (str | int | None, optional): If used, only extracts the single year from a column
+             called "year". Defaults to None.
+         sql_constructor (str | None, optional): The SQL engine constructor string. Required if
+             extracting from SQL. Defaults to None.
+     """
+     valid_extensions = (".csv", ".pqt", ".parquet", ".pkl", ".pickle")
+     if str(file_or_table).endswith(valid_extensions):
+         return _load_from_file(filename=file_or_table, year=year)
+
+     return _load_from_sql(table=file_or_table, sql_constructor=sql_constructor, year=year)
+
+
+ def _load_from_file(filename: str | Path, year: str | int | None) -> pd.DataFrame:
+     """Loads tabular data from a file to a ``pandas.DataFrame``."""
+     if isinstance(filename, str):
+         filename = Path(filename).resolve()
+     if not isinstance(filename, Path):
+         raise TypeError(f"`filename` must be a valid path, not {filename=}")
+
+     if filename.suffix == ".csv":
+         df = pd.read_csv(filename)
+     elif filename.suffix in (".parquet", ".pqt"):
+         df = pd.read_parquet(filename)
+     elif filename.suffix in (".pickle", ".pkl"):
+         df = pd.read_pickle(filename)
+     else:
+         raise ValueError(f"Only CSV, Parquet, and Pickle files allowed, not {filename=}")
+
+     if year is not None:
+         year = int(year)
+         df = df.loc[df.year == year]
+
+     return df
+
+
+ def _load_from_sql(table: str, sql_constructor: str, year: str | int | None) -> pd.DataFrame:
+     """Load tabular data from SQL."""
+     if year is not None:
+         year = int(year)
+     where = f"where year = {year}" if year is not None else ""
+     sql = f"""select * from diffusion_shared."{table}" {where};"""
+     atlas_engine = create_engine(sql_constructor)
+
+     with atlas_engine.connect() as conn:
+         df = pd.read_sql(sql, con=conn.connection)
+
+     atlas_engine.dispose()
+     return df
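
A hypothetical pair of `load_df` calls is shown below; the file name, table name, and constructor string are illustrative, and the year filter assumes the data contain a "year" column:

    from dwind.loader import load_df

    # From a local Parquet file, keeping only one model year.
    agents = load_df("agents.parquet", year=2025)

    # From the diffusion_shared SQL schema, using an illustrative constructor string.
    blocks = load_df(
        "block_microdata",
        year=2025,
        sql_constructor="postgresql://user:password@host:5432/dwind",
    )
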