dwind-0.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dwind/__init__.py +3 -0
- dwind/btm_sizing.py +129 -0
- dwind/config.py +118 -0
- dwind/helper.py +172 -0
- dwind/loader.py +59 -0
- dwind/model.py +371 -0
- dwind/mp.py +225 -0
- dwind/resource.py +166 -0
- dwind/run.py +288 -0
- dwind/scenarios.py +139 -0
- dwind/valuation.py +1562 -0
- dwind-0.3.dist-info/METADATA +168 -0
- dwind-0.3.dist-info/RECORD +17 -0
- dwind-0.3.dist-info/WHEEL +5 -0
- dwind-0.3.dist-info/entry_points.txt +2 -0
- dwind-0.3.dist-info/licenses/LICENSE.txt +29 -0
- dwind-0.3.dist-info/top_level.txt +1 -0
dwind/__init__.py
ADDED
dwind/btm_sizing.py
ADDED
@@ -0,0 +1,129 @@
"""Provides the behind the meter sizing methods."""

import logging

import numpy as np
import pandas as pd

from dwind import Configuration, helper


log = logging.getLogger("dwfs")


def sizer(agents: pd.DataFrame, config: Configuration):
    """Finalize system sizes.
    Calculates BTM system sizes.
    Evaluates wind first to downselect to a single row.
    """

    def find_optimal_size(row):
        techpot = row["wind_size_kw_fom"]
        height = row["turbine_height_m"]
        load_kwh = row["load_kwh"]
        naep = row["wind_naep"]
        instances = row["turbine_instances"]

        # set the target kwh according to NEM availability
        target_kwh = load_kwh * config.btm.SYS_SIZE_TARGET_NO_NEM
        # also set the oversize limit according to NEM availability
        oversize_limit_kwh = load_kwh * config.btm.SYS_OVERSIZE_LIMIT_NO_NEM

        # save techpot system sizes - equal to FOM size for wind
        # NOTE: techpot sizes are NOT constrained by config-supplied system size limits
        row["wind_size_kw_techpot"] = techpot

        # find the optimal btm system size according to 'wind_size_kw_techpot'
        sizes = [2.5, 5, 10, 20, 50, 100, 250, 500, 750, 1000, 1500]

        aep_btm = {}
        scoe_btm = {}

        for size in sizes:
            if size <= techpot:
                # calculate the system generation from naep and kw size
                aep = size * naep

                # calculate scoe
                scoe = np.absolute(aep - target_kwh)

                aep_btm[size] = aep
                scoe_btm[size] = scoe

                # handle special cases
                # buildings requiring more electricity than can be generated by the largest
                # turbine (1.5 MW) return very low rank score and the optimal continuous
                # number of turbines
                if size == 1500 and aep < target_kwh:
                    scoe_btm[size] = 0.0

                # identify oversized projects and zero production turbines
                # where either condition is true, set a high score and zero turbines
                if aep > oversize_limit_kwh or aep == 0:
                    scoe_btm[size] = np.array([1e8]) + size * 100 + height

        # for each agent, find the optimal turbine
        optimal_size = min(scoe_btm, key=scoe_btm.get)
        optimal_aep = aep_btm[optimal_size]

        # handle BTM max system sizes
        max_size = config.siting.wind.max_btm_size_kw
        if optimal_size > max_size:
            optimal_size = max_size
            optimal_aep = naep * optimal_size

        row["wind_size_kw_btm"] = optimal_size * instances
        row["wind_turbine_kw_btm"] = optimal_size
        row["wind_aep_btm"] = optimal_aep

        return row

    try:
        if "wind" in config.project.settings.TECHS:
            agents = agents.apply(find_optimal_size, axis=1)

            # drop columns to avoid duplicates in full dataframe
            # and get rid of unnecessary/intermediate columns
            agents.drop(columns=["wind_turbine_kw", "wind_size_kw"], inplace=True, errors="ignore")
            agents.drop_duplicates(subset=["gid"], inplace=True)

        if "solar" in config.project.settings.TECHS:
            # max system size is either load-limited or roof-limited
            # (already calculated as 'solar_size_kw')
            solar_map = config.siting.solar.capacity_density_kw_per_sqft
            agents["pv_kw_per_sqft"] = agents["sector_abbr"].map(solar_map)
            agents["solar_size_kw_btm"] = np.minimum(
                agents["load_kwh"] / agents["solar_naep"],
                agents["developable_roof_sqft"] * agents["pv_kw_per_sqft"],
            )
            agents["solar_aep_btm"] = agents["solar_size_kw_btm"] * agents["solar_naep"]

            # handle BTM max system sizes
            max_size = config.siting.solar.max_btm_size_kw
            solar_size_limit_mask = agents["solar_size_kw_btm"] > max_size
            agents.loc[solar_size_limit_mask, "solar_size_kw_btm"] = max_size
            agents.loc[solar_size_limit_mask, "solar_aep_btm"] = (
                agents["solar_naep"] * agents["solar_size_kw_btm"]
            )

            # save techpot system sizes - equal to roof-constrained size for solar
            # NOTE: techpot sizes are NOT constrained by config-supplied system size limits
            agents["solar_size_kw_techpot"] = (
                agents["developable_roof_sqft"] * agents["pv_kw_per_sqft"]
            )

            # drop columns to avoid duplicates in full dataframe
            # and get rid of unnecessary/intermediate columns
            agents.drop(columns=["pv_kw_per_sqft"], inplace=True, errors="ignore")
            agents.drop_duplicates(subset=["gid"], inplace=True)

        # make small
        agents = helper.memory_downcaster(agents)

        return agents

    except Exception as e:
        log.exception(e)
        log.info("\n")
        log.info("BTM sizing failed")

        return pd.DataFrame()
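For orientation, below is a minimal, hypothetical invocation of `sizer` for a wind-only run. The column names and configuration keys come from the code above; the agent values and the inline settings dictionary are illustrative only and are not part of the package (real runs supply a TOML configuration file).

    import pandas as pd

    from dwind import Configuration
    from dwind.btm_sizing import sizer

    # Illustrative settings only; values are made up for this sketch.
    config = Configuration(
        {
            "project": {"settings": {"TECHS": ["wind"]}},
            "btm": {"SYS_SIZE_TARGET_NO_NEM": 1.0, "SYS_OVERSIZE_LIMIT_NO_NEM": 1.2},
            "siting": {"wind": {"max_btm_size_kw": 1000}},
        }
    )

    # One hypothetical agent with the columns that find_optimal_size() expects.
    agents = pd.DataFrame(
        {
            "gid": [1],
            "wind_size_kw_fom": [100.0],   # technical-potential (FOM) size, kW
            "turbine_height_m": [30.0],
            "load_kwh": [150_000.0],       # annual load
            "wind_naep": [1_800.0],        # net annual energy production per kW
            "turbine_instances": [1],
        }
    )

    sized = sizer(agents, config)
    print(sized[["gid", "wind_turbine_kw_btm", "wind_size_kw_btm", "wind_aep_btm"]])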
dwind/config.py
ADDED
@@ -0,0 +1,118 @@
"""Custom configuration data class to allow for dictionary style and dot notation calling of
attributes.
"""

import re
import tomllib
from pathlib import Path


class Mapping(dict):
    """Dict-like class that allows for the use of dictionary style attribute calls on class
    attributes.
    """

    def __setitem__(self, key, item):
        self.__dict__[key] = item

    def __getitem__(self, key):
        return self.__dict__[key]

    def __repr__(self):
        return repr(self.__dict__)

    def __len__(self):
        return len(self.__dict__)

    def __delitem__(self, key):
        del self.__dict__[key]

    def clear(self):
        return self.__dict__.clear()

    def copy(self):
        return self.__dict__.copy()

    def update(self, *args, **kwargs):
        return self.__dict__.update(*args, **kwargs)

    def keys(self):
        return self.__dict__.keys()

    def values(self):
        return self.__dict__.values()

    def items(self):
        return self.__dict__.items()

    def pop(self, *args):
        return self.__dict__.pop(*args)

    def __cmp__(self, dict_):
        return self.__cmp__(self.__dict__, dict_)

    def __contains__(self, item):
        return item in self.__dict__

    def __iter__(self):
        return iter(self.__dict__)


class Configuration(Mapping):
    """Configuration class for reading and converting nested dictionaries to allow for both
    namespace style and dot notation when collecting attributes.

    Customizations of the input data:
    - All fields containing "DIR" will be converted to a ``pathlib.Path`` object.
    - All nested data will be able to be called with dot notation and dictionary-style calls.
    - The keys of ``rev.turbine_class_dict`` are converted to floats automatically.
    - All data in the ``[sql]`` section will get converted to proper constructor strings, with
      the username and password data autopopulated from the matching ``{USER}`` and
      ``{PASSWORD}`` fields in the same configuration section.
    """

    def __init__(self, config: str | Path | dict, *, initial: bool = True):
        """Create a hybrid dictionary and namespace object for a given :py:attr:`config` where
        all keys (including nested keys) are accessible with dictionary-style and dot notation.

        Args:
            config (str | Path | dict): A configuration dictionary, or the filename of the
                dictionary to read and convert. If passing a filename, it must be a TOML file.
            initial (bool, optional): Option to disable post-processing of configuration data.
                Defaults to True.
        """
        if isinstance(config, str | Path):
            config = Path(config).resolve()
            with config.open("rb") as f:
                config = tomllib.load(f)

        for key, value in config.items():
            if isinstance(value, dict):
                self.__setattr__(key, Configuration(value, initial=False))
            else:
                if "DIR" in key:
                    self.__setattr__(key, Path(value).resolve())
                else:
                    self.__setattr__(key, value)

        if initial:
            self._convert_sql()
            self._convert_rev()

    def _convert_sql(self):
        """Replaces the "{USER}" and "{PASSWORD}" portions of the sql constructor strings with
        the actual user and password information for ease of configuration reuse between users.
        """
        if "sql" in self:
            for key, value in self.sql.items():
                if key.startswith(("USER", "PASSWORD")):
                    continue
                for target in re.findall(r"\{(.*?)\}", value):
                    value = value.replace(target, self.sql[target])
                value = re.sub("[{}]", "", value)
                self.sql[key] = value

    def _convert_rev(self):
        if "rev" in self:
            self.rev.turbine_class_dict = {
                float(k): v for k, v in self.rev.turbine_class_dict.items()
            }
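A short, hypothetical example of the behaviors described in the class docstring: dot and dictionary access, "DIR" path conversion, `[sql]` credential substitution, and `rev.turbine_class_dict` key coercion. The keys and values below are illustrative, not a real project configuration.

    from dwind.config import Configuration

    cfg = Configuration(
        {
            "project": {"OUT_DIR": "outputs"},  # any key containing "DIR" becomes a resolved Path
            "sql": {
                "USER": "jane",
                "PASSWORD": "hunter2",
                "atlas": "postgresql://{USER}:{PASSWORD}@host:5432/dwind",
            },
            "rev": {"turbine_class_dict": {"2.5": "residential", "100": "midsize"}},
        }
    )

    cfg.project.OUT_DIR         # Path to <cwd>/outputs
    cfg["sql"]["atlas"]         # 'postgresql://jane:hunter2@host:5432/dwind'
    cfg.rev.turbine_class_dict  # {2.5: 'residential', 100.0: 'midsize'}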
dwind/helper.py
ADDED
@@ -0,0 +1,172 @@
from __future__ import annotations

import numpy as np
import pandas as pd


def memory_downcaster(df):
    """Downcasts numeric columns to the smallest suitable integer, unsigned integer, or 32-bit
    float dtype to reduce memory usage.
    """
    assert isinstance(df, pd.DataFrame) | isinstance(df, pd.Series)

    NAlist = []
    for col in df.select_dtypes(include=[np.number]).columns:
        IsInt = False
        mx = df[col].max()
        mn = df[col].min()

        # integer does not support na; fill na
        if not np.isfinite(df[col]).all():
            NAlist.append(col)
            df[col].fillna(mn - 1, inplace=True)

        # test if column can be converted to an integer
        asint = df[col].fillna(0).astype(np.int64)
        result = df[col] - asint
        result = result.sum()
        if result > -0.01 and result < 0.01:
            IsInt = True

        # make integer/unsigned integer datatypes
        if IsInt:
            try:
                if mn >= 0:
                    if mx < 255:
                        df[col] = df[col].astype(np.uint8)
                    elif mx < 65535:
                        df[col] = df[col].astype(np.uint16)
                    elif mx < 4294967295:
                        df[col] = df[col].astype(np.uint32)
                    else:
                        df[col] = df[col].astype(np.uint64)
                else:
                    if mn > np.iinfo(np.int8).min and mx < np.iinfo(np.int8).max:
                        df[col] = df[col].astype(np.int8)
                    elif mn > np.iinfo(np.int16).min and mx < np.iinfo(np.int16).max:
                        df[col] = df[col].astype(np.int16)
                    elif mn > np.iinfo(np.int32).min and mx < np.iinfo(np.int32).max:
                        df[col] = df[col].astype(np.int32)
                    elif mn > np.iinfo(np.int64).min and mx < np.iinfo(np.int64).max:
                        df[col] = df[col].astype(np.int64)
            except:  # noqa: E722
                df[col] = df[col].astype(np.float32)

        # make float datatypes 32 bit
        else:
            df[col] = df[col].astype(np.float32)

    return df


def interpolate_array(row, col_1, col_2, col_in, col_out):
    """Linearly interpolates between ``col_1`` and ``col_2`` using the fraction in ``col_in``,
    storing the result in ``col_out``.
    """
    if row[col_in] != 0:
        interpolated = row[col_in] * (row[col_2] - row[col_1]) + row[col_1]
    else:
        interpolated = row[col_1]

    row[col_out] = interpolated

    return row


def scale_array_precision(df: pd.DataFrame, hourly_col: str, prec_offset_col: str):
    """Scales the precision of :py:attr:`hourly_col` by the :py:attr:`prec_offset_col`.

    Args:
        df (pd.DataFrame): A Pandas DataFrame containing :py:attr:`hourly_col` and
            :py:attr:`prec_offset_col`.
        hourly_col (str): The column whose precision will be adjusted.
        prec_offset_col (str): The column for scaling the precision of :py:attr:`hourly_col`.

    Returns:
        pd.DataFrame: The input :py:attr:`df` with the precision of :py:attr:`hourly_col` scaled.
    """
    df[hourly_col] = (
        np.array(df[hourly_col].values.tolist(), dtype="float64")
        / df[prec_offset_col].values.reshape(-1, 1)
    ).tolist()
    return df


def scale_array_deprecision(df: pd.DataFrame, col: str | list[str]) -> pd.DataFrame:
    """Rounds the column(s) :py:attr:`col` to the nearest 2nd decimal and converts to NumPy's
    float32.

    Args:
        df (pd.DataFrame): A Pandas DataFrame containing :py:attr:`col`.
        col (str | list[str]): The column(s) to have reduced precision.

    Returns:
        pd.DataFrame: The input :py:attr:`df` with the precision of :py:attr:`col` lowered.
    """
    df[col] = np.round(np.round(df[col], 2).astype(np.float32), 2)
    return df


def scale_array_sum(df: pd.DataFrame, hourly_col: str, scale_col: str) -> pd.DataFrame:
    """Scales the :py:attr:`hourly_col` by its sum and multiplies by the :py:attr:`scale_col`.

    Args:
        df (pd.DataFrame): Pandas DataFrame containing the :py:attr:`hourly_col` and
            :py:attr:`scale_col`.
        hourly_col (str): The name of the column to be scaled whose values are lists.
        scale_col (str): The column to scale the :py:attr:`hourly_col`.

    Returns:
        pandas.DataFrame: The input dataframe, but with the values of the :py:attr:`hourly_col`
            scaled appropriately.
    """
    hourly_array = np.array(df[hourly_col].values.tolist())
    df[hourly_col] = (
        hourly_array / hourly_array.sum(axis=1).reshape(-1, 1) * df[scale_col].values.reshape(-1, 1)
    ).tolist()
    return df


def scale_array_multiplier(
    df: pd.DataFrame, hourly_col: str, multiplier_col: str, col_out: str
) -> pd.DataFrame:
    """Scales the :py:attr:`hourly_col` values by the :py:attr:`multiplier_col`, and places the
    result in :py:attr:`col_out`.

    Args:
        df (pd.DataFrame): The Pandas DataFrame containing the :py:attr:`hourly_col` and
            :py:attr:`multiplier_col`.
        hourly_col (str): A column of hourly values as a list of floats in each cell.
        multiplier_col (str): The column used to scale the :py:attr:`hourly_col`.
        col_out (str): A new column that will contain the scaled data.

    Returns:
        pd.DataFrame: A new copy of the original data (:py:attr:`df`) containing the
            :py:attr:`col_out` column.
    """
    hourly_array = np.array(df[hourly_col].values.tolist())
    df[col_out] = (hourly_array * df[multiplier_col].values.reshape(-1, 1)).tolist()
    return df


def split_by_index(
    arr: pd.DataFrame | np.ndarray | pd.Series, n_splits: int
) -> tuple[np.ndarray, np.ndarray]:
    """Split a DataFrame, Series, or array-like with ``np.array_split``, but only return the
    start and stop indices, rather than chunks. For Pandas objects, this is equivalent to
    ``arr.iloc[start: end]`` and for NumPy: ``arr[start: end]``. Splits are done according
    to the 0th dimension.

    Args:
        arr (pd.DataFrame | pd.Series | np.ndarray): The array, data frame, or series to split.
        n_splits (int): The number of near equal or equal splits.

    Returns:
        tuple[np.ndarray, np.ndarray]: The start and stop indices for each split.
    """
    size = arr.shape[0]
    base = np.arange(n_splits)
    split_size = size // n_splits
    extra = size % n_splits

    starts = base * split_size
    ends = starts + split_size

    for i in range(extra):
        ends[i:] += 1
        starts[i + 1 :] += 1
    return starts, ends
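Two quick, hypothetical examples of the array helpers above: rescaling an hourly profile so it sums to an annual total, and computing chunk boundaries for splitting work. The column names and values are illustrative only.

    import numpy as np
    import pandas as pd

    from dwind import helper

    # scale_array_sum: rescale each row's hourly list so it sums to the value in "aep"
    df = pd.DataFrame({"gen_hourly": [[1.0, 2.0, 3.0]], "aep": [1200.0]})
    df = helper.scale_array_sum(df, hourly_col="gen_hourly", scale_col="aep")
    print(df["gen_hourly"].iloc[0])  # [200.0, 400.0, 600.0]

    # split_by_index: start/stop indices that mimic np.array_split over the 0th dimension
    starts, ends = helper.split_by_index(np.arange(10), n_splits=3)
    print(starts, ends)  # starts: [0 4 7], ends: [4 7 10]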
dwind/loader.py
ADDED
@@ -0,0 +1,59 @@
from pathlib import Path

import pandas as pd
from sqlalchemy import create_engine


def load_df(file_or_table: str | Path, year: str | None = None, sql_constructor: str | None = None):
    """Loads data from either a SQL table or file to a pandas ``DataFrame``.

    Args:
        file_or_table (str | Path): File name or path object, or SQL table where the data are
            located.
        year (int | None, optional): If used, only extracts the single year from a column called
            "year". Defaults to None.
        sql_constructor (str | None, optional): The SQL engine constructor string. Required if
            extracting from SQL. Defaults to None.
    """
    valid_extensions = (".csv", ".pqt", ".parquet", ".pkl", ".pickle")
    if str(file_or_table).endswith(valid_extensions):
        return _load_from_file(filename=file_or_table, year=year)

    return _load_from_sql(table=file_or_table, sql_constructor=sql_constructor, year=year)


def _load_from_file(filename: str | Path, year: str | int | None) -> pd.DataFrame:
    """Loads tabular data from a file to a ``pandas.DataFrame``."""
    if isinstance(filename, str):
        filename = Path(filename).resolve()
    if not isinstance(filename, Path):
        raise TypeError(f"`filename` must be a valid path, not {filename=}")

    if filename.suffix == ".csv":
        df = pd.read_csv(filename)
    elif filename.suffix in (".parquet", ".pqt"):
        df = pd.read_parquet(filename)
    elif filename.suffix in (".pickle", ".pkl"):
        df = pd.read_pickle(filename)
    else:
        raise ValueError(f"Only CSV, Parquet, and Pickle files allowed, not {filename=}")

    if year is not None:
        year = int(year)
        df = df.loc[df.year == year]

    return df


def _load_from_sql(table: str, sql_constructor: str, year: str | int | None) -> pd.DataFrame:
    """Load tabular data from SQL."""
    if year is not None:
        year = int(year)
    where = f"where year = {year}" if year is not None else ""
    sql = f"""select * from diffusion_shared."{table}" {where};"""
    atlas_engine = create_engine(sql_constructor)

    # read the query results into a dataframe before disposing of the engine
    with atlas_engine.connect() as conn:
        df = pd.read_sql(sql, con=conn.connection)

    atlas_engine.dispose()
    return df
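A hypothetical example of the two load paths in `load_df`: the file branch is selected purely by the extension, and anything else is treated as a table name in the `diffusion_shared` schema. The file name, table name, and connection string below are made up for illustration.

    from dwind.loader import load_df

    # File-backed load: the .pqt extension selects pandas.read_parquet, then
    # rows are filtered to df.year == 2025
    agents = load_df("agents.pqt", year=2025)

    # SQL-backed load: anything without a recognized extension is read from
    # diffusion_shared."<table>" using the supplied SQLAlchemy constructor string
    agents = load_df(
        "agent_core_attributes",
        year=2025,
        sql_constructor="postgresql://user:password@host:5432/dwind",
    )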