dwind-0.3-py3-none-any.whl → dwind-0.3.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dwind/__init__.py +1 -1
- dwind/btm_sizing.py +2 -2
- dwind/cli/__init__.py +0 -0
- dwind/cli/collect.py +114 -0
- dwind/cli/debug.py +137 -0
- dwind/cli/run.py +288 -0
- dwind/cli/utils.py +166 -0
- dwind/config.py +159 -8
- dwind/loader.py +4 -1
- dwind/main.py +20 -0
- dwind/model.py +265 -99
- dwind/mp.py +61 -61
- dwind/resource.py +122 -40
- dwind/run.py +50 -17
- dwind/scenarios.py +75 -35
- dwind/utils/__init__.py +0 -0
- dwind/utils/array.py +99 -0
- dwind/utils/hpc.py +138 -0
- dwind/utils/loader.py +63 -0
- dwind/utils/progress.py +60 -0
- dwind/valuation.py +396 -290
- {dwind-0.3.dist-info → dwind-0.3.2.dist-info}/METADATA +2 -1
- dwind-0.3.2.dist-info/RECORD +28 -0
- dwind-0.3.2.dist-info/entry_points.txt +2 -0
- dwind-0.3.dist-info/RECORD +0 -17
- dwind-0.3.dist-info/entry_points.txt +0 -2
- {dwind-0.3.dist-info → dwind-0.3.2.dist-info}/WHEEL +0 -0
- {dwind-0.3.dist-info → dwind-0.3.2.dist-info}/licenses/LICENSE.txt +0 -0
- {dwind-0.3.dist-info → dwind-0.3.2.dist-info}/top_level.txt +0 -0
dwind/scenarios.py
CHANGED
@@ -1,48 +1,80 @@
+"""Provides the scenario-specific mapping for varying financial and model configuration data."""
+
 import json
 from pathlib import Path
 
 import pandas as pd
 
+from dwind.config import Year, Scenario
 
-def config_nem(scenario, year):
-    # NEM_SCENARIO_CSV
-    nem_opt_scens = ["highrecost", "lowrecost", "re100"]
-    # nem_opt_scens = ['der_value_HighREcost', 'der_value_LowREcost', 're_100']
-    if scenario in nem_opt_scens:
-        nem_scenario_csv = "nem_optimistic_der_value_2035.csv"
-    elif scenario == "baseline" and year in (2022, 2025, 2035):
-        nem_scenario_csv = f"nem_baseline_{year}.csv"
-    else:
-        nem_scenario_csv = "nem_baseline_2035.csv"
 
-
+def config_nem(scenario: Scenario, year: Year) -> str:
+    """Provides NEM configuration based on :py:attr:`scenario` and :py:attr:`year`.
 
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+        year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
 
-
-
-
-
-
-
-
-
-
+    Returns:
+        str: Name of the NEM scenario file to use.
+    """
+    if scenario in (Scenario.HIGHRECOST, Scenario.LOWRECOST, Scenario.RE100):
+        return "nem_optimistic_der_value_2035.csv"
+
+    if scenario is Scenario.BASELINE and year in (Year._2022, Year._2025, Year._2035):
+        return f"nem_baseline_{year.value}.csv"
+
+    return "nem_baseline_2035.csv"
+
+
+def config_cambium(scenario: Scenario) -> str:
+    """Loads the cambium configuration name based on :py:attr:`scenario`.
+
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+
+    Returns:
+        str: Name of the Cambium scenario to use.
+    """
+    if scenario in (Scenario.HIGHRECOST, Scenario.RE100):
+        return "StdScen20_HighRECost"
 
-
+    if scenario is Scenario.LOWRECOST:
+        return "StdScen20_LowRECost"
 
+    return "Cambium23_MidCase"
 
-
-
-
+
+def config_costs(scenario: Scenario, year: Year) -> dict:
+    """Loads the cost configuration based on the ATB analysis.
+
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+        year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
+
+    Returns:
+        dict: Dictionary of ATB assumptions to be used for PySAM's cost inputs.
+    """
+    f = Path(
+        f"/projects/dwind/configs/costs/atb24/ATB24_costs_{scenario.value}_{year.value}.json"
+    ).resolve()
     with f.open("r") as f_in:
         cost_inputs = json.load(f_in)
 
     return cost_inputs
 
 
-def config_performance(scenario, year):
-
-
+def config_performance(scenario: Scenario, year: Year) -> pd.DataFrame:
+    """Loads the technology performance configurations.
+
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+        year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
+
+    Returns:
+        pd.DataFrame: Performance data based on the scale of each technology.
+    """
+    if scenario is Scenario.BASELINE and year is Year._2022:
         performance_inputs = {
             "solar": pd.DataFrame(
                 [
@@ -108,15 +140,21 @@ def config_performance(scenario, year):
     return performance_inputs
 
 
-def config_financial(scenario, year):
-
-
-
+def config_financial(scenario: Scenario, year: Year) -> dict:
+    """Loads the financial configuration based on the ATB analysis.
+
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+        year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
+
+    Returns:
+        dict: Dictionary of ATB assumptions to be used for configuration PySAM.
+    """
+    if year is Year._2025:
         f = f"/projects/dwind/configs/costs/atb24/ATB24_financing_baseline_{year}.json"
-        i = Path("/projects/dwind/data/incentives/2025_incentives.
-
-
-    elif scenario in scenarios and year in (2035, 2040):
+        i = Path("/projects/dwind/data/incentives/2025_incentives.pqt").resolve()
+        incentives = pd.read_parquet(i, dtype_backend="pyarrow")
+    elif year in (Year._2035, Year._2040):
         f = "/projects/dwind/configs/costs/atb24/ATB24_financing_baseline_2035.json"
     else:
         # use old assumptions
@@ -125,6 +163,8 @@ def config_financial(scenario, year):
 
     with f.open("r") as f_in:
         financials = json.load(f_in)
+
+    # TODO: determine if shared settings is applicable going forward, or separate should be reserved
     if year == 2025:
         financials["BTM"]["itc_fraction_of_capex"] = incentives
         financials["FOM"]["itc_fraction_of_capex"] = incentives
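
Not part of the diff: a brief usage sketch of the refactored, enum-based API above. It assumes the `Scenario` and `Year` members shown in the diff (e.g., `Scenario.BASELINE`, `Year._2025`) and that `Year._2025.value == 2025`; only the two string-returning helpers are shown, since `config_costs` and `config_financial` read JSON from hard-coded `/projects/dwind/...` paths.

from dwind.config import Scenario, Year
from dwind import scenarios

nem_csv = scenarios.config_nem(Scenario.BASELINE, Year._2025)  # -> "nem_baseline_2025.csv"
cambium = scenarios.config_cambium(Scenario.LOWRECOST)         # -> "StdScen20_LowRECost"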
dwind/utils/__init__.py
ADDED
File without changes
dwind/utils/array.py
ADDED
@@ -0,0 +1,99 @@
"""Provides a series of generic NumPy and Pandas utility functions."""

from __future__ import annotations

import numpy as np
import pandas as pd


def memory_downcaster(df: pd.DataFrame | pd.Series) -> pd.DataFrame | pd.Series:
    """Downcasts ``int`` and ``float`` columns to the lowest memory alternative possible. For
    integers this means converting to either signed or unsigned 8-, 16-, 32-, or 64-bit integers,
    and for floats, converting to ``np.float32``.

    Args:
        df (pd.DataFrame | pd.Series): DataFrame or Series to have its memory footprint reduced.

    Returns:
        pd.DataFrame | pd.Series: Reduced footprint version of the passed :py:attr:`df`.
    """
    # if not isinstance(df, pd.DataFrame | pd.Series):
    if not isinstance(df, (pd.DataFrame, pd.Series)):  # noqa
        raise TypeError("Input value must be a Pandas DataFrame or Series.")

    NAlist = []
    for col in df.select_dtypes(include=[np.number]).columns:
        IsInt = False
        mx = df[col].max()
        mn = df[col].min()

        # integer does not support na; fill na
        if not np.isfinite(df[col]).all():
            NAlist.append(col)
            df[col].fillna(mn - 1, inplace=True)

        # test if column can be converted to an integer
        asint = df[col].fillna(0).astype(np.int64)
        result = df[col] - asint
        result = result.sum()
        if result > -0.01 and result < 0.01:
            IsInt = True

        # make integer/unsigned integer datatypes
        if IsInt:
            try:
                if mn >= 0:
                    if mx < 255:
                        df[col] = df[col].astype(np.uint8)
                    elif mx < 65535:
                        df[col] = df[col].astype(np.uint16)
                    elif mx < 4294967295:
                        df[col] = df[col].astype(np.uint32)
                    else:
                        df[col] = df[col].astype(np.uint64)
                else:
                    if mn > np.iinfo(np.int8).min and mx < np.iinfo(np.int8).max:
                        df[col] = df[col].astype(np.int8)
                    elif mn > np.iinfo(np.int16).min and mx < np.iinfo(np.int16).max:
                        df[col] = df[col].astype(np.int16)
                    elif mn > np.iinfo(np.int32).min and mx < np.iinfo(np.int32).max:
                        df[col] = df[col].astype(np.int32)
                    elif mn > np.iinfo(np.int64).min and mx < np.iinfo(np.int64).max:
                        df[col] = df[col].astype(np.int64)
            except:  # noqa: E722
                df[col] = df[col].astype(np.float32)

        # make float datatypes 32 bit
        else:
            df[col] = df[col].astype(np.float32)

    return df


def split_by_index(
    arr: pd.DataFrame | np.ndarray | pd.Series, n_splits: int
) -> tuple[np.ndarray, np.ndarray]:
    """Split a DataFrame, Series, or array like with np.array_split, but only return the start and
    stop indices, rather than chunks. For Pandas objects, this are equivalent to
    ``arr.iloc[start: end]`` and for NumPy: ``arr[start: end]``. Splits are done according
    to the 0th dimension.

    Args:
        arr(pd.DataFrame | pd.Series | np.ndarray): The array, data frame, or series to split.
        n_splits(:obj:`int`): The number of near equal or equal splits.

    Returns:
        tuple[np.ndarray, np.ndarray]
    """
    size = arr.shape[0]
    base = np.arange(n_splits)
    split_size = size // n_splits
    extra = size % n_splits

    starts = base * split_size
    ends = starts + split_size

    for i in range(extra):
        ends[i:] += 1
        starts[i + 1 :] += 1
    return starts, ends
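
Not part of the diff: `split_by_index` mirrors `np.array_split` chunking but returns only the chunk boundaries, e.g., for handing index ranges to workers. A small illustrative sketch (the DataFrame is made up):

import numpy as np
import pandas as pd

from dwind.utils.array import split_by_index

df = pd.DataFrame({"a": np.arange(10)})
starts, ends = split_by_index(df, n_splits=3)
# starts == array([0, 4, 7]); ends == array([4, 7, 10])
chunks = [df.iloc[s:e] for s, e in zip(starts, ends)]  # chunk sizes 4, 3, 3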
dwind/utils/hpc.py
ADDED
@@ -0,0 +1,138 @@
"""Provides the live timing table functionalities for the Kestrel :py:class:`MultiProcess` class."""

from __future__ import annotations

import io
import re
import time
import subprocess
from copy import deepcopy

import pandas as pd
from rich.table import Table
from rex.utilities.hpc import SLURM


def convert_seconds_for_print(time: float) -> str:
    """Convert number of seconds to number of hours, minutes, and seconds."""
    div = ((60, "seconds"), (60, "minutes"), (24, "hours"))

    result = []
    value = time
    for divisor, label in div:
        if not divisor:
            remainder = value
            if not remainder:
                break
        else:
            value, remainder = divmod(value, divisor)
            if not value and not remainder:
                break
        if remainder == 1:
            label = label[:-1]

        # 0.2 second precision for seconds, and no decimals otherwise
        if result:
            result.append(f"{remainder:,.0f} {label}")
        else:
            result.append(f"{remainder:.1f} {label}")
    if result:
        return ", ".join(reversed(result))
    return "0"


def update_status(job_status: dict) -> dict:
    """Get an updated status and timing statistics for all running jobs on the HPC.

    Args:
        job_status (dict): Dictionary of job id (primary key) with sub keys of "status",
            "start_time" (initial or start of run status), "wait", and "run".

    Returns:
        dict: Dictionary of updated statuses and timing statistics for all current queued and
            running jobs.
    """
    slurm = SLURM()
    update = {}
    for job, vals in job_status.items():
        original_status = vals["status"]
        if original_status in ("CG", "CF", "None", None):
            continue
        new_status = slurm.check_status(job_id=job)
        if new_status == "PD":
            update[job] = vals | {"status": new_status, "wait": time.perf_counter() - vals["start"]}
        elif new_status == "R":
            if original_status != "R":
                update[job] = vals | {
                    "status": new_status,
                    "wait": time.perf_counter() - vals["start"],
                    "start": time.perf_counter(),
                }
            else:
                update[job] = vals | {"run": time.perf_counter() - vals["start"]}
        elif new_status in ("CG", "CF", "None", None):
            update[job] = vals | {"status": new_status, "run": time.perf_counter() - vals["start"]}
        else:
            raise ValueError(f"Unaccounted for status code: {new_status}")
    return update


def generate_run_status_table(job_status: dict) -> tuple[Table, bool]:
    """Generate the job status run time statistics table.

    Args:
        job_status (dict): Dictionary of job id (primary key) with sub keys of "status",
            "start_time" (initial or start of run status), "wait", and "run".

    Returns:
        Table: ``rich.Table`` of human readable statistics.
        bool: True if all jobs are complete, otherwise False.
    """
    table = Table()
    table.add_column("Job ID")
    table.add_column("Status")
    table.add_column("Wait time")
    table.add_column("Run time")

    for job, vals in job_status.items():
        status = vals["status"]
        _wait = vals["wait"]
        _run = vals["run"]
        table.add_row(
            job, status, convert_seconds_for_print(_wait), convert_seconds_for_print(_run)
        )
    done = all(el["status"] in ("CG", "CF", None) for el in job_status.values())
    return table, done


def get_finished_run_status(jobs: int | str | list[int | str]) -> dict[str, str]:
    """Extracts a dictionary of job_id and status from the ``sacct`` output for a single
    job or series of jobs.

    Args:
        jobs (int | str | list[int | str]): Single job ID or list of job IDs that have finished
            running.

    Returns:
        dict[str, str]: Dictionary of {job_id_1: status_1, ..., job_id_N: status_N}.
    """
    if isinstance(jobs, (int, str)):  # noqa
        jobs = [jobs]
    jobs = [str(j) for j in jobs]

    # Format the command to be in the form of [sacct, -j, job_id_1, ..., -j job_id_N]
    command = deepcopy(jobs)
    for i in range(len(command) - 1, -1, -1):
        command.insert(i, "-j")
    command.insert(0, "sacct")
    results = subprocess.check_output(command)

    # Convert the sacct string output to be table-like
    buffer = io.StringIO(results.decode("utf8", "ignore"))
    lines = [re.split(" +", line) for line in buffer.readlines() if not line.startswith("-")]

    # Create a dataframe, and export a dictionary of the form job_id: job_status
    df = pd.DataFrame(lines[1:], columns=lines[0])
    df = df.loc[df.JobID.isin(jobs), ["JobID", "State"]]
    df.JobID = df.JobID.astype(int)
    return dict(df.values.tolist())
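
Not part of the diff: the formatting helper can be sanity-checked without SLURM access, although importing the module does require `rex` to be installed since it pulls in `SLURM` at import time.

from dwind.utils.hpc import convert_seconds_for_print

convert_seconds_for_print(3725)  # -> "1 hour, 2 minutes, 5.0 seconds"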
dwind/utils/loader.py
ADDED
@@ -0,0 +1,63 @@
"""Provides the core data loading methods for importing scenario data from flat files or SQL."""

from __future__ import annotations

from pathlib import Path

import pandas as pd
from sqlalchemy import create_engine

from dwind.config import Year


def load_df(file_or_table: str | Path, year: Year | None, sql_constructor: str | None = None):
    """Loads data from either a SQL table or file to a pandas ``DataFrame``.

    Args:
        file_or_table (str | Path): File name or path object, or SQL table where the data are
            located.
        year (:py:class:`dwind.config.Year`, optional): If used, only extracts the single year from
            a column called "year". Defaults to None.
        sql_constructor (str | None, optional): The SQL engine constructor string. Required if
            extracting from SQL. Defaults to None.
    """
    valid_extenstions = (".csv", ".pqt", ".parquet", ".pkl", ".pickle")
    if str(file_or_table).endswith(valid_extenstions):
        return _load_from_file(filename=file_or_table, year=year)

    return _load_from_sql(table=file_or_table, sql_constructor=sql_constructor, year=year)


def _load_from_file(filename: str | Path, year: Year | None) -> pd.DataFrame:
    """Loads tabular data from a file to a ``pandas.DataFrame``."""
    if isinstance(filename, str):
        filename = Path(filename).resolve()
    if not isinstance(filename, Path):
        raise TypeError(f"`filename` must be a valid path, not {filename=}")

    if filename.suffix == ".csv":
        df = pd.read_csv(filename, dtype_backend="pyarrow")
    elif filename.suffix in (".parquet", ".pqt"):
        df = pd.read_parquet(filename, dtype_backend="pyarrow")
    elif filename.suffix in (".pickle", ".pkl"):
        df = pd.read_pickle(filename, dtype_backend="pyarrow")
    else:
        raise ValueError(f"Only CSV, Parquet, and Pickle files allowed, not {filename=}")

    if year is not None:
        df = df.loc[df.year == year]

    return df


def _load_from_sql(table: str, sql_constructor: str, year: Year | None) -> pd.DataFrame:
    """Load tabular data from SQL."""
    where = f"where year = {year}" if year is not None else ""
    sql = f"""select * from diffusion_shared."{table}" {where};"""
    atlas_engine = create_engine(sql_constructor)

    with atlas_engine.connect() as conn:
        df = pd.read_sql(sql, con=conn.connection, dtype_backend="pyarrow")

    atlas_engine.dispose()
    return df
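
Not part of the diff: a usage sketch of the loader dispatch. The file name and connection string below are placeholders, not values shipped with the package; anything without a recognized extension is treated as a table in the `diffusion_shared` schema and read through SQLAlchemy.

from dwind.config import Year
from dwind.utils.loader import load_df

# Recognized extension -> routed to _load_from_file
agents = load_df("agents.pqt", year=Year._2025)

# No recognized extension -> routed to _load_from_sql (placeholder constructor string)
rates = load_df("urdb_rates", year=None, sql_constructor="postgresql://user:pass@host:5432/db")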
dwind/utils/progress.py
ADDED
@@ -0,0 +1,60 @@
import numpy as np
from joblib import Parallel, delayed
from threading import Thread
from rich.progress import Progress, BarColumn, TimeRemainingColumn, TextColumn
from rich.console import Console
from rich.live import Live
import time

# Define the number of tasks and create a shared memory numpy array to hold their progress
num_tasks = 4
progress_array = np.memmap("progress.mmap2", dtype=np.float32, mode="w+", shape=N)

# Define a function that performs a task and updates the progress array
def perform_task(task_idx, progress_array):
    for i in range(100):
        # Do some work here
        # ...

        # Update the progress array
        time.sleep(0.1)
        progress_array[task_idx] = i / 100

    # Update the progress array to 100% on completion
    progress_array[task_idx] = 1

# Define a function to continuously update the Rich progress bar
def update_progress_bar(
    progress_array=progress_array,
    num_tasks=num_tasks,
):
    with Progress(
        TextColumn("[bold blue]{task.fields[name]}"),
        BarColumn(),
        TextColumn("[bold green]{task.fields[status]}"),
        TimeRemainingColumn(),
        # console=console,
    ) as progress:
        tasks = [
            progress.add_task(
                description=f"Task {i}",
                name=f"Task {i}",
                status="pending",
                total=100,
            )
            for i in range(num_tasks)
        ]

        while not all(progress_array == 1):
            for i, task in enumerate(tasks):
                progress.update(task, completed=int(progress_array[i] * 100))
            time.sleep(0.1 * 2 ** abs(*np.random.randn(1)))


# Launch the progress bar update function in a separate thread
Thread(target=update_progress_bar, args=[progress_array, num_tasks]).start()

# Launch the tasks in parallel using joblib and the perform_task function
Parallel(n_jobs=-2, backend="loky")(
    delayed(perform_task)(i, progress_array) for i in range(num_tasks)
)
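
The added progress.py reads as a prototype: it runs at import time and `shape=N` references a name that is never defined (presumably the task count). Not part of the diff: a minimal, self-contained sketch of the same joblib-plus-rich pattern, under the assumption that `shape=(num_tasks,)` was the intent.

import time
from threading import Thread

import numpy as np
from joblib import Parallel, delayed
from rich.progress import BarColumn, Progress, TextColumn

num_tasks = 4
# File-backed progress array shared between worker processes and the display thread
progress_array = np.memmap("progress.mmap", dtype=np.float32, mode="w+", shape=(num_tasks,))


def perform_task(task_idx, progress_array):
    # Stand-in for real work: record this task's fraction complete as it progresses
    for i in range(1, 101):
        time.sleep(0.01)
        progress_array[task_idx] = i / 100


def show_progress(progress_array, num_tasks):
    # Poll the shared array and refresh one bar per task until every task reports 1.0
    with Progress(TextColumn("[bold blue]{task.description}"), BarColumn()) as progress:
        tasks = [progress.add_task(f"Task {i}", total=100) for i in range(num_tasks)]
        while not all(progress_array == 1):
            for i, task in enumerate(tasks):
                progress.update(task, completed=int(progress_array[i] * 100))
            time.sleep(0.1)


Thread(target=show_progress, args=(progress_array, num_tasks)).start()
Parallel(n_jobs=2, backend="loky")(
    delayed(perform_task)(i, progress_array) for i in range(num_tasks)
)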