dwind 0.3.1 → 0.3.2 (py3-none-any.whl)

dwind/resource.py CHANGED
@@ -1,23 +1,75 @@
+"""Provides the :py:class:`ResourcePotential` class for gathering pre-calculated reV generation
+data.
+"""
+
 import h5py as h5
 import pandas as pd
 
-from dwind import Configuration
+from dwind.config import Sector, Technology, Configuration
 
 
 class ResourcePotential:
+    """Helper class designed to retrieve pre-calculated energy generation data from reV."""
+
     def __init__(
-        self, parcels, model_config: Configuration, tech="wind", application="fom", year="2018"
+        self,
+        parcels: pd.DataFrame,
+        model_config: Configuration,
+        sector: Sector,
+        tech: str = "wind",
+        year: int = 2018,
     ):
+        """Initializes the :py:class:`ResourcePotential` instance.
+
+        Args:
+            parcels (pd.DataFrame): The agent DataFrame containing at least the following columns:
+                "gid", "rev_gid_{tech}", "solar_az_tilt" (solar only), "azimuth_{sector}"
+                (solar only), "tilt_{tech}" (solar only), "turbine_class" (wind only),
+                "wind_turbine_kw" (wind only), and "turbine_height_m" (wind only).
+            model_config (Configuration): The pre-loaded model configuration data object containing
+                the requisite SQL, file, and configuration data.
+            sector (dwind.config.Sector): A valid sector instance.
+            tech (str, optional): One of "solar" or "wind". Defaults to "wind".
+            year (int, optional): Resource year for the reV lookup. Defaults to 2018.
+
+        Raises:
+            ValueError: Raised if :py:attr:`parcels` is missing any of the required columns.
+        """
         self.df = parcels
-        self.tech = tech
-        self.application = application
+        self.tech = Technology(tech)
+        self.sector = sector
         self.year = year
         self.config = model_config
 
-        if self.tech not in ("wind", "solar"):
-            raise ValueError("`tech` must be one of 'solar' or 'wind'.")
-
-    def create_rev_gid_to_summary_lkup(self, configs, save_csv=True):
+        solar_cols = ("solar_az_tilt", f"azimuth_{self.sector.value}", f"tilt_{self.tech.value}")
+        # wind_cols = ("turbine_class", "wind_turbine_kw", "turbine_height_m")
+        wind_cols = ("wind_turbine_kw", "turbine_height_m")
+
+        if self.tech is Technology.WIND:
+            cols = wind_cols
+        elif self.tech is Technology.SOLAR:
+            cols = solar_cols
+
+        missing = set(cols).difference(self.df.columns.tolist())
+        if missing:
+            raise ValueError(f"`parcels` is missing the following columns: {', '.join(missing)}")
+
+    def create_rev_gid_to_summary_lkup(
+        self, configs: list[str], *, save_csv: bool = True
+    ) -> pd.DataFrame:
+        """Creates the reV summary tables based on the "gid" mappings in :py:attr:`parcels`.
+
+        Args:
+            configs (list[str]): The list of technology-specific configurations for which the
+                generation data should be retrieved.
+            save_csv (bool, optional): If True, save the resulting lookup calculated from reV to
+                the reV folder defined in ``Configuration.rev.generation.{tech}_DIR``. Defaults to
+                True.
+
+        Returns:
+            pd.DataFrame: reV generation lookup table for the technology-specific configurations in
+                :py:attr:`configs`.
+        """
         config_dfs = []
         for c in configs:
             file_str = self.config.rev.DIR / f"rev_{c}_generation_{self.year}.h5"
@@ -30,10 +82,10 @@ class ResourcePotential:
 
         config_df = pd.concat([rev_index, gids, annual_energy, cf_mean], axis=1)
         config_df.columns = [
-            f"rev_index_{self.tech}",
-            f"rev_gid_{self.tech}",
-            f"{self.tech}_naep",
-            f"{self.tech}_cf",
+            f"rev_index_{self.tech.value}",
+            f"rev_gid_{self.tech.value}",
+            f"{self.tech.value}_naep",
+            f"{self.tech.value}_cf",
         ]
 
         config_df["config"] = c
@@ -43,87 +95,107 @@ class ResourcePotential:
 
         if save_csv:
             save_name = (
-                self.config.rev.generation[f"{self.tech}_DIR"]
-                / f"lkup_rev_gid_to_summary_{self.tech}_{self.year}.csv"
+                self.config.rev.generation[f"{self.tech.value}_DIR"]
+                / f"lkup_rev_gid_to_summary_{self.tech.value}_{self.year}.csv"
             )
             summary_df.to_csv(save_name, index=False)
 
         return summary_df
 
     def find_rev_summary_table(self):
-        if self.tech == "solar":
+        """Creates the generation summary data for each of the :py:attr:`tech`-specific
+        configurations specified in :py:attr:`config.rev.settings.{tech}`, then maps it to the
+        agent data (:py:attr:`parcels`), overwriting any previously computed data.
+        """
+        if self.tech is Technology.SOLAR:
             configs = self.config.rev.settings.solar
             config_col = "solar_az_tilt"
-            col_list = ["gid", f"rev_gid_{self.tech}", config_col]
-            self.df[config_col] = self.df[f"azimuth_{self.application}"].map(
+            col_list = ["gid", f"rev_gid_{self.tech.value}", config_col]
+            self.df[config_col] = self.df[f"azimuth_{self.sector.value}"].map(
                 self.config.rev.settings.azimuth_direction_to_degree
             )
             self.df[config_col] = (
-                self.df[config_col].astype(str) + "_" + self.df[f"tilt_{self.tech}"].astype(str)
+                self.df[config_col].astype(str)
+                + "_"
+                + self.df[f"tilt_{self.tech.value}"].astype(str)
             )
-        elif self.tech == "wind":
+        elif self.tech is Technology.WIND:
             configs = self.config.rev.settings.wind
             config_col = "turbine_class"
             col_list = [
                 "gid",
-                f"rev_gid_{self.tech}",
+                f"rev_gid_{self.tech.value}",
                 config_col,
                 "turbine_height_m",
                 "wind_turbine_kw",
             ]
             self.df[config_col] = self.df["wind_turbine_kw"].map(self.config.rev.turbine_class_dict)
 
-        out_cols = [*col_list, f"rev_index_{self.tech}", f"{self.tech}_naep", f"{self.tech}_cf"]
-
-        drop_cols = [f"rev_gid_{self.tech}", f"{self.tech}_naep", f"{self.tech}_cf"]
+        out_cols = [
+            *col_list,
+            f"rev_index_{self.tech.value}",
+            f"{self.tech.value}_naep",
+            f"{self.tech.value}_cf",
+        ]
+
+        drop_cols = [
+            f"rev_gid_{self.tech.value}",
+            f"{self.tech.value}_naep",
+            f"{self.tech.value}_cf",
+        ]
         self.df = self.df.drop(columns=[c for c in drop_cols if c in self.df])
 
         f_gen = (
-            self.config.rev.generation[f"{self.tech}_DIR"]
-            / f"lkup_rev_gid_to_summary_{self.tech}_{self.year}.csv"
+            self.config.rev.generation[f"{self.tech.value}_DIR"]
+            / f"lkup_rev_gid_to_summary_{self.tech.value}_{self.year}.csv"
         )
 
         if f_gen.exists():
-            generation_summary = pd.read_csv(f_gen)
+            generation_summary = pd.read_csv(f_gen, dtype_backend="pyarrow")
         else:
             generation_summary = self.create_rev_gid_to_summary_lkup(configs)
 
         generation_summary = (
             generation_summary.reset_index(drop=True)
-            .drop_duplicates(subset=[f"rev_index_{self.tech}", "config"])
+            .drop_duplicates(subset=[f"rev_index_{self.tech.value}", "config"])
             .rename(columns={"config": config_col})
         )
         agents = self.df.merge(
-            generation_summary, how="left", on=[f"rev_index_{self.tech}", config_col]
+            generation_summary, how="left", on=[f"rev_index_{self.tech.value}", config_col]
         )
         return agents[out_cols]
 
     def prepare_agents_for_gen(self):
-        # create lookup column based on each tech
-        if self.tech == "wind":
+        """Create lookup column based on each technology."""
+        if self.tech is Technology.WIND:
             # drop wind turbine size duplicates
             # SINCE WE ASSUME ANY TURBINE IN A GIVEN CLASS HAS THE SAME POWER CURVE
             self.df.drop_duplicates(subset=["gid", "wind_size_kw"], keep="last", inplace=True)
-            # if running FOM application, only consider a single (largest) turbine size
-            if self.application == "fom":
+            # if running FOM sector, only consider a single (largest) turbine size
+            if self.sector is Sector.FOM:
                 self.df = self.df.loc[self.df["wind_size_kw"] == self.df["wind_size_kw_fom"]]
 
             self.df["turbine_class"] = self.df["wind_turbine_kw"].map(
                 self.config.rev.turbine_class_dict
             )
 
-        if self.tech == "solar":
-            # NOTE: tilt and azimuth are application-specific
-            self.df["solar_az_tilt"] = self.df[f"azimuth_{self.application}"].map(
+        if self.tech is Technology.SOLAR:
+            # NOTE: tilt and azimuth are sector-specific
+            self.df["solar_az_tilt"] = self.df[f"azimuth_{self.sector.value}"].map(
                 self.config.rev.settings.azimuth_direction_to_degree
             )
             self.df["solar_az_tilt"] = self.df["solar_az_tilt"].astype(str)
             self.df["solar_az_tilt"] = (
-                self.df["solar_az_tilt"] + "_" + self.df[f"tilt_{self.application}"].astype(str)
+                self.df["solar_az_tilt"] + "_" + self.df[f"tilt_{self.sector.value}"].astype(str)
             )
 
-    def merge_gen_to_agents(self, tech_agents):
-        if self.tech == "wind":
+    def merge_gen_to_agents(self, tech_agents: pd.DataFrame):
+        """Merges :py:attr:`tech_agents` to the parcel data :py:attr:`df`.
+
+        Args:
+            tech_agents (pd.DataFrame): The technology-specific energy generation data.
+        """
+        if self.tech is Technology.WIND:
             cols = ["turbine_height_m", "wind_turbine_kw", "turbine_class"]
         else:
             # NOTE: need to drop duplicates in solar agents
@@ -133,16 +205,23 @@ class ResourcePotential:
             )
             cols = ["solar_az_tilt"]
 
-        cols.extend(["gid", f"rev_index_{self.tech}"])
+        cols.extend(["gid", f"rev_index_{self.tech.value}"])
 
         self.df = self.df.merge(tech_agents, how="left", on=cols)
 
     def match_rev_summary_to_agents(self):
+        """Runs the energy generation gathering and merging steps, and returns the updated
+        :py:attr:`df` agent/parcel data.
+
+        Returns:
+            pd.DataFrame: Updated agent/parcel data with recalculated "wind_aep" or "solar_aep"
+                information for each agent.
+        """
         self.prepare_agents_for_gen()
         tech_agents = self.find_rev_summary_table()
         self.merge_gen_to_agents(tech_agents)
 
-        if self.tech == "wind":
+        if self.tech is Technology.WIND:
             # fill nan generation values
             self.df = self.df.loc[
                 ~((self.df["wind_naep"].isnull()) & (self.df["turbine_class"] != "none"))
@@ -153,7 +232,7 @@ class ResourcePotential:
             # calculate annual energy production (aep)
             self.df["wind_aep"] = self.df["wind_naep"] * self.df["wind_turbine_kw"]
             # self.df = self.df.drop(columns="turbine_class")
-        else:
+        elif self.tech is Technology.SOLAR:
             # fill nan generation values
             self.df = self.df.loc[~(self.df["solar_naep"].isnull())]
            # size groundmount system to equal wind aep
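Usage note: the practical effect of this change is that `ResourcePotential` now validates its inputs at construction time and keys its behavior off the `Sector` and `Technology` enums rather than free-form strings. A minimal sketch of the new validation, assuming only what the diff shows (the parcel values and the elided `Configuration` setup are illustrative, and a fully populated frame is still required by the downstream reV lookup):

```python
# Sketch of the 0.3.2 constructor validation; parcel values are hypothetical.
import pandas as pd

from dwind.config import Sector
from dwind.resource import ResourcePotential

config = ...  # a loaded dwind.config.Configuration (construction not shown here)

parcels = pd.DataFrame({"gid": [1], "wind_turbine_kw": [1500.0]})  # no "turbine_height_m"
try:
    ResourcePotential(parcels, model_config=config, sector=Sector.FOM, tech="wind")
except ValueError as err:
    print(err)  # `parcels` is missing the following columns: turbine_height_m

parcels["turbine_height_m"] = 80.0
# tech is still passed as a string, but is coerced to Technology internally.
potential = ResourcePotential(parcels, model_config=config, sector=Sector.FOM, tech="wind")
agents = potential.match_rev_summary_to_agents()  # also needs the reV lookup columns
```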
dwind/scenarios.py CHANGED
@@ -1,48 +1,80 @@
+"""Provides the scenario-specific mapping for varying financial and model configuration data."""
+
 import json
 from pathlib import Path
 
 import pandas as pd
 
+from dwind.config import Year, Scenario
 
-def config_nem(scenario, year):
-    # NEM_SCENARIO_CSV
-    nem_opt_scens = ["highrecost", "lowrecost", "re100"]
-    # nem_opt_scens = ['der_value_HighREcost', 'der_value_LowREcost', 're_100']
-    if scenario in nem_opt_scens:
-        nem_scenario_csv = "nem_optimistic_der_value_2035.csv"
-    elif scenario == "baseline" and year in (2022, 2025, 2035):
-        nem_scenario_csv = f"nem_baseline_{year}.csv"
-    else:
-        nem_scenario_csv = "nem_baseline_2035.csv"
 
-    return nem_scenario_csv
+def config_nem(scenario: Scenario, year: Year) -> str:
+    """Provides the NEM configuration based on :py:attr:`scenario` and :py:attr:`year`.
 
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+        year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
 
-def config_cambium(scenario):
-    # CAMBIUM_SCENARIO
-    if scenario == "highrecost" or scenario == "re100":
-        cambium_scenario = "StdScen20_HighRECost"
-    elif scenario == "lowrecost":
-        cambium_scenario = "StdScen20_LowRECost"
-    else:
-        # cambium_scenario = "StdScen20_MidCase"
-        cambium_scenario = "Cambium23_MidCase"
+    Returns:
+        str: Name of the NEM scenario file to use.
+    """
+    if scenario in (Scenario.HIGHRECOST, Scenario.LOWRECOST, Scenario.RE100):
+        return "nem_optimistic_der_value_2035.csv"
+
+    if scenario is Scenario.BASELINE and year in (Year._2022, Year._2025, Year._2035):
+        return f"nem_baseline_{year.value}.csv"
+
+    return "nem_baseline_2035.csv"
+
+
+def config_cambium(scenario: Scenario) -> str:
+    """Loads the Cambium configuration name based on :py:attr:`scenario`.
+
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
 
-    return cambium_scenario
+    Returns:
+        str: Name of the Cambium scenario to use.
+    """
+    if scenario in (Scenario.HIGHRECOST, Scenario.RE100):
+        return "StdScen20_HighRECost"
 
+    if scenario is Scenario.LOWRECOST:
+        return "StdScen20_LowRECost"
 
-def config_costs(scenario, year):
-    # COST_INPUTS
-    f = Path(f"/projects/dwind/configs/costs/atb24/ATB24_costs_{scenario}_{year}.json").resolve()
+    return "Cambium23_MidCase"
+
+
+def config_costs(scenario: Scenario, year: Year) -> dict:
+    """Loads the cost configuration based on the ATB analysis.
+
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+        year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
+
+    Returns:
+        dict: Dictionary of ATB assumptions to be used for PySAM's cost inputs.
+    """
+    f = Path(
+        f"/projects/dwind/configs/costs/atb24/ATB24_costs_{scenario.value}_{year.value}.json"
+    ).resolve()
     with f.open("r") as f_in:
         cost_inputs = json.load(f_in)
 
     return cost_inputs
 
 
-def config_performance(scenario, year):
-    # PERFORMANCE_INPUTS
-    if scenario == "baseline" and year == 2022:
+def config_performance(scenario: Scenario, year: Year) -> pd.DataFrame:
+    """Loads the technology performance configurations.
+
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+        year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
+
+    Returns:
+        pd.DataFrame: Performance data based on the scale of each technology.
+    """
+    if scenario is Scenario.BASELINE and year is Year._2022:
         performance_inputs = {
             "solar": pd.DataFrame(
                 [
@@ -108,16 +140,21 @@ def config_performance(scenario, year):
     return performance_inputs
 
 
-def config_financial(scenario, year):
-    # FINANCIAL_INPUTS
-    scenarios = ("baseline", "metering", "billing")
-    if scenario in scenarios and year == 2025:
+def config_financial(scenario: Scenario, year: Year) -> dict:
+    """Loads the financial configuration based on the ATB analysis.
+
+    Args:
+        scenario (:py:class:`dwind.config.Scenario`): Valid :py:class:`dwind.config.Scenario`.
+        year (:py:class:`dwind.config.Year`): Valid :py:class:`dwind.config.Year`.
+
+    Returns:
+        dict: Dictionary of ATB assumptions to be used for configuring PySAM.
+    """
+    if year is Year._2025:
         f = f"/projects/dwind/configs/costs/atb24/ATB24_financing_baseline_{year}.json"
-        i = Path("/projects/dwind/data/incentives/2025_incentives.json").resolve()
-        with i.open("r") as i_in:
-            incentives = pd.DataFrame.from_dict(json.load(i_in)).T
-        incentives.index.name = "census_tract_id"
-    elif scenario in scenarios and year in (2035, 2040):
+        i = Path("/projects/dwind/data/incentives/2025_incentives.pqt").resolve()
+        incentives = pd.read_parquet(i, dtype_backend="pyarrow")
+    elif year in (Year._2035, Year._2040):
         f = "/projects/dwind/configs/costs/atb24/ATB24_financing_baseline_2035.json"
     else:
         # use old assumptions
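Usage note: because the scenario and year lookups are now keyed on enum members, a mistyped string fails at enum construction instead of silently falling through to the `else` branch. A small sketch, assuming `Year._2025.value == 2025` and that the `Scenario`/`Year` members match those referenced in the diff:

```python
# Sketch of the enum-based scenario lookups; assumes Year._2025.value == 2025.
from dwind.config import Year, Scenario
from dwind.scenarios import config_nem, config_cambium

print(config_nem(Scenario.BASELINE, Year._2025))  # "nem_baseline_2025.csv"
print(config_nem(Scenario.RE100, Year._2040))     # "nem_optimistic_der_value_2035.csv"
print(config_cambium(Scenario.LOWRECOST))         # "StdScen20_LowRECost"
```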
dwind/utils/array.py CHANGED
@@ -1,11 +1,25 @@
+"""Provides a series of generic NumPy and Pandas utility functions."""
+
 from __future__ import annotations
 
 import numpy as np
 import pandas as pd
 
 
-def memory_downcaster(df):
-    assert isinstance(df, pd.DataFrame) | isinstance(df, pd.Series)
+def memory_downcaster(df: pd.DataFrame | pd.Series) -> pd.DataFrame | pd.Series:
+    """Downcasts ``int`` and ``float`` columns to the lowest-memory alternative possible. For
+    integers, this means converting to either signed or unsigned 8-, 16-, 32-, or 64-bit integers,
+    and for floats, converting to ``np.float32``.
+
+    Args:
+        df (pd.DataFrame | pd.Series): DataFrame or Series to have its memory footprint reduced.
+
+    Returns:
+        pd.DataFrame | pd.Series: Reduced-footprint version of the passed :py:attr:`df`.
+    """
+    # if not isinstance(df, pd.DataFrame | pd.Series):
+    if not isinstance(df, (pd.DataFrame, pd.Series)):  # noqa
+        raise TypeError("Input value must be a Pandas DataFrame or Series.")
 
     NAlist = []
     for col in df.select_dtypes(include=[np.number]).columns:
@@ -56,93 +70,6 @@ def memory_downcaster(df):
     return df
 
 
-def interpolate_array(row, col_1, col_2, col_in, col_out):
-    if row[col_in] != 0:
-        interpolated = row[col_in] * (row[col_2] - row[col_1]) + row[col_1]
-    else:
-        interpolated = row[col_1]
-
-    row[col_out] = interpolated
-
-    return row
-
-
-def scale_array_precision(df: pd.DataFrame, hourly_col: str, prec_offset_col: str):
-    """Scales the precision of :py:attr:`hourly_col` by the :py:attr:`prec_offset_col`.
-
-    Args:
-        df (pd.DataFrame): A Pandas DataFrame containing :py:att:`hourly_col` and
-            :py:att:`prec_offset_col`.
-        hourly_col (str) The column to adjust the precision.
-        prec_offset_col (str): The column for scaling the precison of :py:attr:`hourly_col`.
-
-    Returns:
-        pd.DataFrame: The input :py:attr:`df` with the precision of :py:attr:`hourly_col` scaled.
-    """
-    df[hourly_col] = (
-        np.array(df[hourly_col].values.tolist(), dtype="float64")
-        / df[prec_offset_col].values.reshape(-1, 1)
-    ).tolist()
-    return df
-
-
-def scale_array_deprecision(df: pd.DataFrame, col: str | list[str]) -> pd.DataFrame:
-    """Rounds the column(s) :py:attr:`col` to the nearest 2nd decimal and converts to NumPy's
-    float32.
-
-    Args:
-        df (pd.DataFrame): A Pandas DataFrame containing :py:att:`col`.
-        col (str | list[str]): The column(s) to have reduced precision.
-
-    Returns:
-        pd.DataFrame: The input :py:attr:`df` with the precision of :py:attr:`col` lowered.
-    """
-    df[col] = np.round(np.round(df[col], 2).astype(np.float32), 2)
-    return df
-
-
-def scale_array_sum(df: pd.DataFrame, hourly_col: str, scale_col: str) -> pd.DataFrame:
-    """Scales the :py:attr:`hourly_col` by its sum and multiples by the :py:attr:`scale_col`.
-
-    Args:
-        df (pd.DataFrame): Pandas DataFrame containing the :py:attr:`hourly_col` and
-            :py:attr:`scale_col`.
-        hourly_col (str): The name of the column to be scaled whose values are lists.
-        scale_col (str): The column to scale the :py:attr:`hourly_col`.
-
-    Returns:
-        pandas.DataFrame: The input dataframe, but with the values of the :py:attr:`hourly_col`
-            scaled appropriately.
-    """
-    hourly_array = np.array(df[hourly_col].values.tolist())
-    df[hourly_col] = (
-        hourly_array / hourly_array.sum(axis=1).reshape(-1, 1) * df[scale_col].values.reshape(-1, 1)
-    ).tolist()
-    return df
-
-
-def scale_array_multiplier(
-    df: pd.DataFrame, hourly_col: str, multiplier_col: str, col_out: str
-) -> pd.DataFrame:
-    """Scales the :py:attr:`hourly_col` values by the :py:attr:`multiplier_col`, and places it in
-    the :py:attr:`col_out`.
-
-    Args:
-        df (pd.DataFrame): The Pandas DataFrame containing the :py:attr:`hourly_col` and
-            :py:attr:`multiplier_col`.
-        hourly_col (str): A column of hourly values as a list of floats in each cell.
-        multiplier_col (str): The column used to scale the :py:attr:`hourly_col`.
-        col_out (str): A new column that will contain the scaled data.
-
-    Returns:
-        pd.DataFrame: A new copy of the original data (:py:attr:`df`) containing the
-            :py:attr:`col_out` column.
-    """
-    hourly_array = np.array(df[hourly_col].values.tolist())
-    df[col_out] = (hourly_array * df[multiplier_col].values.reshape(-1, 1)).tolist()
-    return df
-
-
 def split_by_index(
     arr: pd.DataFrame | np.ndarray | pd.Series, n_splits: int
 ) -> tuple[np.ndarray, np.ndarray]:
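Usage note: the only behavioral change in `memory_downcaster` is the guard, where the `assert` (which disappears under `python -O`) becomes an explicit `TypeError`. A quick behavior sketch; the exact downcast dtypes follow the docstring rather than code shown in this hunk:

```python
# Behavior sketch for the assert -> TypeError change in memory_downcaster.
import numpy as np
import pandas as pd

from dwind.utils.array import memory_downcaster

df = pd.DataFrame({"a": np.arange(5, dtype="int64"), "b": np.linspace(0.0, 1.0, 5)})
df = memory_downcaster(df)  # small ints shrink toward 8-bit, floats to float32

try:
    memory_downcaster([1, 2, 3])  # raised an AssertionError in 0.3.1
except TypeError as err:
    print(err)  # Input value must be a Pandas DataFrame or Series.
```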
dwind/utils/hpc.py CHANGED
@@ -1,5 +1,14 @@
+"""Provides the live timing table functionalities for the Kestrel :py:class:`MultiProcess` class."""
+
+from __future__ import annotations
+
+import io
+import re
 import time
+import subprocess
+from copy import deepcopy
 
+import pandas as pd
 from rich.table import Table
 from rex.utilities.hpc import SLURM
 
@@ -68,7 +77,7 @@ def update_status(job_status: dict) -> dict:
     return update
 
 
-def generate_table(job_status: dict) -> tuple[Table, bool]:
+def generate_run_status_table(job_status: dict) -> tuple[Table, bool]:
     """Generate the job status run time statistics table.
 
     Args:
@@ -92,5 +101,38 @@ def generate_table(job_status: dict) -> tuple[Table, bool]:
         table.add_row(
             job, status, convert_seconds_for_print(_wait), convert_seconds_for_print(_run)
         )
-    done = all(el["status"] in ("CG", None) for el in job_status.values())
+    done = all(el["status"] in ("CG", "CF", None) for el in job_status.values())
     return table, done
+
+
+def get_finished_run_status(jobs: int | str | list[int | str]) -> dict[str, str]:
+    """Extracts a dictionary of job IDs and statuses from the ``sacct`` output for a single
+    job or series of jobs.
+
+    Args:
+        jobs (int | str | list[int | str]): Single job ID or list of job IDs that have finished
+            running.
+
+    Returns:
+        dict[str, str]: Dictionary of {job_id_1: status_1, ..., job_id_N: status_N}.
+    """
+    if isinstance(jobs, (int, str)):  # noqa
+        jobs = [jobs]
+    jobs = [str(j) for j in jobs]
+
+    # Format the command to be in the form of [sacct, -j, job_id_1, ..., -j, job_id_N]
+    command = deepcopy(jobs)
+    for i in range(len(command) - 1, -1, -1):
+        command.insert(i, "-j")
+    command.insert(0, "sacct")
+    results = subprocess.check_output(command)
+
+    # Convert the sacct string output to be table-like
+    buffer = io.StringIO(results.decode("utf8", "ignore"))
+    lines = [re.split(" +", line) for line in buffer.readlines() if not line.startswith("-")]
+
+    # Create a dataframe, and export a dictionary of the form job_id: job_status
+    df = pd.DataFrame(lines[1:], columns=lines[0])
+    df = df.loc[df.JobID.isin(jobs), ["JobID", "State"]]
+    df.JobID = df.JobID.astype(int)
+    return dict(df.values.tolist())
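Usage note: `get_finished_run_status` builds the `sacct` query by walking the job list backwards and inserting a `-j` flag before each ID, so earlier inserts never shift the indices still to be visited. This standalone snippet reproduces just that command assembly:

```python
# Standalone trace of the command assembly in get_finished_run_status.
from copy import deepcopy

jobs = ["1001", "1002", "1003"]
command = deepcopy(jobs)
# Iterate backwards so each insert lands before the corresponding job ID.
for i in range(len(command) - 1, -1, -1):
    command.insert(i, "-j")
command.insert(0, "sacct")
print(command)  # ['sacct', '-j', '1001', '-j', '1002', '-j', '1003']
```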
dwind/utils/loader.py ADDED
@@ -0,0 +1,63 @@
+"""Provides the core data loading methods for importing scenario data from flat files or SQL."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import pandas as pd
+from sqlalchemy import create_engine
+
+from dwind.config import Year
+
+
+def load_df(file_or_table: str | Path, year: Year | None, sql_constructor: str | None = None):
+    """Loads data from either a SQL table or file to a pandas ``DataFrame``.
+
+    Args:
+        file_or_table (str | Path): File name or path object, or SQL table where the data are
+            located.
+        year (:py:class:`dwind.config.Year`, optional): If used, only extracts the single year from
+            a column called "year". Defaults to None.
+        sql_constructor (str | None, optional): The SQL engine constructor string. Required if
+            extracting from SQL. Defaults to None.
+    """
+    valid_extensions = (".csv", ".pqt", ".parquet", ".pkl", ".pickle")
+    if str(file_or_table).endswith(valid_extensions):
+        return _load_from_file(filename=file_or_table, year=year)
+
+    return _load_from_sql(table=file_or_table, sql_constructor=sql_constructor, year=year)
+
+
+def _load_from_file(filename: str | Path, year: Year | None) -> pd.DataFrame:
+    """Loads tabular data from a file to a ``pandas.DataFrame``."""
+    if isinstance(filename, str):
+        filename = Path(filename).resolve()
+    if not isinstance(filename, Path):
+        raise TypeError(f"`filename` must be a valid path, not {filename=}")
+
+    if filename.suffix == ".csv":
+        df = pd.read_csv(filename, dtype_backend="pyarrow")
+    elif filename.suffix in (".parquet", ".pqt"):
+        df = pd.read_parquet(filename, dtype_backend="pyarrow")
+    elif filename.suffix in (".pickle", ".pkl"):
+        df = pd.read_pickle(filename, dtype_backend="pyarrow")
+    else:
+        raise ValueError(f"Only CSV, Parquet, and Pickle files allowed, not {filename=}")
+
+    if year is not None:
+        df = df.loc[df.year == year]
+
+    return df
+
+
+def _load_from_sql(table: str, sql_constructor: str, year: Year | None) -> pd.DataFrame:
+    """Load tabular data from SQL."""
+    where = f"where year = {year}" if year is not None else ""
+    sql = f"""select * from diffusion_shared."{table}" {where};"""
+    atlas_engine = create_engine(sql_constructor)
+
+    with atlas_engine.connect() as conn:
+        df = pd.read_sql(sql, con=conn.connection, dtype_backend="pyarrow")
+
+    atlas_engine.dispose()
+    return df
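Usage note: `load_df` dispatches on the file extension, so anything ending in a recognized suffix goes through the pandas readers, and everything else is treated as a table in the `diffusion_shared` schema. A sketch with hypothetical paths, table name, and connection string; note that the year filter compares the "year" column against the enum member, which assumes the `Year` values compare equal to the stored integers:

```python
# Usage sketch for the new loader; the path, table name, and connection
# string below are hypothetical.
from dwind.config import Year
from dwind.utils.loader import load_df

# Recognized extension -> _load_from_file, optionally filtered to one year.
agents = load_df("/projects/dwind/data/agents.pqt", year=Year._2025)

# No recognized extension -> _load_from_sql against diffusion_shared."<table>".
rates = load_df(
    "urdb_rates",
    year=None,
    sql_constructor="postgresql+psycopg2://user:password@host:5432/dgen_db",
)
```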