PyPI - rdtools - Versions diffs - 3.1.1__tar.gz → 3.2.0__tar.gz - Mend

rdtools 3.1.1 (tar.gz) → 3.2.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

{rdtools-3.1.1/rdtools.egg-info → rdtools-3.2.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rdtools
-Version: 3.1.1
+Version: 3.2.0
 Summary: Functions for reproducible timeseries analysis of photovoltaic systems.
 Home-page: https://github.com/NREL/rdtools
 Author: Rdtools Python Developers

{rdtools-3.1.1 → rdtools-3.2.0}/rdtools/_version.py RENAMED Viewed

@@ -8,11 +8,11 @@ import json
 version_json = '''
 {
- "date": "2026-03-19T12:53:21-0400",
+ "date": "2026-07-01T15:51:37-0400",
  "dirty": false,
  "error": null,
- "full-revisionid": "0f8e3e739bf0ba9688b8d35fc799938b5935cfbf",
- "version": "3.1.1"
+ "full-revisionid": "5fe5e7e4fe612274c51c1fbe6dd419e8a6df5870",
+ "version": "3.2.0"
 }
 '''  # END VERSION_JSON

{rdtools-3.1.1 → rdtools-3.2.0}/rdtools/analysis_chains.py RENAMED Viewed

@@ -1053,7 +1053,7 @@ class TrendAnalysis:
             Analyses to perform as a list of strings. Valid entries are 'yoy_degradation'
             and 'srr_soiling'
         yoy_kwargs : dict
-            kwargs to pass to :py:func:`rdtools.degradation.degradation_year_on_year`
+            kwargs to pass to :py:func:`rdtools.degradation.degradation_year_on_year`.
         srr_kwargs : dict
             kwargs to pass to :py:func:`rdtools.soiling.soiling_srr`
@@ -1248,7 +1248,7 @@ class TrendAnalysis:
         ax.set_ylabel("PV Energy (Wh/timestep)")
         return fig
-    def plot_degradation_timeseries(self, case, rolling_days=365, **kwargs):
+    def plot_degradation_timeseries(self, case, rolling_days=365, center=None, **kwargs):
         """
         Plot resampled time series of degradation trend with time
@@ -1257,8 +1257,17 @@ class TrendAnalysis:
         case: str
             The workflow result to plot, allowed values are 'sensor' and 'clearsky'
         rolling_days: int, default 365
-            Number of days for rolling window. Note that the window must contain
-            at least 50% of datapoints to be included in rolling plot.
+            Number of days for rolling window. The window must contain at least
+            ``rolling_days // min_periods_divisor`` datapoints to be included in
+            the rolling plot; see
+            :py:func:`rdtools.plotting.degradation_timeseries_plot` for details
+            on ``min_periods_divisor`` and its pending default change.
+        center : bool, default False
+            If ``True``, the rolling window is centered and results are reindexed
+            using center timestamps before any calculations are performed.
+            The recommended value is ``True``; the default of ``False`` is retained
+            only for backward compatibility. A warning is raised when this argument
+            is not explicitly supplied.
         kwargs :
             Extra parameters passed to :py:func:`rdtools.plotting.degradation_timeseries_plot`
@@ -1274,7 +1283,7 @@ class TrendAnalysis:
         else:
             raise ValueError("case must be either 'sensor' or 'clearsky'")
-        fig = plotting.degradation_timeseries_plot(yoy_info, rolling_days, **kwargs)
+        fig = plotting.degradation_timeseries_plot(yoy_info, rolling_days, center=center, **kwargs)
         return fig

{rdtools-3.1.1 → rdtools-3.2.0}/rdtools/degradation.py RENAMED Viewed

@@ -179,7 +179,8 @@ def degradation_classical_decomposition(energy_normalized,
 def degradation_year_on_year(energy_normalized, recenter=True,
                              exceedance_prob=95, confidence_level=68.2,
-                             uncertainty_method='simple', block_length=30):
+                             uncertainty_method='simple', block_length=30,
+                             multi_yoy=False):
     '''
     Estimate the trend of a timeseries using the year-on-year decomposition
     approach and calculate a Monte Carlo-derived confidence interval of slope.
@@ -208,6 +209,11 @@ def degradation_year_on_year(energy_normalized, recenter=True,
         If `uncertainty_method` is 'circular_block', `block_length`
         determines the length of the blocks used in the circular block bootstrapping
         in number of days. Must be shorter than a third of the time series.
+    multi_yoy : bool, default False
+        If ``False`` (default), return the standard year-on-year slopes, where
+        each slope is calculated over points separated by 365 days. If ``True``,
+        return multi-year-on-year slopes, where points can be separated by
+        N * 365 days for any integer N from 1 to the dataset length in years.
     Returns
     -------
@@ -218,14 +224,24 @@ def degradation_year_on_year(energy_normalized, recenter=True,
         degradation rate estimate
     calc_info : dict
-        * `YoY_values` - pandas series of right-labeled year on year slopes
+        * `YoY_values` - pandas series of year on year slopes with integer index.
+          When ``multi_yoy=True`` the index is non-monotonic because multiple
+          overlapping annual slopes can share the same right-endpoint position.
         * `renormalizing_factor` - float of value used to recenter data
         * `exceedance_level` - the degradation rate that was outperformed with
           probability of `exceedance_prob`
         * `usage_of_points` - number of times each point in energy_normalized
           is used to calculate a degradation slope. 0: point is never used. 1:
           point is either used as a start or endpoint. 2: point is used as both
-          start and endpoint for an Rd calculation.
+          start and endpoint for an Rd calculation. With ``multi_yoy=True``,
+          values can be larger than 2 because each point participates in
+          multiple slopes.
+        * `YoY_times` - pandas DataFrame with columns ``dt_right``, ``dt_center``,
+          and ``dt_left`` giving, for each entry in ``YoY_values``, the
+          timestamps of the right endpoint, the midpoint, and the left endpoint
+          of the slope. This can be used to recover the original timestamp-
+          indexed behavior of ``YoY_values`` (for example,
+          ``calc_info['YoY_values'].set_axis(calc_info['YoY_times']['dt_right'])``).
     '''
     # Ensure the data is in order
@@ -269,37 +285,72 @@ def degradation_year_on_year(energy_normalized, recenter=True,
     energy_normalized = energy_normalized.reset_index()
     energy_normalized['energy'] = energy_normalized['energy'] / renorm
-    energy_normalized['dt_shifted'] = energy_normalized.dt + pd.DateOffset(years=1)
-    # Merge with what happened one year ago, use tolerance of 8 days to allow
-    # for weekly aggregated data
-    df = pd.merge_asof(energy_normalized[['dt', 'energy']],
-                       energy_normalized.sort_values('dt_shifted'),
-                       left_on='dt', right_on='dt_shifted',
-                       suffixes=['', '_right'],
-                       tolerance=pd.Timedelta('8D')
-                       )
-    df['time_diff_years'] = (df.dt - df.dt_right) / pd.Timedelta('365D')
-    df['yoy'] = 100.0 * (df.energy - df.energy_right) / (df.time_diff_years)
-    df.index = df.dt
+    # dataframe container for combined year-over-year changes
+    df = pd.DataFrame()
+    if multi_yoy:
+        year_range = range(1, int((energy_normalized.iloc[-1]['dt'] -
+                                   energy_normalized.iloc[0]['dt']).days/365)+1)
+    else:
+        year_range = [1]
+    for y in year_range:
+        energy_normalized['dt_shifted'] = energy_normalized.dt + pd.DateOffset(years=y)
+        # Merge with what happened y years ago, use tolerance of 8 days to allow
+        # for weekly aggregated data
+        df_temp = pd.merge_asof(energy_normalized[['dt', 'energy']],
+                                energy_normalized.sort_values('dt_shifted'),
+                                left_on='dt', right_on='dt_shifted',
+                                suffixes=['', '_left'],
+                                tolerance=pd.Timedelta('8D')
+                                )
+        df = pd.concat([df, df_temp], ignore_index=True)
+    df['time_diff_years'] = (df.dt - df.dt_left) / pd.Timedelta('365D')
+    df['yoy'] = 100.0 * (df.energy - df.energy_left) / (df.time_diff_years)
     yoy_result = df.yoy.dropna()
-    df_right = df.set_index(df.dt_right).drop_duplicates('dt_right')
-    df['usage_of_points'] = df.yoy.notnull().astype(int).add(
-                df_right.yoy.notnull().astype(int), fill_value=0)
     if not len(yoy_result):
         raise ValueError('no year-over-year aggregated data pairs found')
     Rd_pct = yoy_result.median()
+    YoY_times = df.dropna(subset=['yoy'], inplace=False).copy()
+    # calculate usage of points.
+    df_left = YoY_times.set_index(YoY_times.dt_left)  # .drop_duplicates('dt_left')
+    df_right = YoY_times.set_index(YoY_times.dt)  # .drop_duplicates('dt')
+    usage_of_points = df_right.yoy.notnull().astype(int).add(
+                df_left.yoy.notnull().astype(int),
+                fill_value=0).groupby(level=0).sum()
+    usage_of_points.name = 'usage_of_points'
+    pandas_version = pd.__version__.split(".")
+    if int(pandas_version[0]) < 2:
+        # For old Pandas versions < 2.0.0, time columns cannot be averaged
+        # with each other, so we use a custom function to calculate center label
+        YoY_times['dt_center'] = _avg_timestamp_old_Pandas(YoY_times['dt'], YoY_times['dt_left'])
+    else:
+        YoY_times['dt_center'] = pd.to_datetime(YoY_times[['dt', 'dt_left']].mean(axis=1))
+    YoY_times = YoY_times[['dt', 'dt_center', 'dt_left']]
+    YoY_times = YoY_times.rename(columns={'dt': 'dt_right'})
+    # apply integer index to the yoy_result; multi-YoY has duplicate timestamps.
+    yoy_result.index = YoY_times.index
+    yoy_result.index.name = 'dt'
+    # the following is throwing a futurewarning if infer_objects() isn't included here.
+    # see https://github.com/pandas-dev/pandas/issues/57734
+    energy_normalized = energy_normalized.merge(usage_of_points, how='left', left_on='dt',
+                                                right_index=True, left_index=False
+                                                ).infer_objects().fillna(0.0)
     if uncertainty_method == 'simple':  # If we need the full results
         calc_info = {
             'YoY_values': yoy_result,
             'renormalizing_factor': renorm,
-            'usage_of_points': df['usage_of_points']
+            'usage_of_points': energy_normalized.set_index('dt')['usage_of_points'],
+            'YoY_times': YoY_times[['dt_right', 'dt_center', 'dt_left']]
         }
         # bootstrap to determine 68% CI and exceedance probability
@@ -345,17 +396,79 @@ def degradation_year_on_year(energy_normalized, recenter=True,
         # Save calculation information
         calc_info = {
+            'YoY_values': yoy_result,
             'renormalizing_factor': renorm,
             'exceedance_level': exceedance_level,
-            'usage_of_points': df['usage_of_points'],
+            'usage_of_points': energy_normalized.set_index('dt')['usage_of_points'],
+            'YoY_times': YoY_times[['dt_right', 'dt_center', 'dt_left']],
             'bootstrap_rates': bootstrap_rates}
         return (Rd_pct, Rd_CI, calc_info)
     else:  # If we do not need confidence intervals and exceedance level
+        # TODO: Consider returning a tuple for consistency with other branches, e.g.:
+        # return (Rd_pct, None, {
+        #     'YoY_values': yoy_result,
+        #     'usage_of_points': energy_normalized.set_index('dt')['usage_of_points'],
+        #     'YoY_times': YoY_times[['dt_right', 'dt_center', 'dt_left']]}
+        # )
+        # Note: Current behavior intentionally returns only the scalar Rd_pct
+        # to maintain compatibility (see test_bootstrap_module).
         return Rd_pct
+def _avg_timestamp_old_Pandas(dt, dt_left):
+    '''
+    For old Pandas versions < 2.0.0, time columns cannot be averaged
+    together.  From https://stackoverflow.com/questions/57812300/
+    python-pandas-to-calculate-mean-of-datetime-of-multiple-columns
+    Parameters
+    ----------
+    dt : pandas.Series
+        First series with datetime values
+    dt_left : pandas.Series
+        Second series with datetime values.
+    Returns
+    -------
+    pandas.Series
+        Series with the average timestamp of ``dt`` and ``dt_left``.
+    '''
+    import calendar
+    # Remove timezone from datetime values for averaging
+    temp_df = pd.DataFrame(
+        {"dt": dt.dt.tz_localize(None), "dt_left": dt_left.dt.tz_localize(None)}
+    )
+    # conversion from dates to seconds since epoch (unix time)
+    def to_unix(s):
+        if isinstance(s, pd.Timestamp):
+            return calendar.timegm(s.timetuple())
+        else:
+            return pd.NaT
+    # sum the seconds since epoch, calculate average, and convert back to readable date
+    averages = []
+    for index, row in temp_df.iterrows():
+        unix = [to_unix(i) for i in row]
+        # unix = [pd.Timestamp(i).timestamp() for i in row]
+        try:
+            average = sum(unix) / len(unix)
+            # averages.append(datetime.datetime.utcfromtimestamp(average).strftime('%Y-%m-%d'))
+            averages.append(pd.to_datetime(average, unit='s'))
+        except TypeError:
+            averages.append(pd.NaT)
+    temp_df['averages'] = averages
+    dt_center = temp_df["averages"].dt.tz_localize(dt.dt.tz)
+    dt_center.index = dt.index
+    dt_center.name = "averages"
+    return dt_center
 def _mk_test(x, alpha=0.05):
     '''
     Mann-Kendall test of significance for trend (used in classical

{rdtools-3.1.1 → rdtools-3.2.0}/rdtools/plotting.py RENAMED Viewed

@@ -54,8 +54,8 @@ def degradation_summary_plots(yoy_rd, yoy_ci, yoy_info, normalized_yield,
         Include extra information in the returned figure:
         * Color code points by the number of times they get used in calculating
-          Rd slopes.  Default color: 2 times (as a start and endpoint). Green:
-          1 time. Red: 0 times.
+          Rd slopes.  Default color: an even, non-zero number of times (e.g. as
+          both a start and an endpoint). Green: an odd number of times. Red: 0 times.
         * The number of year-on-year slopes contributing to the histogram.
     Note
@@ -109,7 +109,11 @@ def degradation_summary_plots(yoy_rd, yoy_ci, yoy_info, normalized_yield,
     renormalized_yield = normalized_yield / yoy_info['renormalizing_factor']
     if detailed:
-        colors = yoy_info['usage_of_points'].map({0: 'red', 1: 'green', 2: plot_color})
+        # Color by usage parity: 0 -> red, odd -> green, even/non-zero or NaN -> plot_color
+        usage = yoy_info['usage_of_points']
+        colors = pd.Series(plot_color, index=usage.index)
+        colors[usage == 0] = 'red'
+        colors[usage % 2 == 1] = 'green'
     else:
         colors = plot_color
     ax1.scatter(
@@ -432,7 +436,8 @@ def availability_summary_plots(power_system, power_subsystem, loss_total,
 def degradation_timeseries_plot(yoy_info, rolling_days=365, include_ci=True,
-                                fig=None, plot_color=None, ci_color=None, **kwargs):
+                                fig=None, plot_color=None, ci_color=None,
+                                center=None, min_periods_divisor=None, **kwargs):
     '''
     Plot resampled time series of degradation trend with time
@@ -441,10 +446,14 @@ def degradation_timeseries_plot(yoy_info, rolling_days=365, include_ci=True,
     yoy_info : dict
         a dictionary with keys:
-        * YoY_values - pandas series of right-labeled year on year slopes
+        * YoY_values - pandas series of year on year slopes with integer index.
+        * YoY_times - pandas DataFrame containing ``dt_left``, ``dt_center``
+           and ``dt_right`` timestamp columns, indexed by the same integer window
+           id as ``YoY_values``.
     rolling_days: int, default 365
-        Number of days for rolling window. Note that the window must contain
-        at least 50% of datapoints to be included in rolling plot.
+        Number of days for rolling window. The window must contain at least
+        ``rolling_days // min_periods_divisor`` datapoints to be included in
+        the rolling plot.
     include_ci : bool, default True
         calculate and plot 2-sigma confidence intervals along with rolling median
     fig     : matplotlib, optional
@@ -453,6 +462,21 @@ def degradation_timeseries_plot(yoy_info, rolling_days=365, include_ci=True,
         color of the timeseries trendline
     ci_color : str, optional
         color of the confidence interval 'fuzz'
+    center : bool, default False
+        If ``True``, the rolling window is centered and ``results_values`` is
+        reindexed using ``yoy_info['YoY_times']['dt_center']`` before any calculations are
+        performed.  The recommended value is ``True``; the default of ``False``
+        is retained only for backward compatibility.  A warning is raised when
+        this argument is not explicitly supplied.
+    min_periods_divisor : int, optional
+        Divisor applied to ``rolling_days`` to set the minimum number of
+        observations required in a window. Smaller values (e.g. 2) require
+        the window to be more populated; larger values (e.g. 4) make the
+        plot more resilient to small data outages without losing fidelity.
+        Defaults to 2 in this release to match the behavior in rdtools
+        prior to the multi-YoY changes. A ``FutureWarning`` is emitted when
+        the default is used; the default will change to 4 in a future major
+        release. Pass an explicit value to silence the warning.
     kwargs :
         Extra parameters passed to matplotlib.pyplot.axis.plot()
@@ -466,6 +490,27 @@ def degradation_timeseries_plot(yoy_info, rolling_days=365, include_ci=True,
     matplotlib.figure.Figure
     '''
+    if center is None:
+        warnings.warn(
+            "The default value of 'center' will remain False for backward "
+            "compatibility, but center=True is recommended. Pass "
+            "center=True to silence this warning.",
+            UserWarning,
+            stacklevel=2,
+        )
+        center = False
+    if min_periods_divisor is None:
+        warnings.warn(
+            "The default `min_periods_divisor=2` will change to 4 in a future "
+            "major release of rdtools, which makes the rolling plot more "
+            "resilient to small data outages. Pass `min_periods_divisor` "
+            "explicitly to silence this warning.",
+            FutureWarning,
+            stacklevel=2,
+        )
+        min_periods_divisor = 2
     def _bootstrap(x, percentile, reps):
         # stolen from degradation_year_on_year
         n1 = len(x)
@@ -474,29 +519,52 @@ def degradation_timeseries_plot(yoy_info, rolling_days=365, include_ci=True,
         return np.percentile(mb1, percentile)
     try:
-        results_values = yoy_info['YoY_values']
+        results_values = yoy_info['YoY_values'].copy()
     except KeyError:
         raise KeyError("yoy_info input dictionary does not contain key `YoY_values`.")
+    # keep only slopes spanning at most 2 years + 1 day to avoid over-smoothing in the multi-yoy case
+    # (applied before index reassignment while integer index still aligns with YoY_times)
+    yoy_durations = yoy_info['YoY_times']['dt_right'] - yoy_info['YoY_times']['dt_left']
+    results_values = results_values[
+        results_values.index.map(yoy_durations) <= pd.Timedelta(days=365 * 2 + 1)
+    ]
+    if center:
+        try:
+            results_values.index = results_values.index.map(yoy_info['YoY_times']['dt_center'])
+        except KeyError:
+            raise KeyError("yoy_info input dict doesn't contain key `YoY_times['dt_center']`, "
+                           "which is required when center=True.")
+    else:
+        results_values.index = results_values.index.map(yoy_info['YoY_times']['dt_right'])
+    results_values = results_values.sort_index()
     if plot_color is None:
         plot_color = 'tab:orange'
     if ci_color is None:
         ci_color = 'C0'
-    roller = results_values.rolling(f'{rolling_days}d', min_periods=rolling_days//2)
-    # unfortunately it seems that you can't return multiple values in the rolling.apply() kernel.
-    # TODO: figure out some workaround to return both percentiles in a single pass
+    roller = results_values.rolling(f'{rolling_days}D',
+                                    min_periods=rolling_days // min_periods_divisor,
+                                    center=center)
     if include_ci:
         ci_lower = roller.apply(_bootstrap, kwargs={'percentile': 2.5, 'reps': 100}, raw=True)
         ci_upper = roller.apply(_bootstrap, kwargs={'percentile': 97.5, 'reps': 100}, raw=True)
+        ci_lower = ci_lower[~ci_lower.index.duplicated(keep='last')]
+        ci_upper = ci_upper[~ci_upper.index.duplicated(keep='last')]
+    rolling_median = roller.median()
+    rolling_median = rolling_median[~rolling_median.index.duplicated(keep='last')]
     if fig is None:
         fig, ax = plt.subplots()
     else:
         ax = fig.axes[0]
     if include_ci:
         ax.fill_between(ci_lower.index, ci_lower, ci_upper, color=ci_color)
-    ax.plot(roller.median(), color=plot_color, **kwargs)
+    ax.plot(rolling_median, color=plot_color, **kwargs)
     ax.axhline(results_values.median(), c='k', ls='--')
     plt.ylabel('Degradation trend (%/yr)')
     fig.autofmt_xdate()

{rdtools-3.1.1 → rdtools-3.2.0/rdtools.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rdtools
-Version: 3.1.1
+Version: 3.2.0
 Summary: Functions for reproducible timeseries analysis of photovoltaic systems.
 Home-page: https://github.com/NREL/rdtools
 Author: Rdtools Python Developers