PyPI - rdtools - Versions diffs - 2.2.0b2__tar.gz → 3.0.0a4__tar.gz - Mend

rdtools 2.2.0b2tar.gz → 3.0.0a4tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

{rdtools-2.2.0b2/rdtools.egg-info → rdtools-3.0.0a4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: rdtools
-Version: 2.2.0b2
+Version: 3.0.0a4
 Summary: Functions for reproducible timeseries analysis of photovoltaic systems.
 Home-page: https://github.com/NREL/rdtools
 Author: Rdtools Python Developers

{rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/_version.py RENAMED Viewed

@@ -8,11 +8,11 @@ import json
 version_json = '''
 {
- "date": "2023-12-01T15:24:38-0700",
+ "date": "2023-12-01T15:34:09-0700",
  "dirty": false,
  "error": null,
- "full-revisionid": "250e412bda8199491d8dc45673752374913b9c65",
- "version": "2.2.0-beta.2"
+ "full-revisionid": "2bd60f469d51e39b0a0b23f7a765bc093bd31823",
+ "version": "3.0.0-alpha.4"
 }
 '''  # END VERSION_JSON

{rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/aggregation.py RENAMED Viewed

@@ -22,7 +22,7 @@ def aggregation_insol(energy_normalized, insolation, frequency='D'):
     aggregated : pandas.Series
         Insolation weighted average, aggregated at frequency
     '''
-    aggregated = (insolation * energy_normalized).resample(frequency).sum() / \
-        insolation.resample(frequency).sum()
+    aggregated = (insolation * energy_normalized).resample(frequency, origin='start_day').sum() / \
+        insolation.resample(frequency, origin='start_day').sum()
     return aggregated

{rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/analysis_chains.py RENAMED Viewed

@@ -138,7 +138,7 @@ class TrendAnalysis():
             'poa_filter': {},
             'tcell_filter': {},
             'clip_filter': {},
-            'csi_filter': {},
+            'pvlib_clearsky_filter': {},
             'ad_hoc_filter': None  # use this to include an explict filter
         }
         self.filter_params_aggregated = {
@@ -236,10 +236,10 @@ class TrendAnalysis():
                                   freq='1min')
             aggregate = True
-        if self.pvlib_location is None:
+        if not hasattr(self, 'pvlib_location'):
             raise ValueError(
                 'pvlib location must be provided using set_clearsky()')
-        if self.pv_tilt is None or self.pv_azimuth is None:
+        if not hasattr(self, 'pv_tilt') or not hasattr(self, 'pv_azimuth'):
             raise ValueError(
                 'pv_tilt and pv_azimuth must be provided using set_clearsky()')
@@ -332,9 +332,9 @@ class TrendAnalysis():
         Calculate clear-sky ambient temperature and store in self.temperature_ambient_clearsky
         '''
         times = self.poa_global_clearsky.index
-        if self.pvlib_location is None:
+        if not hasattr(self, 'pvlib_location'):
             raise ValueError(
-                'pvlib location must be provided using set_clearsky()')
+                'pvlib_location must be provided using set_clearsky()')
         loc = self.pvlib_location
         cs_amb_temp = clearsky_temperature.get_clearsky_tamb(
@@ -411,6 +411,18 @@ class TrendAnalysis():
         -------
         None
         '''
+        # Clearsky filtering subroutine, called either by the clearsky analysis
+        # (filter_params key 'pvlib_clearsky_filter') or by the sensor analysis
+        # (filter_params key 'sensor_pvlib_clearsky_filter').
+        def _call_clearsky_filter(filter_string):
+            '''
+            Run filtering.pvlib_clearsky_filter on self.poa_global and
+            self.poa_global_clearsky.
+
+            Parameters
+            ----------
+            filter_string : str
+                Key into self.filter_params whose value is unpacked as keyword
+                arguments for the filter; it is also echoed in the error
+                message.
+
+            Returns
+            -------
+            The value returned by filtering.pvlib_clearsky_filter.
+
+            Raises
+            ------
+            ValueError
+                If either self.poa_global or self.poa_global_clearsky is None.
+            '''
+            if self.poa_global is None or self.poa_global_clearsky is None:
+                raise ValueError('Both poa_global and poa_global_clearsky must be available to '
+                                 f'do clearsky filtering with {filter_string}')
+            f = filtering.pvlib_clearsky_filter(
+                self.poa_global, self.poa_global_clearsky, **self.filter_params[filter_string])
+            return f
         # Combining filters is non-trivial because of the possibility of index
         # mismatch.  Adding columns to an existing dataframe performs a left index
         # join, but probably we actually want an outer join.  We can get an outer
@@ -452,13 +464,22 @@ class TrendAnalysis():
             f = filtering.clip_filter(
                 self.pv_power, **self.filter_params['clip_filter'])
             filter_components['clip_filter'] = f
+        if 'hour_angle_filter' in self.filter_params:
+            if not hasattr(self, 'pvlib_location'):
+                raise ValueError(
+                    'The pvlib location must be provided using set_clearsky() '
+                    'or by directly setting TrendAnalysis.pvlib_location '
+                    'in order to use the hour_angle_filter')
+            loc = self.pvlib_location
+            f = filtering.hour_angle_filter(
+                energy_normalized, loc.latitude, loc.longitude,
+                 **self.filter_params['hour_angle_filter'])
+            filter_components['hour_angle_filter'] = f
         if case == 'clearsky':
-            if self.poa_global is None or self.poa_global_clearsky is None:
-                raise ValueError('Both poa_global and poa_global_clearsky must be available to '
-                                 'do clearsky filtering with csi_filter')
-            f = filtering.csi_filter(
-                self.poa_global, self.poa_global_clearsky, **self.filter_params['csi_filter'])
-            filter_components['csi_filter'] = f
+            filter_components['pvlib_clearsky_filter'] = _call_clearsky_filter('pvlib_clearsky_filter')
+        if 'sensor_pvlib_clearsky_filter' in self.filter_params:
+            filter_components['sensor_pvlib_clearsky_filter'] = _call_clearsky_filter('sensor_pvlib_clearsky_filter')
         # note: the previous implementation using the & operator treated NaN
         # filter values as False, so we do the same here for consistency:
@@ -515,7 +536,33 @@ class TrendAnalysis():
         """
         filter_components_aggregated = {'default':
                                         pd.Series(True, index=aggregated.index)}
+        if case == 'sensor':
+            insol = self.sensor_aggregated_insolation
+        if case == 'clearsky':
+            insol = self.clearsky_aggregated_insolation
         # Add daily aggregate filters as they come online here.
+        if 'two_way_window_filter' in self.filter_params_aggregated:
+            f = filtering.two_way_window_filter(
+                aggregated, **self.filter_params_aggregated['two_way_window_filter'])
+            filter_components_aggregated['two_way_window_filter'] = f
+        if 'insolation_filter' in self.filter_params_aggregated:
+            f = filtering.insolation_filter(
+                insol, **self.filter_params_aggregated['insolation_filter'])
+            filter_components_aggregated['insolation_filter'] = f
+        if 'hampel_filter' in self.filter_params_aggregated:
+            hampelmask = filtering.hampel_filter(aggregated,
+                                                 **self.filter_params_aggregated['hampel_filter'])
+            filter_components_aggregated['hampel_filter'] = hampelmask
+        if 'directional_tukey_filter' in self.filter_params_aggregated:
+            f = filtering.directional_tukey_filter(aggregated,
+                                                 **self.filter_params_aggregated['directional_tukey_filter'])
+            filter_components_aggregated['directional_tukey_filter'] = f
         # Convert the dictionary into a dataframe (after running filters)
         filter_components_aggregated = pd.DataFrame(
             filter_components_aggregated).fillna(False)
@@ -587,7 +634,7 @@ class TrendAnalysis():
         aggregated = aggregation.aggregation_insol(
             energy_normalized, insolation, self.aggregation_freq)
         aggregated_insolation = insolation.resample(
-            self.aggregation_freq).sum()
+            self.aggregation_freq, origin='start_day').sum()
         return aggregated, aggregated_insolation
@@ -677,7 +724,14 @@ class TrendAnalysis():
         if self.poa_global is None:
             raise ValueError(
                 'poa_global must be available to perform _sensor_preprocess')
+        if 'sensor_pvlib_clearsky_filter' in self.filter_params:
+            try:
+                if self.poa_global_clearsky is None:
+                    self._calc_clearsky_poa(model='isotropic')
+            except AttributeError:
+                raise AttributeError("No poa_global_clearsky. 'set_clearsky' must be run " +
+                                     "to allow filter_params['sensor_pvlib_clearsky_filter']. ")
         if self.power_expected is None:
             # Thermal details required if power_expected is not manually set.
             if self.temperature_cell is None and self.temperature_ambient is None:
@@ -694,16 +748,20 @@ class TrendAnalysis():
         self._filter(energy_normalized, 'sensor')
         aggregated, aggregated_insolation = self._aggregate(
             energy_normalized[self.sensor_filter], insolation[self.sensor_filter])
         # Run daily filters on aggregated data
+        self.sensor_aggregated_insolation = aggregated_insolation
         self._aggregated_filter(aggregated, 'sensor')
         # Apply filter to aggregated data and store
         self.sensor_aggregated_performance = aggregated[self.sensor_filter_aggregated]
         self.sensor_aggregated_insolation = aggregated_insolation[self.sensor_filter_aggregated]
         # Reindex the data after the fact, so it's on the aggregated interval
-        self.sensor_aggregated_performance = self.sensor_aggregated_performance.asfreq(
-            self.aggregation_freq)
-        self.sensor_aggregated_insolation = self.sensor_aggregated_insolation.asfreq(
-            self.aggregation_freq)
+        self.sensor_aggregated_performance = self.sensor_aggregated_performance.resample(
+            self.aggregation_freq, origin='start_day').asfreq()
+        self.sensor_aggregated_insolation = self.sensor_aggregated_insolation.resample(
+            self.aggregation_freq, origin='start_day').asfreq()
     def _clearsky_preprocess(self):
         '''
@@ -732,17 +790,21 @@ class TrendAnalysis():
         self._filter(cs_normalized, 'clearsky')
         cs_aggregated, cs_aggregated_insolation = self._aggregate(
             cs_normalized[self.clearsky_filter], cs_insolation[self.clearsky_filter])
         # Run daily filters on aggregated data
+        self.clearsky_aggregated_insolation = cs_aggregated_insolation
         self._aggregated_filter(cs_aggregated, 'clearsky')
         # Apply daily filter to aggregated data and store
         self.clearsky_aggregated_performance = cs_aggregated[self.clearsky_filter_aggregated]
         self.clearsky_aggregated_insolation = \
             cs_aggregated_insolation[self.clearsky_filter_aggregated]
         # Reindex the data after the fact, so it's on the aggregated interval
-        self.clearsky_aggregated_performance = self.clearsky_aggregated_performance.asfreq(
-            self.aggregation_freq)
-        self.clearsky_aggregated_insolation = self.clearsky_aggregated_insolation.asfreq(
-            self.aggregation_freq)
+        self.clearsky_aggregated_performance = self.clearsky_aggregated_performance.resample(
+            self.aggregation_freq, origin='start_day').asfreq()
+        self.clearsky_aggregated_insolation = self.clearsky_aggregated_insolation.resample(
+            self.aggregation_freq, origin='start_day').asfreq()
     def sensor_analysis(self, analyses=['yoy_degradation'], yoy_kwargs={}, srr_kwargs={}):
         '''

{rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/degradation.py RENAMED Viewed

@@ -231,10 +231,11 @@ def degradation_year_on_year(energy_normalized, recenter=True,
     energy_normalized.index.name = 'dt'
     # Detect sub-daily data:
-    if min(np.diff(energy_normalized.index.values, n=1)) < \
-            np.timedelta64(23, 'h'):
-        raise ValueError('energy_normalized must not be '
-                         'more frequent than daily')
+    # disabling this check while we experiment with morning/evening agregation
+    # if min(np.diff(energy_normalized.index.values, n=1)) < \
+    #         np.timedelta64(23, 'h'):
+    #     raise ValueError('energy_normalized must not be '
+    #                      'more frequent than daily')
     # Detect less than 2 years of data. This is complicated by two things:
     #   - leap days muddle the precise meaning of "two years of data".
@@ -276,7 +277,8 @@ def degradation_year_on_year(energy_normalized, recenter=True,
     # Merge with what happened one year ago, use tolerance of 8 days to allow
     # for weekly aggregated data
-    df = pd.merge_asof(energy_normalized[['dt', 'energy']], energy_normalized,
+    df = pd.merge_asof(energy_normalized[['dt', 'energy']],
+                       energy_normalized.sort_values('dt_shifted'),
                        left_on='dt', right_on='dt_shifted',
                        suffixes=['', '_right'],
                        tolerance=pd.Timedelta('8D')

{rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/filtering.py RENAMED Viewed

@@ -4,7 +4,9 @@ import numpy as np
 import pandas as pd
 import os
 import warnings
+import pvlib
 from numbers import Number
+from scipy.interpolate import interp1d
 import rdtools
 import xgboost as xgb
@@ -122,6 +124,107 @@ def csi_filter(poa_global_measured, poa_global_clearsky, threshold=0.15):
     return (csi >= 1.0 - threshold) & (csi <= 1.0 + threshold)
+def pvlib_clearsky_filter(poa_global_measured, poa_global_clearsky,
+                          window_length=90, mean_diff=75, max_diff=75,
+                          lower_line_length=-45, upper_line_length=80,
+                          var_diff=0.032, slope_dev=75,
+                          lookup_parameters=False, **kwargs):
+    '''
+    Filtering based on the Reno and Hansen method for clearsky filtering
+    as implemented in pvlib. Requires a regular time series with uniform
+    time steps.
+
+    Parameters
+    ----------
+    poa_global_measured : pandas.Series
+        Plane of array irradiance based on measurements
+    poa_global_clearsky : pandas.Series
+        Plane of array irradiance based on a clear sky model
+    window_length : int, default 90
+        Length of sliding time window in minutes. Must be greater than 2
+        periods.
+    mean_diff : float, default 75
+        Threshold value for agreement between mean values of measured
+        and clearsky in each interval, see Eq. 6 in [1]. [W/m2]
+    max_diff : float, default 75
+        Threshold value for agreement between maxima of measured and
+        clearsky values in each interval, see Eq. 7 in [1]. [W/m2]
+    lower_line_length : float, default -45
+        Lower limit of line length criterion from Eq. 8 in [1].
+        Criterion satisfied when lower_line_length < line length difference
+        < upper_line_length.
+    upper_line_length : float, default 80
+        Upper limit of line length criterion from Eq. 8 in [1].
+    var_diff : float, default 0.032
+        Threshold value in Hz for the agreement between normalized
+        standard deviations of rate of change in irradiance, see Eqs. 9
+        through 11 in [1].
+    slope_dev : float, default 75
+        Threshold value for agreement between the largest magnitude of
+        change in successive values, see Eqs. 12 through 14 in [1].
+    lookup_parameters : bool, default False
+        Look up the recommended parameters [2] based on the
+        frequency of poa_global_measured. If poa_global_measured has a defined
+        frequency, this overrides the values of window_length, max_diff,
+        var_diff, and slope_dev. For frequencies below 1 minute or greater than
+        30, the lookup uses the recommended parameters for 1 or 30 minutes
+        respectively. If poa_global_measured doesn't have a defined frequency,
+        the passed or default values of the parameters are used.
+    kwargs :
+        Additional arguments passed to pvlib.clearsky.detect_clearsky.
+        return_components is always forced to False; any value supplied
+        by the caller is overridden.
+
+    Returns
+    -------
+    pandas.Series
+        Boolean Series of whether or not the given time is clear.
+
+    References
+    ----------
+    [1] M.J. Reno and C.W. Hansen, Renewable Energy 90, pp. 520-531 (2016)
+    [2] D.C. Jordan and C.W. Hansen, Renewable Energy 209 pp. 393-400 (2023)
+    '''
+    if lookup_parameters and poa_global_measured.index.freq:
+        # Recommended parameters from [2], tabulated by sampling interval in
+        # minutes. np.interp interpolates linearly between table entries and
+        # clamps to the first/last entry outside the 1-30 minute range.
+        frequencies = np.array([1, 5, 15, 30])
+        windows = np.array([50, 60, 90, 120])
+        max_diffs = np.array([60, 65, 75, 90])
+        var_diffs = np.array([0.005, 0.01, 0.032, 0.07])
+        slope_devs = np.array([50, 60, 75, 96])
+
+        freq_minutes = poa_global_measured.index.freq.nanos / 10**9 / 60
+
+        # Cast to plain floats so downstream code receives scalars rather
+        # than numpy array objects.
+        window_length = float(np.interp(freq_minutes, frequencies, windows))
+        max_diff = float(np.interp(freq_minutes, frequencies, max_diffs))
+        var_diff = float(np.interp(freq_minutes, frequencies, var_diffs))
+        slope_dev = float(np.interp(freq_minutes, frequencies, slope_devs))
+
+    # Outer-join the two inputs so both are on one shared index before they
+    # are handed to pvlib.
+    df = pd.concat([poa_global_measured, poa_global_clearsky], axis=1, join='outer')
+    df.columns = ['measured', 'clearsky']
+
+    # This wrapper only returns the boolean mask, never the component dict.
+    kwargs['return_components'] = False
+
+    mask = pvlib.clearsky.detect_clearsky(
+        df['measured'], df['clearsky'],
+        window_length=window_length, mean_diff=mean_diff, max_diff=max_diff,
+        lower_line_length=lower_line_length, upper_line_length=upper_line_length,
+        var_diff=var_diff, slope_dev=slope_dev, **kwargs)
+    return mask
 def clip_filter(power_ac, model="quantile", **kwargs):
     """
     Master wrapper for running one of the desired clipping filters.
@@ -412,11 +515,11 @@ def logic_clip_filter(power_ac,
        detection techniques in AC power time series", 2021 IEEE 48th Photovoltaic
        Specialists Conference (PVSC). DOI: 10.1109/PVSC43889.2021.9518733.
     '''
-    # Throw a warning that this is still an experimental filter
-    warnings.warn("The logic-based filter is an experimental clipping filter "
-                  "that is still under development. The API, results, and "
-                  "default behaviors may change in future releases (including "
-                  "MINOR and PATCH). Use at your own risk!")
+    # Throw a warning that this is still an experimental filter. (Removed for 3.0.0)
+    #warnings.warn("The logic-based filter is an experimental clipping filter "
+    #              "that is still under development. The API, results, and "
+    #              "default behaviors may change in future releases (including "
+    #              "MINOR and PATCH). Use at your own risk!")
     # Format the time series
     power_ac, index_name = _format_clipping_time_series(power_ac,
                                                         mounting_type)
@@ -743,3 +846,120 @@ def xgboost_clip_filter(power_ac,
                       & (power_ac_df['scaled_value'] >= .1))
     final_clip = final_clip.reindex(index=power_ac.index, fill_value=False)
     return ~(final_clip.astype(bool))
+def two_way_window_filter(series, roll_period=pd.to_timedelta('7 Days'), outlier_threshold=0.03):
+    '''
+    Removes outliers based on forward and backward window of the rolling median. Points beyond
+    outlier_threshold from both the forward and backward-looking median are excluded by the filter.
+
+    Parameters
+    ----------
+    series: pandas.Series
+        Pandas time series to be filtered.
+    roll_period : int or timedelta, default 7 days
+        The window to use for backward and forward
+        rolling medians for detecting outliers.
+    outlier_threshold : float, default 0.03
+        Maximum allowed absolute deviation from the rolling median. It is
+        compared against the series after the series has been divided by its
+        own 99th percentile, so 0.03 means 3% of that percentile value.
+
+    Returns
+    -------
+    pandas.Series
+        Boolean Series, True where the smaller of the backward and forward
+        deviations is below outlier_threshold. A point whose rolling median
+        is NaN on either side (fewer than 5 observations in that window) is
+        treated as having zero deviation on that side and is therefore kept.
+    '''
+    # Normalize by the 99th percentile so the threshold is a relative quantity
+    series = series/series.quantile(0.99)
+    backward_median = series.rolling(roll_period, min_periods=5, closed='both').median()
+    # Rolling over the reversed series gives the forward-looking window; the
+    # subtraction below re-aligns the result with `series` on index labels.
+    forward_median = series.loc[::-1].rolling(roll_period, min_periods=5, closed='both').median()
+    backward_dif = abs(series-backward_median)
+    forward_dif = abs(series-forward_median)
+    # This is a change from Matt's original logic, which can exclude
+    # points with a NaN median
+    backward_dif.fillna(0, inplace=True)
+    forward_dif.fillna(0, inplace=True)
+    # Element-wise minimum: a point is rejected only if it fails both directions
+    dif_min=backward_dif.combine(forward_dif,min,0)
+    mask=dif_min<outlier_threshold
+    return mask
+def insolation_filter(insolation, quantile=0.1):
+    '''
+    Build a mask that drops the lowest-insolation points.
+
+    TODO: figure out if this should be more general
+
+    Parameters
+    ----------
+    insolation : pandas.Series
+        Insolation values to be filtered.
+    quantile : float, default 0.1
+        Quantile of `insolation` used as the cut-off.
+
+    Returns
+    -------
+    pandas.Series
+        Boolean Series, True where insolation is greater than or equal to
+        the value of the requested quantile.
+    '''
+    return insolation.ge(insolation.quantile(quantile))
+def hampel_filter(vals, k='14d', t0=3):
+    '''
+    Hampel outlier filter, primarily intended for daily normalized data but
+    broadly applicable.
+
+    Parameters
+    ----------
+    vals : pandas.Series
+        daily normalized time series
+    k : int or time offset string e.g. 'd', default 14d
+        size of the centered rolling window including the sample; 14d is
+        equal to 7 days on either side of value
+    t0 : int, default 3
+        Threshold multiplier, defaults to 3 sigma Pearson's rule.
+
+    Returns
+    -------
+    pandas.Series
+        Boolean Series, True where the absolute deviation from the rolling
+        median is within t0 scaled median-absolute-deviations. False points
+        indicate outliers to be removed.
+    '''
+    # Scale factor applied to the median absolute deviation
+    # (presumably the usual MAD-to-sigma factor for normal data)
+    mad_scale = 1.4826
+
+    def _centered_median(data):
+        # Centered rolling median that tolerates sparsely filled windows
+        return data.rolling(k, center=True, min_periods=1).median()
+
+    deviation = (_centered_median(vals) - vals).abs()
+    limit = t0 * mad_scale * _centered_median(deviation)
+    return deviation <= limit
+def _tukey_fence(series, k=1.5):
+    '''
+    Calculates the upper and lower tukey fences from a pandas series.
+
+    Parameters
+    ----------
+    series : pandas.Series
+        Values from which the quartiles are computed.
+    k : float, default 1.5
+        Multiplier applied to the interquartile range for both fences.
+
+    Returns
+    -------
+    tuple
+        (lower_fence, upper_fence), i.e. (p25 - k*iqr, p75 + k*iqr).
+    '''
+    p25 = series.quantile(0.25)
+    p75 = series.quantile(0.75)
+    iqr = p75 - p25
+    upper_fence = p75 + k*iqr
+    # Use k here as well; a hard-coded 1.5 would make the fences
+    # asymmetric whenever a non-default k is requested.
+    lower_fence = p25 - k*iqr
+    return lower_fence, upper_fence
+def directional_tukey_filter(series, roll_period=pd.to_timedelta('7 Days'), k=1.5):
+    '''
+    Performs a forward and backward looking rolling tukey filter. Points must only
+    pass one of either the forward or backward looking filters to be kept
+
+    Parameters
+    ----------
+    series : pandas.Series
+        Pandas time series to be filtered.
+    roll_period : int or timedelta, default 7 days
+        The window to use for the backward and forward rolling medians.
+    k : float, default 1.5
+        Multiplier passed to _tukey_fence when computing the fences.
+
+    Returns
+    -------
+    pandas.Series
+        Boolean Series, True where the deviation from the forward or the
+        backward rolling median lies strictly between the corresponding
+        lower and upper fences.
+    '''
+    backward_median = series.rolling(roll_period, min_periods=5, closed='both').median()
+    # Rolling over the reversed series gives the forward-looking window; the
+    # subtraction below re-aligns the result with `series` on index labels.
+    forward_median = series.loc[::-1].rolling(roll_period, min_periods=5, closed='both').median()
+    backward_dif = series - backward_median
+    forward_dif = series - forward_median
+    # One pair of fences per direction, computed over the whole series of
+    # deviations (the fences themselves are not rolling).
+    backward_dif_lower, backward_dif_upper = _tukey_fence(backward_dif, k)
+    forward_dif_lower, forward_dif_upper = _tukey_fence(forward_dif, k)
+    # Comparisons against NaN deviations evaluate to False, so a point with no
+    # valid median in a direction can only be kept by the other direction.
+    mask = (
+            ((forward_dif > forward_dif_lower) & (forward_dif < forward_dif_upper)) |
+            ((backward_dif > backward_dif_lower) & (backward_dif < backward_dif_upper))
+            )
+    return mask
+def hour_angle_filter(series, lat, lon, min_hour_angle=-30, max_hour_angle=30):
+    '''
+    Creates a filter based on the hour angle of the sun (15 degrees per hour)
+
+    Parameters
+    ----------
+    series : pandas.Series
+        Time series whose index supplies the timestamps to evaluate.
+    lat : float
+        Latitude passed to pvlib.solarposition.get_solarposition.
+    lon : float
+        Longitude passed to pvlib for the solar position and hour angle.
+    min_hour_angle : float, default -30
+        Smallest hour angle kept by the filter (inclusive).
+    max_hour_angle : float, default 30
+        Largest hour angle kept by the filter (inclusive).
+
+    Returns
+    -------
+    pandas.Series
+        Boolean Series on the index of `series`, True where the hour angle
+        lies within [min_hour_angle, max_hour_angle].
+    '''
+    index = series.index
+    # The equation of time comes from the solar position calculation
+    equation_of_time = pvlib.solarposition.get_solarposition(
+        index, lat, lon)['equation_of_time']
+    angles = pd.Series(
+        pvlib.solarposition.hour_angle(index, lon, equation_of_time),
+        index=index)
+    return angles.between(min_hour_angle, max_hour_angle)

{rdtools-2.2.0b2 → rdtools-3.0.0a4/rdtools.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: rdtools
-Version: 2.2.0b2
+Version: 3.0.0a4
 Summary: Functions for reproducible timeseries analysis of photovoltaic systems.
 Home-page: https://github.com/NREL/rdtools
 Author: Rdtools Python Developers