rdtools 2.2.0b2__tar.gz → 3.0.0a4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. {rdtools-2.2.0b2/rdtools.egg-info → rdtools-3.0.0a4}/PKG-INFO +1 -1
  2. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/_version.py +3 -3
  3. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/aggregation.py +2 -2
  4. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/analysis_chains.py +83 -21
  5. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/degradation.py +7 -5
  6. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/filtering.py +225 -5
  7. {rdtools-2.2.0b2 → rdtools-3.0.0a4/rdtools.egg-info}/PKG-INFO +1 -1
  8. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/LICENSE +0 -0
  9. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/MANIFEST.in +0 -0
  10. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/README.md +0 -0
  11. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/__init__.py +0 -0
  12. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/_deprecation.py +0 -0
  13. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/availability.py +0 -0
  14. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/bootstrap.py +0 -0
  15. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/clearsky_temperature.py +0 -0
  16. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/data/temperature.hdf5 +0 -0
  17. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/models/xgboost_clipping_model.json +0 -0
  18. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/normalization.py +0 -0
  19. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/plotting.py +0 -0
  20. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/soiling.py +0 -0
  21. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools.egg-info/SOURCES.txt +0 -0
  22. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools.egg-info/dependency_links.txt +0 -0
  23. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools.egg-info/not-zip-safe +0 -0
  24. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools.egg-info/requires.txt +0 -0
  25. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools.egg-info/top_level.txt +0 -0
  26. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/setup.cfg +0 -0
  27. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/setup.py +0 -0
  28. {rdtools-2.2.0b2 → rdtools-3.0.0a4}/versioneer.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: rdtools
3
- Version: 2.2.0b2
3
+ Version: 3.0.0a4
4
4
  Summary: Functions for reproducible timeseries analysis of photovoltaic systems.
5
5
  Home-page: https://github.com/NREL/rdtools
6
6
  Author: Rdtools Python Developers
@@ -8,11 +8,11 @@ import json
8
8
 
9
9
  version_json = '''
10
10
  {
11
- "date": "2023-12-01T15:24:38-0700",
11
+ "date": "2023-12-01T15:34:09-0700",
12
12
  "dirty": false,
13
13
  "error": null,
14
- "full-revisionid": "250e412bda8199491d8dc45673752374913b9c65",
15
- "version": "2.2.0-beta.2"
14
+ "full-revisionid": "2bd60f469d51e39b0a0b23f7a765bc093bd31823",
15
+ "version": "3.0.0-alpha.4"
16
16
  }
17
17
  ''' # END VERSION_JSON
18
18
 
@@ -22,7 +22,7 @@ def aggregation_insol(energy_normalized, insolation, frequency='D'):
22
22
  aggregated : pandas.Series
23
23
  Insolation weighted average, aggregated at frequency
24
24
  '''
25
- aggregated = (insolation * energy_normalized).resample(frequency).sum() / \
26
- insolation.resample(frequency).sum()
25
+ aggregated = (insolation * energy_normalized).resample(frequency, origin='start_day').sum() / \
26
+ insolation.resample(frequency, origin='start_day').sum()
27
27
 
28
28
  return aggregated
@@ -138,7 +138,7 @@ class TrendAnalysis():
138
138
  'poa_filter': {},
139
139
  'tcell_filter': {},
140
140
  'clip_filter': {},
141
- 'csi_filter': {},
141
+ 'pvlib_clearsky_filter': {},
142
142
  'ad_hoc_filter': None # use this to include an explict filter
143
143
  }
144
144
  self.filter_params_aggregated = {
@@ -236,10 +236,10 @@ class TrendAnalysis():
236
236
  freq='1min')
237
237
  aggregate = True
238
238
 
239
- if self.pvlib_location is None:
239
+ if not hasattr(self, 'pvlib_location'):
240
240
  raise ValueError(
241
241
  'pvlib location must be provided using set_clearsky()')
242
- if self.pv_tilt is None or self.pv_azimuth is None:
242
+ if not hasattr(self, 'pv_tilt') or not hasattr(self, 'pv_azimuth'):
243
243
  raise ValueError(
244
244
  'pv_tilt and pv_azimuth must be provided using set_clearsky()')
245
245
 
@@ -332,9 +332,9 @@ class TrendAnalysis():
332
332
  Calculate clear-sky ambient temperature and store in self.temperature_ambient_clearsky
333
333
  '''
334
334
  times = self.poa_global_clearsky.index
335
- if self.pvlib_location is None:
335
+ if not hasattr(self, 'pvlib_location'):
336
336
  raise ValueError(
337
- 'pvlib location must be provided using set_clearsky()')
337
+ 'pvlib_location must be provided using set_clearsky()')
338
338
  loc = self.pvlib_location
339
339
 
340
340
  cs_amb_temp = clearsky_temperature.get_clearsky_tamb(
@@ -411,6 +411,18 @@ class TrendAnalysis():
411
411
  -------
412
412
  None
413
413
  '''
414
+
415
+
416
+ # Clearsky filtering subroutine, called either by clearsky analysis,
417
+ # or sensor analysis using sensor_clearsky_filter
418
+ def _call_clearsky_filter(filter_string):
419
+ if self.poa_global is None or self.poa_global_clearsky is None:
420
+ raise ValueError('Both poa_global and poa_global_clearsky must be available to '
421
+ f'do clearsky filtering with {filter_string}')
422
+ f = filtering.pvlib_clearsky_filter(
423
+ self.poa_global, self.poa_global_clearsky, **self.filter_params[filter_string])
424
+ return f
425
+
414
426
  # Combining filters is non-trivial because of the possibility of index
415
427
  # mismatch. Adding columns to an existing dataframe performs a left index
416
428
  # join, but probably we actually want an outer join. We can get an outer
@@ -452,13 +464,22 @@ class TrendAnalysis():
452
464
  f = filtering.clip_filter(
453
465
  self.pv_power, **self.filter_params['clip_filter'])
454
466
  filter_components['clip_filter'] = f
467
+ if 'hour_angle_filter' in self.filter_params:
468
+ if not hasattr(self, 'pvlib_location'):
469
+ raise ValueError(
470
+ 'The pvlib location must be provided using set_clearsky() '
471
+ 'or by directly setting TrendAnalysis.pvlib_location '
472
+ 'in order to use the hour_angle_filter')
473
+ loc = self.pvlib_location
474
+ f = filtering.hour_angle_filter(
475
+ energy_normalized, loc.latitude, loc.longitude,
476
+ **self.filter_params['hour_angle_filter'])
477
+ filter_components['hour_angle_filter'] = f
478
+
455
479
  if case == 'clearsky':
456
- if self.poa_global is None or self.poa_global_clearsky is None:
457
- raise ValueError('Both poa_global and poa_global_clearsky must be available to '
458
- 'do clearsky filtering with csi_filter')
459
- f = filtering.csi_filter(
460
- self.poa_global, self.poa_global_clearsky, **self.filter_params['csi_filter'])
461
- filter_components['csi_filter'] = f
480
+ filter_components['pvlib_clearsky_filter'] = _call_clearsky_filter('pvlib_clearsky_filter')
481
+ if 'sensor_pvlib_clearsky_filter' in self.filter_params:
482
+ filter_components['sensor_pvlib_clearsky_filter'] = _call_clearsky_filter('sensor_pvlib_clearsky_filter')
462
483
 
463
484
  # note: the previous implementation using the & operator treated NaN
464
485
  # filter values as False, so we do the same here for consistency:
@@ -515,7 +536,33 @@ class TrendAnalysis():
515
536
  """
516
537
  filter_components_aggregated = {'default':
517
538
  pd.Series(True, index=aggregated.index)}
539
+
540
+ if case == 'sensor':
541
+ insol = self.sensor_aggregated_insolation
542
+ if case == 'clearsky':
543
+ insol = self.clearsky_aggregated_insolation
544
+
518
545
  # Add daily aggregate filters as they come online here.
546
+ if 'two_way_window_filter' in self.filter_params_aggregated:
547
+ f = filtering.two_way_window_filter(
548
+ aggregated, **self.filter_params_aggregated['two_way_window_filter'])
549
+ filter_components_aggregated['two_way_window_filter'] = f
550
+
551
+ if 'insolation_filter' in self.filter_params_aggregated:
552
+ f = filtering.insolation_filter(
553
+ insol, **self.filter_params_aggregated['insolation_filter'])
554
+ filter_components_aggregated['insolation_filter'] = f
555
+
556
+ if 'hampel_filter' in self.filter_params_aggregated:
557
+ hampelmask = filtering.hampel_filter(aggregated,
558
+ **self.filter_params_aggregated['hampel_filter'])
559
+ filter_components_aggregated['hampel_filter'] = hampelmask
560
+
561
+ if 'directional_tukey_filter' in self.filter_params_aggregated:
562
+ f = filtering.directional_tukey_filter(aggregated,
563
+ **self.filter_params_aggregated['directional_tukey_filter'])
564
+ filter_components_aggregated['directional_tukey_filter'] = f
565
+
519
566
  # Convert the dictionary into a dataframe (after running filters)
520
567
  filter_components_aggregated = pd.DataFrame(
521
568
  filter_components_aggregated).fillna(False)
@@ -587,7 +634,7 @@ class TrendAnalysis():
587
634
  aggregated = aggregation.aggregation_insol(
588
635
  energy_normalized, insolation, self.aggregation_freq)
589
636
  aggregated_insolation = insolation.resample(
590
- self.aggregation_freq).sum()
637
+ self.aggregation_freq, origin='start_day').sum()
591
638
 
592
639
  return aggregated, aggregated_insolation
593
640
 
@@ -677,7 +724,14 @@ class TrendAnalysis():
677
724
  if self.poa_global is None:
678
725
  raise ValueError(
679
726
  'poa_global must be available to perform _sensor_preprocess')
680
-
727
+
728
+ if 'sensor_pvlib_clearsky_filter' in self.filter_params:
729
+ try:
730
+ if self.poa_global_clearsky is None:
731
+ self._calc_clearsky_poa(model='isotropic')
732
+ except AttributeError:
733
+ raise AttributeError("No poa_global_clearsky. 'set_clearsky' must be run " +
734
+ "to allow filter_params['sensor_pvlib_clearsky_filter']. ")
681
735
  if self.power_expected is None:
682
736
  # Thermal details required if power_expected is not manually set.
683
737
  if self.temperature_cell is None and self.temperature_ambient is None:
@@ -694,16 +748,20 @@ class TrendAnalysis():
694
748
  self._filter(energy_normalized, 'sensor')
695
749
  aggregated, aggregated_insolation = self._aggregate(
696
750
  energy_normalized[self.sensor_filter], insolation[self.sensor_filter])
751
+
697
752
  # Run daily filters on aggregated data
753
+ self.sensor_aggregated_insolation = aggregated_insolation
698
754
  self._aggregated_filter(aggregated, 'sensor')
755
+
699
756
  # Apply filter to aggregated data and store
700
757
  self.sensor_aggregated_performance = aggregated[self.sensor_filter_aggregated]
701
758
  self.sensor_aggregated_insolation = aggregated_insolation[self.sensor_filter_aggregated]
759
+
702
760
  # Reindex the data after the fact, so it's on the aggregated interval
703
- self.sensor_aggregated_performance = self.sensor_aggregated_performance.asfreq(
704
- self.aggregation_freq)
705
- self.sensor_aggregated_insolation = self.sensor_aggregated_insolation.asfreq(
706
- self.aggregation_freq)
761
+ self.sensor_aggregated_performance = self.sensor_aggregated_performance.resample(
762
+ self.aggregation_freq, origin='start_day').asfreq()
763
+ self.sensor_aggregated_insolation = self.sensor_aggregated_insolation.resample(
764
+ self.aggregation_freq, origin='start_day').asfreq()
707
765
 
708
766
  def _clearsky_preprocess(self):
709
767
  '''
@@ -732,17 +790,21 @@ class TrendAnalysis():
732
790
  self._filter(cs_normalized, 'clearsky')
733
791
  cs_aggregated, cs_aggregated_insolation = self._aggregate(
734
792
  cs_normalized[self.clearsky_filter], cs_insolation[self.clearsky_filter])
793
+
735
794
  # Run daily filters on aggregated data
795
+ self.clearsky_aggregated_insolation = cs_aggregated_insolation
736
796
  self._aggregated_filter(cs_aggregated, 'clearsky')
797
+
737
798
  # Apply daily filter to aggregated data and store
738
799
  self.clearsky_aggregated_performance = cs_aggregated[self.clearsky_filter_aggregated]
739
800
  self.clearsky_aggregated_insolation = \
740
801
  cs_aggregated_insolation[self.clearsky_filter_aggregated]
802
+
741
803
  # Reindex the data after the fact, so it's on the aggregated interval
742
- self.clearsky_aggregated_performance = self.clearsky_aggregated_performance.asfreq(
743
- self.aggregation_freq)
744
- self.clearsky_aggregated_insolation = self.clearsky_aggregated_insolation.asfreq(
745
- self.aggregation_freq)
804
+ self.clearsky_aggregated_performance = self.clearsky_aggregated_performance.resample(
805
+ self.aggregation_freq, origin='start_day').asfreq()
806
+ self.clearsky_aggregated_insolation = self.clearsky_aggregated_insolation.resample(
807
+ self.aggregation_freq, origin='start_day').asfreq()
746
808
 
747
809
  def sensor_analysis(self, analyses=['yoy_degradation'], yoy_kwargs={}, srr_kwargs={}):
748
810
  '''
@@ -231,10 +231,11 @@ def degradation_year_on_year(energy_normalized, recenter=True,
231
231
  energy_normalized.index.name = 'dt'
232
232
 
233
233
  # Detect sub-daily data:
234
- if min(np.diff(energy_normalized.index.values, n=1)) < \
235
- np.timedelta64(23, 'h'):
236
- raise ValueError('energy_normalized must not be '
237
- 'more frequent than daily')
234
+ # disabling this check while we experiment with morning/evening aggregation
235
+ # if min(np.diff(energy_normalized.index.values, n=1)) < \
236
+ # np.timedelta64(23, 'h'):
237
+ # raise ValueError('energy_normalized must not be '
238
+ # 'more frequent than daily')
238
239
 
239
240
  # Detect less than 2 years of data. This is complicated by two things:
240
241
  # - leap days muddle the precise meaning of "two years of data".
@@ -276,7 +277,8 @@ def degradation_year_on_year(energy_normalized, recenter=True,
276
277
 
277
278
  # Merge with what happened one year ago, use tolerance of 8 days to allow
278
279
  # for weekly aggregated data
279
- df = pd.merge_asof(energy_normalized[['dt', 'energy']], energy_normalized,
280
+ df = pd.merge_asof(energy_normalized[['dt', 'energy']],
281
+ energy_normalized.sort_values('dt_shifted'),
280
282
  left_on='dt', right_on='dt_shifted',
281
283
  suffixes=['', '_right'],
282
284
  tolerance=pd.Timedelta('8D')
@@ -4,7 +4,9 @@ import numpy as np
4
4
  import pandas as pd
5
5
  import os
6
6
  import warnings
7
+ import pvlib
7
8
  from numbers import Number
9
+ from scipy.interpolate import interp1d
8
10
  import rdtools
9
11
  import xgboost as xgb
10
12
 
@@ -122,6 +124,107 @@ def csi_filter(poa_global_measured, poa_global_clearsky, threshold=0.15):
122
124
  return (csi >= 1.0 - threshold) & (csi <= 1.0 + threshold)
123
125
 
124
126
 
127
def pvlib_clearsky_filter(poa_global_measured, poa_global_clearsky,
                          window_length=90, mean_diff=75, max_diff=75,
                          lower_line_length=-45, upper_line_length=80,
                          var_diff=0.032, slope_dev=75,
                          lookup_parameters=False, **kwargs):
    '''
    Filtering based on the Reno and Hansen method for clearsky filtering
    as implemented in pvlib. Requires a regular time series with uniform
    time steps.

    Parameters
    ----------
    poa_global_measured : pandas.Series
        Plane of array irradiance based on measurements
    poa_global_clearsky : pandas.Series
        Plane of array irradiance based on a clear sky model
    window_length : int, default 90
        Length of sliding time window in minutes. Must be greater than 2
        periods.
    mean_diff : float, default 75
        Threshold value for agreement between mean values of measured
        and clearsky in each interval, see Eq. 6 in [1]. [W/m2]
    max_diff : float, default 75
        Threshold value for agreement between maxima of measured and
        clearsky values in each interval, see Eq. 7 in [1]. [W/m2]
    lower_line_length : float, default -45
        Lower limit of line length criterion from Eq. 8 in [1].
        Criterion satisfied when lower_line_length < line length difference
        < upper_line_length.
    upper_line_length : float, default 80
        Upper limit of line length criterion from Eq. 8 in [1].
    var_diff : float, default 0.032
        Threshold value in Hz for the agreement between normalized
        standard deviations of rate of change in irradiance, see Eqs. 9
        through 11 in [1].
    slope_dev : float, default 75
        Threshold value for agreement between the largest magnitude of
        change in successive values, see Eqs. 12 through 14 in [1].
    lookup_parameters : bool, default False
        Look up the recommended parameters [2] based on the
        frequency of poa_global_measured. If poa_global_measured has a defined
        frequency, this overrides the values of window_length, max_diff,
        var_diff, and slope_dev. For frequencies below 1 minute or greater than
        30, the lookup uses the recommended parameters for 1 or 30 minutes
        respectively. If poa_global_measured doesn't have a defined frequency,
        the passed or default values of the parameters are used.
    kwargs :
        Additional arguments passed to pvlib.clearsky.detect_clearsky.
        return_components is always forced to False, overriding any value
        supplied by the caller.

    Returns
    -------
    pandas.Series
        Boolean Series of whether or not the given time is clear.

    References
    ----------
    [1] M.J. Reno and C.W. Hansen, Renewable Energy 90, pp. 520-531 (2016)
    [2] D.C. Jordan and C.W. Hansen, Renewable Energy 209 pp. 393-400 (2023)
    '''

    if lookup_parameters and poa_global_measured.index.freq:
        # Lookup table of parameter values, keyed by sampling interval
        # in minutes.
        frequencies = np.array([1, 5, 15, 30])
        windows = np.array([50, 60, 90, 120])
        max_diffs = np.array([60, 65, 75, 90])
        var_diffs = np.array([0.005, 0.01, 0.032, 0.07])
        slope_devs = np.array([50, 60, 75, 96])

        freq_minutes = poa_global_measured.index.freq.nanos / 10**9 / 60

        # np.interp interpolates linearly between the tabulated points and
        # clamps to the first/last table value outside of 1-30 minutes.
        window_length = float(np.interp(freq_minutes, frequencies, windows))
        max_diff = float(np.interp(freq_minutes, frequencies, max_diffs))
        var_diff = float(np.interp(freq_minutes, frequencies, var_diffs))
        slope_dev = float(np.interp(freq_minutes, frequencies, slope_devs))

    # Outer-join the two inputs so both are passed on a common index.
    df = pd.concat([poa_global_measured, poa_global_clearsky], axis=1, join='outer')
    df.columns = ['measured', 'clearsky']

    # Only the boolean mask is wanted, never the component breakdown.
    kwargs['return_components'] = False
    mask = pvlib.clearsky.detect_clearsky(
        df['measured'], df['clearsky'],
        window_length=window_length, mean_diff=mean_diff, max_diff=max_diff,
        lower_line_length=lower_line_length, upper_line_length=upper_line_length,
        var_diff=var_diff, slope_dev=slope_dev, **kwargs)
    return mask
226
+
227
+
125
228
  def clip_filter(power_ac, model="quantile", **kwargs):
126
229
  """
127
230
  Master wrapper for running one of the desired clipping filters.
@@ -412,11 +515,11 @@ def logic_clip_filter(power_ac,
412
515
  detection techniques in AC power time series", 2021 IEEE 48th Photovoltaic
413
516
  Specialists Conference (PVSC). DOI: 10.1109/PVSC43889.2021.9518733.
414
517
  '''
415
- # Throw a warning that this is still an experimental filter
416
- warnings.warn("The logic-based filter is an experimental clipping filter "
417
- "that is still under development. The API, results, and "
418
- "default behaviors may change in future releases (including "
419
- "MINOR and PATCH). Use at your own risk!")
518
+ # Throw a warning that this is still an experimental filter. (Removed for 3.0.0)
519
+ #warnings.warn("The logic-based filter is an experimental clipping filter "
520
+ # "that is still under development. The API, results, and "
521
+ # "default behaviors may change in future releases (including "
522
+ # "MINOR and PATCH). Use at your own risk!")
420
523
  # Format the time series
421
524
  power_ac, index_name = _format_clipping_time_series(power_ac,
422
525
  mounting_type)
@@ -743,3 +846,120 @@ def xgboost_clip_filter(power_ac,
743
846
  & (power_ac_df['scaled_value'] >= .1))
744
847
  final_clip = final_clip.reindex(index=power_ac.index, fill_value=False)
745
848
  return ~(final_clip.astype(bool))
849
+
850
def two_way_window_filter(series, roll_period=pd.to_timedelta('7 Days'), outlier_threshold=0.03):
    '''
    Flag outliers using rolling medians computed in both time directions.

    The series is first scaled by its 0.99 quantile. A point is excluded
    (False) only when it deviates by outlier_threshold or more from both the
    backward-looking and the forward-looking rolling median.

    Parameters
    ----------
    series: pandas.Series
        Pandas time series to be filtered.
    roll_period : int or timedelta, default 7 days
        The window to use for backward and forward
        rolling medians for detecting outliers.
    outlier_threshold : default is 0.03 meaning 3%
        Allowed absolute deviation of the scaled series from a rolling
        median.

    Returns
    -------
    pandas.Series
        Boolean Series, True for points that pass the filter.
    '''
    scaled = series / series.quantile(0.99)

    rolling_options = dict(min_periods=5, closed='both')
    median_looking_back = scaled.rolling(roll_period, **rolling_options).median()
    # Rolling over the reversed series yields the forward-looking median.
    median_looking_ahead = scaled.loc[::-1].rolling(roll_period, **rolling_options).median()

    # A NaN median (fewer than min_periods points in the window) is turned
    # into a zero deviation, so it can never cause a point to be excluded.
    deviation_back = (scaled - median_looking_back).abs().fillna(0)
    deviation_ahead = (scaled - median_looking_ahead).abs().fillna(0)

    smallest_deviation = deviation_back.combine(deviation_ahead, min, 0)

    return smallest_deviation < outlier_threshold
882
+
883
+
884
def insolation_filter(insolation, quantile=0.1):
    '''
    Build a filter that excludes everything below a quantile of insolation.

    Parameters
    ----------
    insolation : pandas.Series
        Insolation values to be filtered.
    quantile : float, default 0.1
        Quantile of insolation used as the lower cutoff.

    Returns
    -------
    pandas.Series
        Boolean Series, True where insolation is at or above the cutoff.
    '''
    # TODO: figure out if this should be more general
    cutoff = insolation.quantile(quantile)
    return insolation >= cutoff
894
+
895
def hampel_filter(vals, k='14d', t0=3):
    '''
    Hampel outlier filter primarily applied on daily normalized data but broadly
    applicable.

    Parameters
    ----------
    vals : pandas.Series
        daily normalized time series
    k : int or time offset string e.g. 'd', default 14d
        size of window including the sample; 14d is equal to 7 days on either
        side of value
    t0 : int, default 3
        Threshold value, defaults to 3 sigma Pearson's rule.

    Returns
    -------
    pandas.Series
        Boolean Series of whether the given measurement is within t0 scaled
        median absolute deviations of the rolling median. False points
        indicate outliers to be removed.
    '''
    # Scale factor applied to the median absolute deviation
    # (presumably the usual MAD-to-sigma consistency constant).
    mad_scale = 1.4826

    # Both rolling passes use the same centered window settings.
    centered = dict(center=True, min_periods=1)

    local_median = vals.rolling(k, **centered).median()
    deviation = (local_median - vals).abs()
    local_mad = deviation.rolling(k, **centered).median()

    return deviation <= t0 * mad_scale * local_mad
921
+
922
+
923
def _tukey_fence(series, k=1.5):
    '''
    Calculate the lower and upper Tukey fences of a pandas Series.

    Parameters
    ----------
    series : pandas.Series
        Values from which the quartiles are computed.
    k : float, default 1.5
        Multiple of the interquartile range placed beyond each quartile.

    Returns
    -------
    tuple
        (lower_fence, upper_fence)
    '''
    p25 = series.quantile(0.25)
    p75 = series.quantile(0.75)
    iqr = p75 - p25
    # Both fences scale with k; the lower fence previously ignored k and
    # always used 1.5.
    lower_fence = p25 - k * iqr
    upper_fence = p75 + k * iqr
    return lower_fence, upper_fence


def directional_tukey_filter(series, roll_period=pd.to_timedelta('7 Days'), k=1.5):
    '''
    Performs a forward and backward looking rolling tukey filter. Points must only
    pass one of either the forward or backward looking filters to be kept

    Parameters
    ----------
    series : pandas.Series
        Pandas time series to be filtered.
    roll_period : int or timedelta, default 7 days
        The window to use for the backward and forward rolling medians.
    k : float, default 1.5
        Multiple of the interquartile range used for the Tukey fences.

    Returns
    -------
    pandas.Series
        Boolean Series, True for points that pass the filter.
    '''
    backward_median = series.rolling(roll_period, min_periods=5, closed='both').median()
    # Rolling over the reversed series yields the forward-looking median.
    forward_median = series.loc[::-1].rolling(roll_period, min_periods=5, closed='both').median()
    backward_dif = series - backward_median
    forward_dif = series - forward_median

    # Fences are computed on the deviations from each rolling median.
    backward_dif_lower, backward_dif_upper = _tukey_fence(backward_dif, k)
    forward_dif_lower, forward_dif_upper = _tukey_fence(forward_dif, k)

    # NaN deviations compare False, so a point lacking a median in one
    # direction can still be kept by the other direction.
    mask = (
        ((forward_dif > forward_dif_lower) & (forward_dif < forward_dif_upper)) |
        ((backward_dif > backward_dif_lower) & (backward_dif < backward_dif_upper))
    )
    return mask
951
+
952
+
953
def hour_angle_filter(series, lat, lon, min_hour_angle=-30, max_hour_angle=30):
    '''
    Creates a filter based on the hour angle of the sun (15 degrees per hour)

    Parameters
    ----------
    series : pandas.Series
        Time series whose index supplies the times to evaluate.
    lat : float
        Latitude passed to pvlib.solarposition.get_solarposition.
    lon : float
        Longitude passed to pvlib for the solar position and hour angle.
    min_hour_angle : float, default -30
        Smallest hour angle kept by the filter (inclusive).
    max_hour_angle : float, default 30
        Largest hour angle kept by the filter (inclusive).

    Returns
    -------
    pandas.Series
        Boolean Series, True where the hour angle lies within the limits.
    '''
    timestamps = series.index

    # The equation of time comes from the solar position calculation and
    # feeds the hour angle calculation.
    solar_position = pvlib.solarposition.get_solarposition(timestamps, lat, lon)
    equation_of_time = solar_position['equation_of_time']

    angles = pd.Series(
        pvlib.solarposition.hour_angle(timestamps, lon, equation_of_time),
        index=timestamps)

    return (angles >= min_hour_angle) & (angles <= max_hour_angle)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: rdtools
3
- Version: 2.2.0b2
3
+ Version: 3.0.0a4
4
4
  Summary: Functions for reproducible timeseries analysis of photovoltaic systems.
5
5
  Home-page: https://github.com/NREL/rdtools
6
6
  Author: Rdtools Python Developers
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes