rdtools 2.2.0b2__tar.gz → 3.0.0a4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rdtools-2.2.0b2/rdtools.egg-info → rdtools-3.0.0a4}/PKG-INFO +1 -1
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/_version.py +3 -3
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/aggregation.py +2 -2
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/analysis_chains.py +83 -21
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/degradation.py +7 -5
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/filtering.py +225 -5
- {rdtools-2.2.0b2 → rdtools-3.0.0a4/rdtools.egg-info}/PKG-INFO +1 -1
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/LICENSE +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/MANIFEST.in +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/README.md +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/__init__.py +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/_deprecation.py +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/availability.py +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/bootstrap.py +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/clearsky_temperature.py +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/data/temperature.hdf5 +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/models/xgboost_clipping_model.json +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/normalization.py +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/plotting.py +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools/soiling.py +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools.egg-info/SOURCES.txt +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools.egg-info/dependency_links.txt +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools.egg-info/not-zip-safe +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools.egg-info/requires.txt +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/rdtools.egg-info/top_level.txt +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/setup.cfg +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/setup.py +0 -0
- {rdtools-2.2.0b2 → rdtools-3.0.0a4}/versioneer.py +0 -0
|
@@ -8,11 +8,11 @@ import json
|
|
|
8
8
|
|
|
9
9
|
version_json = '''
|
|
10
10
|
{
|
|
11
|
-
"date": "2023-12-01T15:
|
|
11
|
+
"date": "2023-12-01T15:34:09-0700",
|
|
12
12
|
"dirty": false,
|
|
13
13
|
"error": null,
|
|
14
|
-
"full-revisionid": "
|
|
15
|
-
"version": "
|
|
14
|
+
"full-revisionid": "2bd60f469d51e39b0a0b23f7a765bc093bd31823",
|
|
15
|
+
"version": "3.0.0-alpha.4"
|
|
16
16
|
}
|
|
17
17
|
''' # END VERSION_JSON
|
|
18
18
|
|
|
@@ -22,7 +22,7 @@ def aggregation_insol(energy_normalized, insolation, frequency='D'):
|
|
|
22
22
|
aggregated : pandas.Series
|
|
23
23
|
Insolation weighted average, aggregated at frequency
|
|
24
24
|
'''
|
|
25
|
-
aggregated = (insolation * energy_normalized).resample(frequency).sum() / \
|
|
26
|
-
insolation.resample(frequency).sum()
|
|
25
|
+
aggregated = (insolation * energy_normalized).resample(frequency, origin='start_day').sum() / \
|
|
26
|
+
insolation.resample(frequency, origin='start_day').sum()
|
|
27
27
|
|
|
28
28
|
return aggregated
|
|
@@ -138,7 +138,7 @@ class TrendAnalysis():
|
|
|
138
138
|
'poa_filter': {},
|
|
139
139
|
'tcell_filter': {},
|
|
140
140
|
'clip_filter': {},
|
|
141
|
-
'
|
|
141
|
+
'pvlib_clearsky_filter': {},
|
|
142
142
|
'ad_hoc_filter': None # use this to include an explict filter
|
|
143
143
|
}
|
|
144
144
|
self.filter_params_aggregated = {
|
|
@@ -236,10 +236,10 @@ class TrendAnalysis():
|
|
|
236
236
|
freq='1min')
|
|
237
237
|
aggregate = True
|
|
238
238
|
|
|
239
|
-
if self
|
|
239
|
+
if not hasattr(self, 'pvlib_location'):
|
|
240
240
|
raise ValueError(
|
|
241
241
|
'pvlib location must be provided using set_clearsky()')
|
|
242
|
-
if self
|
|
242
|
+
if not hasattr(self, 'pv_tilt') or not hasattr(self, 'pv_azimuth'):
|
|
243
243
|
raise ValueError(
|
|
244
244
|
'pv_tilt and pv_azimuth must be provided using set_clearsky()')
|
|
245
245
|
|
|
@@ -332,9 +332,9 @@ class TrendAnalysis():
|
|
|
332
332
|
Calculate clear-sky ambient temperature and store in self.temperature_ambient_clearsky
|
|
333
333
|
'''
|
|
334
334
|
times = self.poa_global_clearsky.index
|
|
335
|
-
if self
|
|
335
|
+
if not hasattr(self, 'pvlib_location'):
|
|
336
336
|
raise ValueError(
|
|
337
|
-
'
|
|
337
|
+
'pvlib_location must be provided using set_clearsky()')
|
|
338
338
|
loc = self.pvlib_location
|
|
339
339
|
|
|
340
340
|
cs_amb_temp = clearsky_temperature.get_clearsky_tamb(
|
|
@@ -411,6 +411,18 @@ class TrendAnalysis():
|
|
|
411
411
|
-------
|
|
412
412
|
None
|
|
413
413
|
'''
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
# Clearsky filtering subroutine, called either by clearsky analysis,
|
|
417
|
+
# or sensor analysis using sensor_clearsky_filter
|
|
418
|
+
def _call_clearsky_filter(filter_string):
|
|
419
|
+
if self.poa_global is None or self.poa_global_clearsky is None:
|
|
420
|
+
raise ValueError('Both poa_global and poa_global_clearsky must be available to '
|
|
421
|
+
f'do clearsky filtering with {filter_string}')
|
|
422
|
+
f = filtering.pvlib_clearsky_filter(
|
|
423
|
+
self.poa_global, self.poa_global_clearsky, **self.filter_params[filter_string])
|
|
424
|
+
return f
|
|
425
|
+
|
|
414
426
|
# Combining filters is non-trivial because of the possibility of index
|
|
415
427
|
# mismatch. Adding columns to an existing dataframe performs a left index
|
|
416
428
|
# join, but probably we actually want an outer join. We can get an outer
|
|
@@ -452,13 +464,22 @@ class TrendAnalysis():
|
|
|
452
464
|
f = filtering.clip_filter(
|
|
453
465
|
self.pv_power, **self.filter_params['clip_filter'])
|
|
454
466
|
filter_components['clip_filter'] = f
|
|
467
|
+
if 'hour_angle_filter' in self.filter_params:
|
|
468
|
+
if not hasattr(self, 'pvlib_location'):
|
|
469
|
+
raise ValueError(
|
|
470
|
+
'The pvlib location must be provided using set_clearsky() '
|
|
471
|
+
'or by directly setting TrendAnalysis.pvlib_location '
|
|
472
|
+
'in order to use the hour_angle_filter')
|
|
473
|
+
loc = self.pvlib_location
|
|
474
|
+
f = filtering.hour_angle_filter(
|
|
475
|
+
energy_normalized, loc.latitude, loc.longitude,
|
|
476
|
+
**self.filter_params['hour_angle_filter'])
|
|
477
|
+
filter_components['hour_angle_filter'] = f
|
|
478
|
+
|
|
455
479
|
if case == 'clearsky':
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
f = filtering.csi_filter(
|
|
460
|
-
self.poa_global, self.poa_global_clearsky, **self.filter_params['csi_filter'])
|
|
461
|
-
filter_components['csi_filter'] = f
|
|
480
|
+
filter_components['pvlib_clearsky_filter'] = _call_clearsky_filter('pvlib_clearsky_filter')
|
|
481
|
+
if 'sensor_pvlib_clearsky_filter' in self.filter_params:
|
|
482
|
+
filter_components['sensor_pvlib_clearsky_filter'] = _call_clearsky_filter('sensor_pvlib_clearsky_filter')
|
|
462
483
|
|
|
463
484
|
# note: the previous implementation using the & operator treated NaN
|
|
464
485
|
# filter values as False, so we do the same here for consistency:
|
|
@@ -515,7 +536,33 @@ class TrendAnalysis():
|
|
|
515
536
|
"""
|
|
516
537
|
filter_components_aggregated = {'default':
|
|
517
538
|
pd.Series(True, index=aggregated.index)}
|
|
539
|
+
|
|
540
|
+
if case == 'sensor':
|
|
541
|
+
insol = self.sensor_aggregated_insolation
|
|
542
|
+
if case == 'clearsky':
|
|
543
|
+
insol = self.clearsky_aggregated_insolation
|
|
544
|
+
|
|
518
545
|
# Add daily aggregate filters as they come online here.
|
|
546
|
+
if 'two_way_window_filter' in self.filter_params_aggregated:
|
|
547
|
+
f = filtering.two_way_window_filter(
|
|
548
|
+
aggregated, **self.filter_params_aggregated['two_way_window_filter'])
|
|
549
|
+
filter_components_aggregated['two_way_window_filter'] = f
|
|
550
|
+
|
|
551
|
+
if 'insolation_filter' in self.filter_params_aggregated:
|
|
552
|
+
f = filtering.insolation_filter(
|
|
553
|
+
insol, **self.filter_params_aggregated['insolation_filter'])
|
|
554
|
+
filter_components_aggregated['insolation_filter'] = f
|
|
555
|
+
|
|
556
|
+
if 'hampel_filter' in self.filter_params_aggregated:
|
|
557
|
+
hampelmask = filtering.hampel_filter(aggregated,
|
|
558
|
+
**self.filter_params_aggregated['hampel_filter'])
|
|
559
|
+
filter_components_aggregated['hampel_filter'] = hampelmask
|
|
560
|
+
|
|
561
|
+
if 'directional_tukey_filter' in self.filter_params_aggregated:
|
|
562
|
+
f = filtering.directional_tukey_filter(aggregated,
|
|
563
|
+
**self.filter_params_aggregated['directional_tukey_filter'])
|
|
564
|
+
filter_components_aggregated['directional_tukey_filter'] = f
|
|
565
|
+
|
|
519
566
|
# Convert the dictionary into a dataframe (after running filters)
|
|
520
567
|
filter_components_aggregated = pd.DataFrame(
|
|
521
568
|
filter_components_aggregated).fillna(False)
|
|
@@ -587,7 +634,7 @@ class TrendAnalysis():
|
|
|
587
634
|
aggregated = aggregation.aggregation_insol(
|
|
588
635
|
energy_normalized, insolation, self.aggregation_freq)
|
|
589
636
|
aggregated_insolation = insolation.resample(
|
|
590
|
-
self.aggregation_freq).sum()
|
|
637
|
+
self.aggregation_freq, origin='start_day').sum()
|
|
591
638
|
|
|
592
639
|
return aggregated, aggregated_insolation
|
|
593
640
|
|
|
@@ -677,7 +724,14 @@ class TrendAnalysis():
|
|
|
677
724
|
if self.poa_global is None:
|
|
678
725
|
raise ValueError(
|
|
679
726
|
'poa_global must be available to perform _sensor_preprocess')
|
|
680
|
-
|
|
727
|
+
|
|
728
|
+
if 'sensor_pvlib_clearsky_filter' in self.filter_params:
|
|
729
|
+
try:
|
|
730
|
+
if self.poa_global_clearsky is None:
|
|
731
|
+
self._calc_clearsky_poa(model='isotropic')
|
|
732
|
+
except AttributeError:
|
|
733
|
+
raise AttributeError("No poa_global_clearsky. 'set_clearsky' must be run " +
|
|
734
|
+
"to allow filter_params['sensor_pvlib_clearsky_filter']. ")
|
|
681
735
|
if self.power_expected is None:
|
|
682
736
|
# Thermal details required if power_expected is not manually set.
|
|
683
737
|
if self.temperature_cell is None and self.temperature_ambient is None:
|
|
@@ -694,16 +748,20 @@ class TrendAnalysis():
|
|
|
694
748
|
self._filter(energy_normalized, 'sensor')
|
|
695
749
|
aggregated, aggregated_insolation = self._aggregate(
|
|
696
750
|
energy_normalized[self.sensor_filter], insolation[self.sensor_filter])
|
|
751
|
+
|
|
697
752
|
# Run daily filters on aggregated data
|
|
753
|
+
self.sensor_aggregated_insolation = aggregated_insolation
|
|
698
754
|
self._aggregated_filter(aggregated, 'sensor')
|
|
755
|
+
|
|
699
756
|
# Apply filter to aggregated data and store
|
|
700
757
|
self.sensor_aggregated_performance = aggregated[self.sensor_filter_aggregated]
|
|
701
758
|
self.sensor_aggregated_insolation = aggregated_insolation[self.sensor_filter_aggregated]
|
|
759
|
+
|
|
702
760
|
# Reindex the data after the fact, so it's on the aggregated interval
|
|
703
|
-
self.sensor_aggregated_performance = self.sensor_aggregated_performance.
|
|
704
|
-
self.aggregation_freq)
|
|
705
|
-
self.sensor_aggregated_insolation = self.sensor_aggregated_insolation.
|
|
706
|
-
self.aggregation_freq)
|
|
761
|
+
self.sensor_aggregated_performance = self.sensor_aggregated_performance.resample(
|
|
762
|
+
self.aggregation_freq, origin='start_day').asfreq()
|
|
763
|
+
self.sensor_aggregated_insolation = self.sensor_aggregated_insolation.resample(
|
|
764
|
+
self.aggregation_freq, origin='start_day').asfreq()
|
|
707
765
|
|
|
708
766
|
def _clearsky_preprocess(self):
|
|
709
767
|
'''
|
|
@@ -732,17 +790,21 @@ class TrendAnalysis():
|
|
|
732
790
|
self._filter(cs_normalized, 'clearsky')
|
|
733
791
|
cs_aggregated, cs_aggregated_insolation = self._aggregate(
|
|
734
792
|
cs_normalized[self.clearsky_filter], cs_insolation[self.clearsky_filter])
|
|
793
|
+
|
|
735
794
|
# Run daily filters on aggregated data
|
|
795
|
+
self.clearsky_aggregated_insolation = cs_aggregated_insolation
|
|
736
796
|
self._aggregated_filter(cs_aggregated, 'clearsky')
|
|
797
|
+
|
|
737
798
|
# Apply daily filter to aggregated data and store
|
|
738
799
|
self.clearsky_aggregated_performance = cs_aggregated[self.clearsky_filter_aggregated]
|
|
739
800
|
self.clearsky_aggregated_insolation = \
|
|
740
801
|
cs_aggregated_insolation[self.clearsky_filter_aggregated]
|
|
802
|
+
|
|
741
803
|
# Reindex the data after the fact, so it's on the aggregated interval
|
|
742
|
-
self.clearsky_aggregated_performance = self.clearsky_aggregated_performance.
|
|
743
|
-
self.aggregation_freq)
|
|
744
|
-
self.clearsky_aggregated_insolation = self.clearsky_aggregated_insolation.
|
|
745
|
-
self.aggregation_freq)
|
|
804
|
+
self.clearsky_aggregated_performance = self.clearsky_aggregated_performance.resample(
|
|
805
|
+
self.aggregation_freq, origin='start_day').asfreq()
|
|
806
|
+
self.clearsky_aggregated_insolation = self.clearsky_aggregated_insolation.resample(
|
|
807
|
+
self.aggregation_freq, origin='start_day').asfreq()
|
|
746
808
|
|
|
747
809
|
def sensor_analysis(self, analyses=['yoy_degradation'], yoy_kwargs={}, srr_kwargs={}):
|
|
748
810
|
'''
|
|
@@ -231,10 +231,11 @@ def degradation_year_on_year(energy_normalized, recenter=True,
|
|
|
231
231
|
energy_normalized.index.name = 'dt'
|
|
232
232
|
|
|
233
233
|
# Detect sub-daily data:
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
234
|
+
# disabling this check while we experiment with morning/evening aggregation
|
|
235
|
+
# if min(np.diff(energy_normalized.index.values, n=1)) < \
|
|
236
|
+
# np.timedelta64(23, 'h'):
|
|
237
|
+
# raise ValueError('energy_normalized must not be '
|
|
238
|
+
# 'more frequent than daily')
|
|
238
239
|
|
|
239
240
|
# Detect less than 2 years of data. This is complicated by two things:
|
|
240
241
|
# - leap days muddle the precise meaning of "two years of data".
|
|
@@ -276,7 +277,8 @@ def degradation_year_on_year(energy_normalized, recenter=True,
|
|
|
276
277
|
|
|
277
278
|
# Merge with what happened one year ago, use tolerance of 8 days to allow
|
|
278
279
|
# for weekly aggregated data
|
|
279
|
-
df = pd.merge_asof(energy_normalized[['dt', 'energy']],
|
|
280
|
+
df = pd.merge_asof(energy_normalized[['dt', 'energy']],
|
|
281
|
+
energy_normalized.sort_values('dt_shifted'),
|
|
280
282
|
left_on='dt', right_on='dt_shifted',
|
|
281
283
|
suffixes=['', '_right'],
|
|
282
284
|
tolerance=pd.Timedelta('8D')
|
|
@@ -4,7 +4,9 @@ import numpy as np
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
import os
|
|
6
6
|
import warnings
|
|
7
|
+
import pvlib
|
|
7
8
|
from numbers import Number
|
|
9
|
+
from scipy.interpolate import interp1d
|
|
8
10
|
import rdtools
|
|
9
11
|
import xgboost as xgb
|
|
10
12
|
|
|
@@ -122,6 +124,107 @@ def csi_filter(poa_global_measured, poa_global_clearsky, threshold=0.15):
|
|
|
122
124
|
return (csi >= 1.0 - threshold) & (csi <= 1.0 + threshold)
|
|
123
125
|
|
|
124
126
|
|
|
127
|
+
def pvlib_clearsky_filter(poa_global_measured, poa_global_clearsky,
                          window_length=90, mean_diff=75, max_diff=75,
                          lower_line_length=-45, upper_line_length=80,
                          var_diff=0.032, slope_dev=75,
                          lookup_parameters=False, **kwargs):
    '''
    Filtering based on the Reno and Hansen method for clearsky filtering
    as implemented in pvlib. Requires a regular time series with uniform
    time steps.

    Parameters
    ----------
    poa_global_measured : pandas.Series
        Plane of array irradiance based on measurements
    poa_global_clearsky : pandas.Series
        Plane of array irradiance based on a clear sky model
    window_length : int, default 90
        Length of sliding time window in minutes. Must be greater than 2
        periods.
    mean_diff : float, default 75
        Threshold value for agreement between mean values of measured
        and clearsky in each interval, see Eq. 6 in [1]. [W/m2]
    max_diff : float, default 75
        Threshold value for agreement between maxima of measured and
        clearsky values in each interval, see Eq. 7 in [1]. [W/m2]
    lower_line_length : float, default -45
        Lower limit of line length criterion from Eq. 8 in [1].
        Criterion satisfied when lower_line_length < line length difference
        < upper_line_length.
    upper_line_length : float, default 80
        Upper limit of line length criterion from Eq. 8 in [1].
    var_diff : float, default 0.032
        Threshold value in Hz for the agreement between normalized
        standard deviations of rate of change in irradiance, see Eqs. 9
        through 11 in [1].
    slope_dev : float, default 75
        Threshold value for agreement between the largest magnitude of
        change in successive values, see Eqs. 12 through 14 in [1].
    lookup_parameters : bool, default False
        Look up the recommended parameters [2] based on the
        frequency of poa_global_measured. If poa_global_measured has a defined
        frequency, this overrides the values of window_length, max_diff,
        var_diff, and slope_dev (mean_diff and the line length limits are
        left as passed). For frequencies below 1 minute or greater than
        30 minutes, the lookup uses the recommended parameters for 1 or 30
        minutes respectively; in between, the tabulated values are linearly
        interpolated. If poa_global_measured doesn't have a defined
        frequency, the passed or default values of the parameters are used.
    kwargs :
        Additional arguments passed to pvlib.clearsky.detect_clearsky.
        return_components is always forced to False, overriding any value
        supplied by the caller.

    Returns
    -------
    pandas.Series
        Boolean Series of whether or not the given time is clear.

    References
    ----------
    [1] M.J. Reno and C.W. Hansen, Renewable Energy 90, pp. 520-531 (2016)
    [2] D.C. Jordan and C.W. Hansen, Renewable Energy 209 pp. 393-400 (2023)
    '''
    if lookup_parameters and poa_global_measured.index.freq is not None:
        # Lookup table keyed by sampling interval in minutes.
        frequencies = [1, 5, 15, 30]
        windows = [50, 60, 90, 120]
        max_diffs = [60, 65, 75, 90]
        var_diffs = [0.005, 0.01, 0.032, 0.07]
        slope_devs = [50, 60, 75, 96]

        freq_minutes = poa_global_measured.index.freq.nanos / 10**9 / 60

        # np.interp interpolates linearly and clamps to the first/last table
        # entry outside the tabulated range, so no explicit fill values are
        # needed; float() hands plain scalars on to pvlib.
        window_length = float(np.interp(freq_minutes, frequencies, windows))
        max_diff = float(np.interp(freq_minutes, frequencies, max_diffs))
        var_diff = float(np.interp(freq_minutes, frequencies, var_diffs))
        slope_dev = float(np.interp(freq_minutes, frequencies, slope_devs))

    # Outer-join the two inputs so both are evaluated on one shared index.
    df = pd.concat([poa_global_measured, poa_global_clearsky], axis=1, join='outer')
    df.columns = ['measured', 'clearsky']

    # Only the boolean mask is wanted, never the component breakdown.
    kwargs['return_components'] = False
    mask = pvlib.clearsky.detect_clearsky(
        df['measured'], df['clearsky'],
        window_length=window_length, mean_diff=mean_diff, max_diff=max_diff,
        lower_line_length=lower_line_length, upper_line_length=upper_line_length,
        var_diff=var_diff, slope_dev=slope_dev, **kwargs)
    return mask
|
|
226
|
+
|
|
227
|
+
|
|
125
228
|
def clip_filter(power_ac, model="quantile", **kwargs):
|
|
126
229
|
"""
|
|
127
230
|
Master wrapper for running one of the desired clipping filters.
|
|
@@ -412,11 +515,11 @@ def logic_clip_filter(power_ac,
|
|
|
412
515
|
detection techniques in AC power time series", 2021 IEEE 48th Photovoltaic
|
|
413
516
|
Specialists Conference (PVSC). DOI: 10.1109/PVSC43889.2021.9518733.
|
|
414
517
|
'''
|
|
415
|
-
# Throw a warning that this is still an experimental filter
|
|
416
|
-
warnings.warn("The logic-based filter is an experimental clipping filter "
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
518
|
+
# Throw a warning that this is still an experimental filter. (Removed for 3.0.0)
|
|
519
|
+
#warnings.warn("The logic-based filter is an experimental clipping filter "
|
|
520
|
+
# "that is still under development. The API, results, and "
|
|
521
|
+
# "default behaviors may change in future releases (including "
|
|
522
|
+
# "MINOR and PATCH). Use at your own risk!")
|
|
420
523
|
# Format the time series
|
|
421
524
|
power_ac, index_name = _format_clipping_time_series(power_ac,
|
|
422
525
|
mounting_type)
|
|
@@ -743,3 +846,120 @@ def xgboost_clip_filter(power_ac,
|
|
|
743
846
|
& (power_ac_df['scaled_value'] >= .1))
|
|
744
847
|
final_clip = final_clip.reindex(index=power_ac.index, fill_value=False)
|
|
745
848
|
return ~(final_clip.astype(bool))
|
|
849
|
+
|
|
850
|
+
def two_way_window_filter(series, roll_period=pd.to_timedelta('7 Days'), outlier_threshold=0.03):
    '''
    Removes outliers based on forward and backward window of the rolling median. Points beyond
    outlier_threshold from both the forward and backward-looking median are excluded by the filter.

    Parameters
    ----------
    series: pandas.Series
        Pandas time series to be filtered.
    roll_period : int or timedelta, default 7 days
        The window to use for backward and forward
        rolling medians for detecting outliers.
    outlier_threshold : float, default 0.03
        Maximum allowed absolute deviation from the rolling median, applied
        after the series has been divided by its 99th percentile
        (0.03 meaning 3%).

    Returns
    -------
    pandas.Series
        Boolean Series, True where the point is within outlier_threshold of
        at least one of the two rolling medians. Points for which a rolling
        median is undefined (fewer than 5 samples in the window) are treated
        as having zero deviation in that direction and are therefore kept.
    '''
    # Scale by the 99th percentile so the threshold is a relative deviation.
    series = series / series.quantile(0.99)

    backward_median = series.rolling(roll_period, min_periods=5, closed='both').median()
    # Rolling over the reversed series makes the same window look forward in time.
    forward_median = series.loc[::-1].rolling(roll_period, min_periods=5, closed='both').median()

    # A NaN median must not exclude a point, so missing deviations count as 0.
    backward_dif = (series - backward_median).abs().fillna(0)
    forward_dif = (series - forward_median).abs().fillna(0)

    # Put both deviations on the union index (missing entries count as 0) and
    # take the element-wise minimum in one vectorized step.
    backward_dif, forward_dif = backward_dif.align(forward_dif, join='outer', fill_value=0)
    dif_min = np.minimum(backward_dif, forward_dif)

    return dif_min < outlier_threshold
|
|
882
|
+
|
|
883
|
+
|
|
884
|
+
def insolation_filter(insolation, quantile=0.1):
    '''
    Build a mask that drops the lowest-insolation points.

    TODO: figure out if this should be more general

    Parameters
    ----------
    insolation : pandas.Series
        Insolation values to be filtered.
    quantile : float, default 0.1
        Quantile of ``insolation`` used as the cutoff.

    Returns
    -------
    pandas.Series
        Boolean Series, True where the value is greater than or equal to the
        requested quantile of ``insolation``.
    '''
    return insolation >= insolation.quantile(quantile)
|
|
894
|
+
|
|
895
|
+
def hampel_filter(vals, k='14d', t0=3):
    '''
    Hampel outlier filter, primarily applied to daily normalized data but
    broadly applicable.

    Parameters
    ----------
    vals : pandas.Series
        daily normalized time series
    k : int or time offset string e.g. 'd', default 14d
        size of the centered rolling window including the sample; 14d is
        equal to 7 days on either side of the value
    t0 : int, default 3
        Threshold multiplier applied to the scaled rolling median absolute
        deviation.

    Returns
    -------
    pandas.Series
        Boolean Series, True where the absolute deviation from the rolling
        median does not exceed ``t0 * 1.4826`` times the rolling median of
        those deviations. False points indicate outliers to be removed.
    '''
    # NOTE(review): presumably the usual factor relating the median absolute
    # deviation to a standard deviation for normally distributed data.
    scale = 1.4826

    def centered_median(data):
        # Centered window; a single sample is enough to produce a value.
        return data.rolling(k, center=True, min_periods=1).median()

    deviation = np.abs(centered_median(vals) - vals)
    limit = t0 * scale * centered_median(deviation)
    return deviation <= limit
|
|
921
|
+
|
|
922
|
+
|
|
923
|
+
def _tukey_fence(series, k=1.5):
    '''
    Calculates the upper and lower tukey fences from a pandas series

    Parameters
    ----------
    series : pandas.Series
        Values from which the quartiles are computed.
    k : float, default 1.5
        Multiple of the interquartile range added above the 75th percentile
        and subtracted below the 25th percentile.

    Returns
    -------
    tuple of float
        (lower_fence, upper_fence)
    '''
    p25 = series.quantile(0.25)
    p75 = series.quantile(0.75)
    iqr = p75 - p25
    upper_fence = p75 + k * iqr
    # Use k here as well so both fences scale together.
    lower_fence = p25 - k * iqr
    return lower_fence, upper_fence


def directional_tukey_filter(series, roll_period=pd.to_timedelta('7 Days'), k=1.5):
    '''
    Performs a forward and backward looking rolling tukey filter. Points must only
    pass one of either the forward or backward looking filters to be kept

    Parameters
    ----------
    series : pandas.Series
        Pandas time series to be filtered.
    roll_period : int or timedelta, default 7 days
        The window to use for the backward and forward rolling medians.
    k : float, default 1.5
        Tukey fence multiplier applied to the interquartile range of the
        deviations from each rolling median.

    Returns
    -------
    pandas.Series
        Boolean Series, True where the deviation from the forward-looking or
        the backward-looking rolling median lies strictly inside the
        corresponding Tukey fences.
    '''
    backward_median = series.rolling(roll_period, min_periods=5, closed='both').median()
    # Rolling over the reversed series makes the same window look forward in time.
    forward_median = series.loc[::-1].rolling(roll_period, min_periods=5, closed='both').median()
    backward_dif = series - backward_median
    forward_dif = series - forward_median

    # Fences are computed once per direction from all deviations in the series.
    backward_dif_lower, backward_dif_upper = _tukey_fence(backward_dif, k)
    forward_dif_lower, forward_dif_upper = _tukey_fence(forward_dif, k)

    # Comparisons are strict, and a NaN deviation (undefined median) fails
    # that direction's test, so such points rely on the other direction.
    mask = (
        ((forward_dif > forward_dif_lower) & (forward_dif < forward_dif_upper)) |
        ((backward_dif > backward_dif_lower) & (backward_dif < backward_dif_upper))
    )
    return mask
|
|
951
|
+
|
|
952
|
+
|
|
953
|
+
def hour_angle_filter(series, lat, lon, min_hour_angle=-30, max_hour_angle=30):
    '''
    Creates a filter based on the hour angle of the sun (15 degrees per hour)

    Parameters
    ----------
    series : pandas.Series
        Time series whose index supplies the timestamps to evaluate.
    lat : float
        Latitude passed to pvlib.solarposition.get_solarposition.
    lon : float
        Longitude passed to pvlib for the solar position and hour angle.
    min_hour_angle : float, default -30
        Smallest hour angle (inclusive) that passes the filter.
    max_hour_angle : float, default 30
        Largest hour angle (inclusive) that passes the filter.

    Returns
    -------
    pandas.Series
        Boolean Series indexed like ``series``, True where the hour angle
        lies within [min_hour_angle, max_hour_angle].
    '''
    index = series.index
    # The solar position lookup supplies the equation of time that the
    # hour angle calculation requires.
    solar_position = pvlib.solarposition.get_solarposition(index, lat, lon)
    angles = pd.Series(
        pvlib.solarposition.hour_angle(index, lon, solar_position['equation_of_time']),
        index=index)

    above_minimum = angles >= min_hour_angle
    below_maximum = angles <= max_hour_angle
    return above_minimum & below_maximum
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|