captest 0.11.2__py2.py3-none-any.whl → 0.13.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
captest/capdata.py CHANGED
@@ -40,7 +40,7 @@ from bokeh.io import show
40
40
  from bokeh.plotting import figure
41
41
  from bokeh.palettes import Category10
42
42
  from bokeh.layouts import gridplot
43
- from bokeh.models import Legend, HoverTool, ColumnDataSource
43
+ from bokeh.models import Legend, HoverTool, ColumnDataSource, NumeralTickFormatter
44
44
 
45
45
  import param
46
46
 
@@ -76,8 +76,9 @@ if xlsx_spec is None:
76
76
  pvlib_spec = importlib.util.find_spec('pvlib')
77
77
  if pvlib_spec is not None:
78
78
  from pvlib.location import Location
79
- from pvlib.pvsystem import PVSystem
80
- from pvlib.tracking import SingleAxisTracker
79
+ from pvlib.pvsystem import (
80
+ PVSystem, Array, FixedMount, SingleAxisTrackerMount
81
+ )
81
82
  from pvlib.pvsystem import retrieve_sam
82
83
  from pvlib.modelchain import ModelChain
83
84
  from pvlib.clearsky import detect_clearsky
@@ -86,6 +87,7 @@ else:
86
87
  'pvlib package.')
87
88
 
88
89
  from captest import util
90
+ from captest import plotting
89
91
 
90
92
  plot_colors_brewer = {'real_pwr': ['#2b8cbe', '#7bccc4', '#bae4bc', '#f0f9e8'],
91
93
  'irr_poa': ['#e31a1c', '#fd8d3c', '#fecc5c', '#ffffb2'],
@@ -422,7 +424,34 @@ def check_all_perc_diff_comb(series, perc_diff):
422
424
  return all([perc_difference(x, y) < perc_diff for x, y in c])
423
425
 
424
426
 
425
- def sensor_filter(df, perc_diff):
427
+ def abs_diff_from_average(series, threshold):
428
+ """Check each value in series <= average of other values.
429
+
430
+ Drops NaNs from series before calculating difference from average for each value.
431
+
432
+ Returns True if there is only one value in the series.
433
+
434
+ Parameters
435
+ ----------
436
+ series : pd.Series
437
+ Pandas series of values to check.
438
+ threshold : numeric
439
+ Threshold value for absolute difference from average.
440
+
441
+ Returns
442
+ -------
443
+ bool
444
+ """
445
+ series = series.dropna()
446
+ if len(series) == 1:
447
+ return True
448
+ abs_diffs = []
449
+ for i, val in enumerate(series):
450
+ abs_diffs.append(abs(val - series.drop(series.index[i]).mean()) <= threshold)
451
+ return all(abs_diffs)
452
+
453
+
454
+ def sensor_filter(df, threshold, row_filter=check_all_perc_diff_comb):
426
455
  """
427
456
  Check dataframe for rows with inconsistent values.
428
457
 
@@ -435,8 +464,7 @@ def sensor_filter(df, perc_diff):
435
464
  Percent difference as decimal.
436
465
  """
437
466
  if df.shape[1] >= 2:
438
- bool_ser = df.apply(check_all_perc_diff_comb, perc_diff=perc_diff,
439
- axis=1)
467
+ bool_ser = df.apply(row_filter, args=(threshold, ), axis=1)
440
468
  return df[bool_ser].index
441
469
  elif df.shape[1] == 1:
442
470
  return df.index
@@ -881,17 +909,18 @@ def pvlib_location(loc):
881
909
 
882
910
  def pvlib_system(sys):
883
911
  """
884
- Create a pvlib PVSystem or SingleAxisTracker object.
912
+ Create a pvlib :py:class:`~pvlib.pvsystem.PVSystem` object.
885
913
 
886
- A SingleAxisTracker object is created if any of the keyword arguments for
887
- initiating a SingleAxisTracker object are found in the keys of the passed
888
- dictionary.
914
+ The :py:class:`~pvlib.pvsystem.PVSystem` will have either a
915
+ :py:class:`~pvlib.pvsystem.FixedMount` or a
916
+ :py:class:`~pvlib.pvsystem.SingleAxisTrackerMount` depending on
917
+ the keys of the passed dictionary.
889
918
 
890
919
  Parameters
891
920
  ----------
892
921
  sys : dict
893
- Dictionary of keywords required to create a pvlib SingleAxisTracker
894
- or PVSystem.
922
+ Dictionary of keywords required to create a pvlib
923
+ ``SingleAxisTrackerMount`` or ``FixedMount``, plus ``albedo``.
895
924
 
896
925
  Example dictionaries:
897
926
 
@@ -904,27 +933,25 @@ def pvlib_system(sys):
904
933
  'gcr': 0.2, 'albedo': 0.2}
905
934
 
906
935
  Refer to pvlib documentation for details.
907
- https://pvlib-python.readthedocs.io/en/latest/generated/pvlib.pvsystem.PVSystem.html
908
- https://pvlib-python.readthedocs.io/en/latest/generated/pvlib.tracking.SingleAxisTracker.html
909
936
 
910
937
  Returns
911
938
  -------
912
- pvlib PVSystem or SingleAxisTracker object.
939
+ pvlib PVSystem object.
913
940
  """
914
941
  sandia_modules = retrieve_sam('SandiaMod')
915
942
  cec_inverters = retrieve_sam('cecinverter')
916
943
  sandia_module = sandia_modules.iloc[:, 0]
917
944
  cec_inverter = cec_inverters.iloc[:, 0]
918
945
 
946
+ albedo = sys.pop('albedo', None)
919
947
  trck_kwords = ['axis_tilt', 'axis_azimuth', 'max_angle', 'backtrack', 'gcr'] # noqa: E501
920
948
  if any(kword in sys.keys() for kword in trck_kwords):
921
- system = SingleAxisTracker(**sys,
922
- module_parameters=sandia_module,
923
- inverter_parameters=cec_inverter)
949
+ mount = SingleAxisTrackerMount(**sys)
924
950
  else:
925
- system = PVSystem(**sys,
926
- module_parameters=sandia_module,
927
- inverter_parameters=cec_inverter)
951
+ mount = FixedMount(**sys)
952
+ array = Array(mount, albedo=albedo, module_parameters=sandia_module,
953
+ temperature_model_parameters={'u_c': 29.0, 'u_v': 0.0})
954
+ system = PVSystem(arrays=[array], inverter_parameters=cec_inverter)
928
955
 
929
956
  return system
930
957
 
@@ -1002,8 +1029,9 @@ def csky(time_source, loc=None, sys=None, concat=True, output='both'):
1002
1029
  pytz.timezone objects will be converted to strings.
1003
1030
  ints and floats must be in hours from UTC.
1004
1031
  sys : dict
1005
- Dictionary of keywords required to create a pvlib SingleAxisTracker
1006
- or PVSystem.
1032
+ Dictionary of keywords required to create a pvlib
1033
+ :py:class:`~pvlib.pvsystem.SingleAxisTrackerMount` or
1034
+ :py:class:`~pvlib.pvsystem.FixedMount`.
1007
1035
 
1008
1036
  Example dictionaries:
1009
1037
 
@@ -1016,8 +1044,6 @@ def csky(time_source, loc=None, sys=None, concat=True, output='both'):
1016
1044
  'gcr': 0.2, 'albedo': 0.2}
1017
1045
 
1018
1046
  Refer to pvlib documentation for details.
1019
- https://pvlib-python.readthedocs.io/en/latest/generated/pvlib.pvsystem.PVSystem.html
1020
- https://pvlib-python.readthedocs.io/en/latest/generated/pvlib.tracking.SingleAxisTracker.html
1021
1047
  concat : bool, default True
1022
1048
  If concat is True then returns columns as defined by return argument
1023
1049
  added to passed dataframe, otherwise returns just clear sky data.
@@ -1038,14 +1064,17 @@ def csky(time_source, loc=None, sys=None, concat=True, output='both'):
1038
1064
  'poa_ground_diffuse']
1039
1065
 
1040
1066
  if output == 'both':
1041
- csky_df = pd.DataFrame({'poa_mod_csky': mc.total_irrad['poa_global'],
1042
- 'ghi_mod_csky': ghi['ghi']})
1067
+ csky_df = pd.DataFrame({
1068
+ 'poa_mod_csky': mc.results.total_irrad['poa_global'],
1069
+ 'ghi_mod_csky': ghi['ghi']
1070
+ })
1043
1071
  if output == 'poa_all':
1044
- csky_df = mc.total_irrad[cols]
1072
+ csky_df = mc.results.total_irrad[cols]
1045
1073
  if output == 'ghi_all':
1046
1074
  csky_df = ghi[['ghi', 'dni', 'dhi']]
1047
1075
  if output == 'all':
1048
- csky_df = pd.concat([mc.total_irrad[cols], ghi[['ghi', 'dni', 'dhi']]],
1076
+ csky_df = pd.concat([mc.results.total_irrad[cols],
1077
+ ghi[['ghi', 'dni', 'dhi']]],
1049
1078
  axis=1)
1050
1079
 
1051
1080
  ix_no_tz = csky_df.index.tz_localize(None, ambiguous='infer',
@@ -1121,7 +1150,7 @@ def determine_pass_or_fail(cap_ratio, tolerance, nameplate):
1121
1150
  Limits for passing and failing test.
1122
1151
  """
1123
1152
  sign = tolerance.split(sep=' ')[0]
1124
- error = int(tolerance.split(sep=' ')[1]) / 100
1153
+ error = float(tolerance.split(sep=' ')[1]) / 100
1125
1154
 
1126
1155
  nameplate_plus_error = nameplate * (1 + error)
1127
1156
  nameplate_minus_error = nameplate * (1 - error)
@@ -1361,17 +1390,60 @@ def overlay_scatters(measured, expected, expected_label='PVsyst'):
1361
1390
 
1362
1391
 
1363
1392
  def index_capdata(capdata, label, filtered=True):
1393
+ """
1394
+ Like Dataframe.loc but for CapData objects.
1395
+
1396
+ Pass a single label or list of labels to select the columns from the `data` or
1397
+ `data_filtered` DataFrames. The label can be a column name, a column group key, or
1398
+ a regression column key.
1399
+
1400
+ The special label `regcols` will return the columns identified in `regression_cols`.
1401
+
1402
+ Parameters
1403
+ ----------
1404
+ capdata : CapData
1405
+ The CapData object to select from.
1406
+ label : str or list
1407
+ The label or list of labels to select from the `data` or `data_filtered`
1408
+ DataFrames. The label can be a column name, a column group key, or a
1409
+ regression column key. The special label `regcols` will return the columns
1410
+ identified in `regression_cols`.
1411
+ filtered : bool, default True
1412
+ By default the method will return columns from the `data_filtered` DataFrame.
1413
+ Set to False to return columns from the `data` DataFrame.
1414
+
1415
+ Returns
1416
+ --------
1417
+ DataFrame
1418
+ """
1364
1419
  if filtered:
1365
1420
  data = capdata.data_filtered
1366
1421
  else:
1367
1422
  data = capdata.data
1423
+ if label == 'regcols':
1424
+ label = list(capdata.regression_cols.values())
1368
1425
  if isinstance(label, str):
1369
1426
  if label in capdata.column_groups.keys():
1370
- return data[capdata.column_groups[label]]
1427
+ selected_data = data[capdata.column_groups[label]]
1371
1428
  elif label in capdata.regression_cols.keys():
1372
- return data[capdata.column_groups[capdata.regression_cols[label]]]
1429
+ col_or_grp = capdata.regression_cols[label]
1430
+ if col_or_grp in capdata.column_groups.keys():
1431
+ selected_data = data[capdata.column_groups[col_or_grp]]
1432
+ elif col_or_grp in data.columns:
1433
+ selected_data = data[col_or_grp]
1434
+ else:
1435
+ warnings.warn(
1436
+ 'Group or column "{}" mapped to the "{}" key of regression_cols '
1437
+ 'not found in column_groups keys or columns of CapData.data'.format(
1438
+ col_or_grp, label
1439
+ )
1440
+ )
1373
1441
  elif label in data.columns:
1374
- return data.loc[:, label]
1442
+ selected_data = data.loc[:, label]
1443
+ if isinstance(selected_data, pd.Series):
1444
+ return selected_data.to_frame()
1445
+ else:
1446
+ return selected_data
1375
1447
  elif isinstance(label, list):
1376
1448
  cols_to_return = []
1377
1449
  for l in label:
@@ -1418,15 +1490,15 @@ class FilteredLocIndexer(object):
1418
1490
 
1419
1491
  class CapData(object):
1420
1492
  """
1421
- Class to store capacity test data and translation of column names.
1493
+ Class to store capacity test data and column grouping.
1422
1494
 
1423
1495
  CapData objects store a pandas dataframe of measured or simulated data
1424
- and a dictionary used grouping columns by type of measurement.
1496
+ and a dictionary grouping columns by type of measurement.
1425
1497
 
1426
1498
  The `column_groups` dictionary allows maintaining the original column names
1427
1499
  while also grouping measurements of the same type from different
1428
1500
  sensors. Many of the methods for plotting and filtering data rely on the
1429
- column groupings to streamline user interaction.
1501
+ column groupings.
1430
1502
 
1431
1503
  Parameters
1432
1504
  ----------
@@ -1444,18 +1516,11 @@ class CapData(object):
1444
1516
  `group_columns` creates an abbreviated name and a list of columns that
1445
1517
  contain measurements of that type. The abbreviated names are the keys
1446
1518
  and the corresponding values are the lists of columns.
1447
- trans_keys : list
1448
- Simply a list of the `column_groups` keys.
1449
1519
  regression_cols : dictionary
1450
1520
  Dictionary identifying which columns in `data` or groups of columns as
1451
1521
  identified by the keys of `column_groups` are the independent variables
1452
1522
  of the ASTM Capacity test regression equation. Set using
1453
1523
  `set_regression_cols` or by directly assigning a dictionary.
1454
- trans_abrev : dictionary
1455
- Enumerated translation dict keys mapped to original column names.
1456
- Enumerated translation dict keys are used in plot hover tooltip.
1457
- col_colors : dictionary
1458
- Original column names mapped to a color for use in plot function.
1459
1524
  summary_ix : list of tuples
1460
1525
  Holds the row index data modified by the update_summary decorator
1461
1526
  function.
@@ -1480,10 +1545,7 @@ class CapData(object):
1480
1545
  self.data = pd.DataFrame()
1481
1546
  self.data_filtered = None
1482
1547
  self.column_groups = {}
1483
- self.trans_keys = []
1484
1548
  self.regression_cols = {}
1485
- self.trans_abrev = {}
1486
- self.col_colors = {}
1487
1549
  self.summary_ix = []
1488
1550
  self.summary = []
1489
1551
  self.removed = []
@@ -1491,8 +1553,9 @@ class CapData(object):
1491
1553
  self.filter_counts = {}
1492
1554
  self.rc = None
1493
1555
  self.regression_results = None
1494
- self.regression_formula = ('power ~ poa + I(poa * poa)'
1495
- '+ I(poa * t_amb) + I(poa * w_vel) - 1')
1556
+ self.regression_formula = (
1557
+ 'power ~ poa + I(poa * poa) + I(poa * t_amb) + I(poa * w_vel) - 1'
1558
+ )
1496
1559
  self.tolerance = None
1497
1560
  self.pre_agg_cols = None
1498
1561
  self.pre_agg_trans = None
@@ -1533,51 +1596,22 @@ class CapData(object):
1533
1596
  cd_c.data = self.data.copy()
1534
1597
  cd_c.data_filtered = self.data_filtered.copy()
1535
1598
  cd_c.column_groups = copy.copy(self.column_groups)
1536
- cd_c.trans_keys = copy.copy(self.trans_keys)
1537
1599
  cd_c.regression_cols = copy.copy(self.regression_cols)
1538
- cd_c.trans_abrev = copy.copy(self.trans_abrev)
1539
- cd_c.col_colors = copy.copy(self.col_colors)
1540
- cd_c.col_colors = copy.copy(self.col_colors)
1541
1600
  cd_c.summary_ix = copy.copy(self.summary_ix)
1542
1601
  cd_c.summary = copy.copy(self.summary)
1543
1602
  cd_c.rc = copy.copy(self.rc)
1544
1603
  cd_c.regression_results = copy.deepcopy(self.regression_results)
1545
1604
  cd_c.regression_formula = copy.copy(self.regression_formula)
1605
+ cd_c.pre_agg_cols = copy.copy(self.pre_agg_cols)
1606
+ cd_c.pre_agg_trans = copy.deepcopy(self.pre_agg_trans)
1607
+ cd_c.pre_agg_reg_trans = copy.deepcopy(self.pre_agg_reg_trans)
1546
1608
  return cd_c
1547
1609
 
1548
1610
  def empty(self):
1549
1611
  """Return a boolean indicating if the CapData object contains data."""
1550
- tests_indicating_empty = [self.data.empty, len(self.trans_keys) == 0,
1551
- len(self.column_groups) == 0]
1612
+ tests_indicating_empty = [self.data.empty, len(self.column_groups) == 0]
1552
1613
  return all(tests_indicating_empty)
1553
1614
 
1554
- def set_plot_attributes(self):
1555
- """Set column colors used in plot method."""
1556
- # dframe = self.data
1557
-
1558
- group_id_regex = {
1559
- 'real_pwr': re.compile(r'real_pwr|pwr|meter_power|active_pwr|active_power', re.IGNORECASE),
1560
- 'irr_poa': re.compile(r'poa|irr_poa|poa_irr', re.IGNORECASE),
1561
- 'irr_ghi': re.compile(r'ghi|irr_ghi|ghi_irr', re.IGNORECASE),
1562
- 'temp_amb': re.compile(r'amb|temp.*amb', re.IGNORECASE),
1563
- 'temp_mod': re.compile(r'bom|temp.*bom|module.*temp.*|temp.*mod.*', re.IGNORECASE),
1564
- 'wind': re.compile(r'wind|w_vel|wspd|wind__', re.IGNORECASE),
1565
- }
1566
-
1567
- for group_id, cols_in_group in self.column_groups.items():
1568
- col_key = None
1569
- for plot_colors_group_key, regex in group_id_regex.items():
1570
- if regex.match(group_id):
1571
- col_key = plot_colors_group_key
1572
- break
1573
- for i, col in enumerate(cols_in_group):
1574
- try:
1575
- j = i % 4
1576
- self.col_colors[col] = plot_colors_brewer[col_key][j]
1577
- except KeyError:
1578
- j = i % 256
1579
- self.col_colors[col] = cc.glasbey_dark[j]
1580
-
1581
1615
  def drop_cols(self, columns):
1582
1616
  """
1583
1617
  Drop columns from CapData `data` and `column_groups`.
@@ -1620,7 +1654,10 @@ class CapData(object):
1620
1654
  """
1621
1655
  if reg_vars is None:
1622
1656
  reg_vars = list(self.regression_cols.keys())
1623
- df = self.rview(reg_vars, filtered_data=filtered_data).copy()
1657
+ if filtered_data:
1658
+ df = self.floc[reg_vars].copy()
1659
+ else:
1660
+ df = self.loc[reg_vars].copy()
1624
1661
  rename = {df.columns[0]: reg_vars}
1625
1662
 
1626
1663
  if isinstance(reg_vars, list):
@@ -1638,79 +1675,6 @@ class CapData(object):
1638
1675
  df.rename(columns=rename, inplace=True)
1639
1676
  return df
1640
1677
 
1641
- def view(self, tkey, filtered_data=False):
1642
- """
1643
- Convience function returns columns using `column_groups` names.
1644
-
1645
- Parameters
1646
- ----------
1647
- tkey: int or str or list of int or strs
1648
- String or list of strings from self.trans_keys or int postion or
1649
- list of int postitions of value in self.trans_keys.
1650
- """
1651
- if isinstance(tkey, int):
1652
- keys = self.column_groups[self.trans_keys[tkey]]
1653
- elif isinstance(tkey, list) and len(tkey) > 1:
1654
- keys = []
1655
- for key in tkey:
1656
- if isinstance(key, str):
1657
- keys.extend(self.column_groups[key])
1658
- elif isinstance(key, int):
1659
- keys.extend(self.column_groups[self.trans_keys[key]])
1660
- elif tkey in self.trans_keys:
1661
- keys = self.column_groups[tkey]
1662
-
1663
- if filtered_data:
1664
- return self.data_filtered[keys]
1665
- else:
1666
- return self.data[keys]
1667
-
1668
- def rview(self, ind_var, filtered_data=False):
1669
- """
1670
- Convience fucntion to return regression independent variable.
1671
-
1672
- Parameters
1673
- ----------
1674
- ind_var: string or list of strings
1675
- may be 'power', 'poa', 't_amb', 'w_vel', a list of some subset of
1676
- the previous four strings or 'all'
1677
- """
1678
- if ind_var == 'all':
1679
- keys = list(self.regression_cols.values())
1680
- elif isinstance(ind_var, list) and len(ind_var) > 1:
1681
- keys = [self.regression_cols[key] for key in ind_var]
1682
- elif ind_var in met_keys:
1683
- ind_var = [ind_var]
1684
- keys = [self.regression_cols[key] for key in ind_var]
1685
-
1686
- lst = []
1687
- for key in keys:
1688
- if key in self.data.columns:
1689
- lst.extend([key])
1690
- else:
1691
- lst.extend(self.column_groups[key])
1692
- if filtered_data:
1693
- return self.data_filtered[lst]
1694
- else:
1695
- return self.data[lst]
1696
-
1697
- def __comb_trans_keys(self, grp):
1698
- comb_keys = []
1699
-
1700
- for key in self.trans_keys:
1701
- if key.find(grp) != -1:
1702
- comb_keys.append(key)
1703
-
1704
- cols = []
1705
- for key in comb_keys:
1706
- cols.extend(self.column_groups[key])
1707
-
1708
- grp_comb = grp + '_comb'
1709
- if grp_comb not in self.trans_keys:
1710
- self.column_groups[grp_comb] = cols
1711
- self.trans_keys.extend([grp_comb])
1712
- print('Added new group: ' + grp_comb)
1713
-
1714
1678
  def review_column_groups(self):
1715
1679
  """Print `column_groups` with nice formatting."""
1716
1680
  if len(self.column_groups) == 0:
@@ -1741,9 +1705,9 @@ class CapData(object):
1741
1705
  Plots filtered data when true and all data when false.
1742
1706
  """
1743
1707
  if filtered:
1744
- df = self.rview(['power', 'poa'], filtered_data=True)
1708
+ df = self.floc[['power', 'poa']]
1745
1709
  else:
1746
- df = self.rview(['power', 'poa'], filtered_data=False)
1710
+ df = self.loc[['power', 'poa']]
1747
1711
 
1748
1712
  if df.shape[1] != 2:
1749
1713
  return warnings.warn('Aggregate sensors before using this '
@@ -1780,175 +1744,109 @@ class CapData(object):
1780
1744
  vdims = ['power', 'index']
1781
1745
  if all_reg_columns:
1782
1746
  vdims.extend(list(df.columns.difference(vdims)))
1747
+ hover = HoverTool(
1748
+ tooltips=[
1749
+ ('datetime', '@index{%Y-%m-%d %H:%M}'),
1750
+ ('poa', '@poa{0,0.0}'),
1751
+ ('power', '@power{0,0.0}'),
1752
+ ],
1753
+ formatters={
1754
+ '@index': 'datetime',
1755
+ }
1756
+ )
1783
1757
  poa_vs_kw = hv.Scatter(df, 'poa', vdims).opts(
1784
1758
  size=5,
1785
- tools=['hover', 'lasso_select', 'box_select'],
1759
+ tools=[hover, 'lasso_select', 'box_select'],
1786
1760
  legend_position='right',
1787
1761
  height=400,
1788
1762
  width=400,
1763
+ selection_fill_color='red',
1764
+ selection_line_color='red',
1765
+ yformatter=NumeralTickFormatter(format='0,0'),
1789
1766
  )
1790
1767
  # layout_scatter = (poa_vs_kw).opts(opt_dict)
1791
1768
  if timeseries:
1792
- poa_vs_time = hv.Curve(df, 'index', ['power', 'poa']).opts(
1793
- tools=['hover', 'lasso_select', 'box_select'],
1769
+ power_vs_time = hv.Scatter(df, 'index', ['power', 'poa']).opts(
1770
+ tools=[hover, 'lasso_select', 'box_select'],
1771
+ height=400,
1772
+ width=800,
1773
+ selection_fill_color='red',
1774
+ selection_line_color='red',
1775
+ )
1776
+ power_col, poa_col = self.loc[['power', 'poa']].columns
1777
+ power_vs_time_underlay = hv.Curve(
1778
+ self.data.rename_axis('index', axis='index'),
1779
+ 'index',
1780
+ [power_col, poa_col],
1781
+ ).opts(
1782
+ tools=['lasso_select', 'box_select'],
1794
1783
  height=400,
1795
1784
  width=800,
1785
+ line_color='gray',
1786
+ line_width=1,
1787
+ line_alpha=0.4,
1788
+ yformatter=NumeralTickFormatter(format='0,0'),
1796
1789
  )
1797
- layout_timeseries = (poa_vs_kw + poa_vs_time)
1798
- DataLink(poa_vs_kw, poa_vs_time)
1790
+ layout_timeseries = (poa_vs_kw + power_vs_time * power_vs_time_underlay)
1791
+ DataLink(poa_vs_kw, power_vs_time)
1799
1792
  return(layout_timeseries.cols(1))
1800
1793
  else:
1801
1794
  return(poa_vs_kw)
1802
1795
 
1803
- def plot(self, marker='line', ncols=2, width=400, height=350,
1804
- legends=False, merge_grps=['irr', 'temp'], subset=None,
1805
- filtered=False, use_abrev_name=True, **kwargs):
1806
- """
1807
- Create a plot for each group of sensors in self.column_groups.
1808
-
1809
- Function returns a Bokeh grid of figures. A figure is generated for
1810
- each type of measurement identified by the keys in `column_groups` and
1811
- a line is plotted on the figure for each column of measurements of
1812
- that type.
1813
-
1814
- For example, if there are multiple plane of array irradiance sensors,
1815
- the data from each one will be plotted on a single figure.
1816
-
1817
- Figures are not generated for categories that would plot more than 10
1818
- lines.
1796
+ def plot(
1797
+ self,
1798
+ combine=plotting.COMBINE,
1799
+ default_groups=plotting.DEFAULT_GROUPS,
1800
+ width=1500,
1801
+ height=250,
1802
+ **kwargs,
1803
+ ):
1804
+ """
1805
+ Create a dashboard to explore timeseries plots of the data.
1806
+
1807
+ The dashboard contains three tabs: Groups, Layout, and Overlay. The first tab,
1808
+ Groups, presents a column of plots with a separate plot overlaying the measurements
1809
+ for each group of the `column_groups`. The groups plotted are defined by the
1810
+ `default_groups` argument.
1811
+
1812
+ The second tab, Layout, allows manually selecting groups to plot. The button
1813
+ on this tab can be used to replace the column of plots on the Groups tab with
1814
+ the current figure on the Layout tab. Rerun this method after clicking the button
1815
+ to see the new plots in the Groups tab.
1816
+
1817
+ The third tab, Overlay, allows picking a group or any combination of individual
1818
+ tags to overlay on a single plot. The list of groups and tags can be filtered
1819
+ using regular expressions. Adding a text id in the box and clicking Update will
1820
+ add the current overlay to the list of groups on the Layout tab.
1819
1821
 
1820
1822
  Parameters
1821
1823
  ----------
1822
- marker : str, default 'line'
1823
- Accepts 'line', 'circle', 'line-circle'. These are bokeh marker
1824
- options.
1825
- ncols : int, default 2
1826
- Number of columns in the bokeh gridplot.
1827
- width : int, default 400
1828
- Width of individual plots in gridplot.
1829
- height: int, default 350
1830
- Height of individual plots in gridplot.
1831
- legends : bool, default False
1832
- Turn on or off legends for individual plots.
1833
- merge_grps : list, default ['irr', 'temp']
1834
- List of strings to search for in the `column_groups` keys.
1835
- A new entry is added to `column_groups` with keys following the
1836
- format 'search str_comb' and the value is a list of column names
1837
- that contain the search string. The default will combine all
1838
- irradiance measurements into a group and temperature measurements
1839
- into a group.
1840
-
1841
- Pass an empty list to not merge any plots.
1842
-
1843
- Use 'irr-poa' and 'irr-ghi' to plot clear sky modeled with measured
1844
- data.
1845
- subset : list, default None
1846
- List of the keys of `column_groups` to control the order of to plot
1847
- only a subset of the plots or control the order of plots.
1848
- filtered : bool, default False
1849
- Set to true to plot the filtered data.
1850
- kwargs
1851
- Pass additional options to bokeh gridplot. Merge_tools=False will
1852
- shows the hover tool icon, so it can be turned off.
1824
+ combine : dict, optional
1825
+ Dictionary of group names and regex strings to use to identify groups from
1826
+ column groups and individual tags (columns) to combine into new groups. See the
1827
+ `parse_combine` function for more details.
1828
+ default_groups : list of str, optional
1829
+ List of regex strings to use to identify default groups to plot. See the
1830
+ `plotting.find_default_groups` function for more details.
1831
+ group_width : int, optional
1832
+ The width of the plots on the Groups tab.
1833
+ group_height : int, optional
1834
+ The height of the plots on the Groups tab.
1835
+ **kwargs : optional
1836
+ Additional keyword arguments are passed to the options of the scatter plot.
1853
1837
 
1854
1838
  Returns
1855
1839
  -------
1856
- show(grid)
1857
- Command to show grid of figures. Intended for use in jupyter
1858
- notebook.
1859
- """
1860
- for str_val in merge_grps:
1861
- self.__comb_trans_keys(str_val)
1862
-
1863
- if filtered:
1864
- dframe = self.data_filtered
1865
- else:
1866
- dframe = self.data
1867
- dframe.index.name = 'Timestamp'
1868
-
1869
- names_to_abrev = {val: key for key, val in self.trans_abrev.items()}
1870
-
1871
- plots = []
1872
- x_axis = None
1873
-
1874
- source = ColumnDataSource(dframe)
1875
-
1876
- hover = HoverTool()
1877
- hover.tooltips = [
1878
- ("Name", "$name"),
1879
- ("Datetime", "@Timestamp{%F %H:%M}"),
1880
- ("Value", "$y{0,0.00}"),
1881
- ]
1882
- hover.formatters = {"@Timestamp": "datetime"}
1883
-
1884
- tools = 'pan, xwheel_pan, xwheel_zoom, box_zoom, save, reset'
1885
-
1886
- if isinstance(subset, list):
1887
- plot_keys = subset
1888
- else:
1889
- plot_keys = self.trans_keys
1890
-
1891
- for j, key in enumerate(plot_keys):
1892
- df = dframe[self.column_groups[key]]
1893
- cols = df.columns.tolist()
1894
-
1895
- if x_axis is None:
1896
- p = figure(title=key, plot_width=width, plot_height=height,
1897
- x_axis_type='datetime', tools=tools)
1898
- p.tools.append(hover)
1899
- x_axis = p.x_range
1900
- if j > 0:
1901
- p = figure(title=key, plot_width=width, plot_height=height,
1902
- x_axis_type='datetime', x_range=x_axis, tools=tools)
1903
- p.tools.append(hover)
1904
- legend_items = []
1905
- for i, col in enumerate(cols):
1906
- if use_abrev_name:
1907
- name = names_to_abrev[col]
1908
- else:
1909
- name = col
1910
-
1911
- if col.find('csky') == -1:
1912
- line_dash = 'solid'
1913
- else:
1914
- line_dash = (5, 2)
1915
-
1916
- if marker == 'line':
1917
- try:
1918
- series = p.line('Timestamp', col, source=source,
1919
- line_color=self.col_colors[col],
1920
- line_dash=line_dash,
1921
- name=name)
1922
- except KeyError:
1923
- series = p.line('Timestamp', col, source=source,
1924
- line_dash=line_dash,
1925
- name=name)
1926
- elif marker == 'circle':
1927
- series = p.circle('Timestamp', col,
1928
- source=source,
1929
- line_color=self.col_colors[col],
1930
- size=2, fill_color="white",
1931
- name=name)
1932
- if marker == 'line-circle':
1933
- series = p.line('Timestamp', col, source=source,
1934
- line_color=self.col_colors[col],
1935
- name=name)
1936
- series = p.circle('Timestamp', col,
1937
- source=source,
1938
- line_color=self.col_colors[col],
1939
- size=2, fill_color="white",
1940
- name=name)
1941
- legend_items.append((col, [series, ]))
1942
-
1943
- legend = Legend(items=legend_items, location=(40, -5))
1944
- legend.label_text_font_size = '8pt'
1945
- if legends:
1946
- p.add_layout(legend, 'below')
1947
-
1948
- plots.append(p)
1949
-
1950
- grid = gridplot(plots, ncols=ncols, **kwargs)
1951
- return show(grid)
1840
+ Panel tabbed layout
1841
+ """
1842
+ return plotting.plot(
1843
+ self,
1844
+ combine=combine,
1845
+ default_groups=default_groups,
1846
+ group_width=width,
1847
+ group_height=height,
1848
+ **kwargs,
1849
+ )
1952
1850
 
1953
1851
  def scatter_filters(self):
1954
1852
  """
@@ -1961,7 +1859,7 @@ class CapData(object):
1961
1859
  scatters = []
1962
1860
 
1963
1861
  data = self.get_reg_cols(reg_vars=['power', 'poa'], filtered_data=False)
1964
- data['index'] = self.data.loc[:, 'index']
1862
+ data['index'] = self.data.index
1965
1863
  plt_no_filtering = hv.Scatter(data, 'poa', ['power', 'index']).relabel('all')
1966
1864
  scatters.append(plt_no_filtering)
1967
1865
 
@@ -1981,6 +1879,16 @@ class CapData(object):
1981
1879
  scatters.append(plt)
1982
1880
 
1983
1881
  scatter_overlay = hv.Overlay(scatters)
1882
+ hover = HoverTool(
1883
+ tooltips=[
1884
+ ('datetime', '@index{%Y-%m-%d %H:%M}'),
1885
+ ('poa', '@poa{0,0.0}'),
1886
+ ('power', '@power{0,0.0}'),
1887
+ ],
1888
+ formatters={
1889
+ '@index': 'datetime',
1890
+ }
1891
+ )
1984
1892
  scatter_overlay.opts(
1985
1893
  hv.opts.Scatter(
1986
1894
  size=5,
@@ -1989,7 +1897,8 @@ class CapData(object):
1989
1897
  muted_fill_alpha=0,
1990
1898
  fill_alpha=0.4,
1991
1899
  line_width=0,
1992
- tools=['hover'],
1900
+ tools=[hover],
1901
+ yformatter=NumeralTickFormatter(format='0,0'),
1993
1902
  ),
1994
1903
  hv.opts.Overlay(
1995
1904
  legend_position='right',
@@ -2009,8 +1918,8 @@ class CapData(object):
2009
1918
  plots = []
2010
1919
 
2011
1920
  data = self.get_reg_cols(reg_vars='power', filtered_data=False)
2012
- data.reset_index(inplace=True)
2013
- plt_no_filtering = hv.Curve(data, ['Timestamp'], ['power'], label='all')
1921
+ data['Timestamp'] = data.index
1922
+ plt_no_filtering = hv.Curve(data, ['Timestamp'], ['power'], label='all')
2014
1923
  plt_no_filtering.opts(
2015
1924
  line_color='black',
2016
1925
  line_width=1,
@@ -2019,10 +1928,10 @@ class CapData(object):
2019
1928
  )
2020
1929
  plots.append(plt_no_filtering)
2021
1930
 
2022
- d1 = self.rview('power').loc[self.removed[0]['index'], :]
1931
+ d1 = data.loc[self.removed[0]['index'], ['power', 'Timestamp']]
2023
1932
  plt_first_filter = hv.Scatter(
2024
- (d1.index, d1.iloc[:, 0]),
2025
- label=self.removed[0]['name'])
1933
+ d1, ['Timestamp'], ['power'], label=self.removed[0]['name']
1934
+ )
2026
1935
  plots.append(plt_first_filter)
2027
1936
 
2028
1937
  for i, filtering_step in enumerate(self.kept):
@@ -2030,18 +1939,30 @@ class CapData(object):
2030
1939
  break
2031
1940
  else:
2032
1941
  flt_legend = self.kept[i + 1]['name']
2033
- d_flt = self.rview('power').loc[filtering_step['index'], :]
2034
- plt = hv.Scatter((d_flt.index, d_flt.iloc[:, 0]), label=flt_legend)
1942
+ d_flt = data.loc[filtering_step['index'], :]
1943
+ plt = hv.Scatter(
1944
+ d_flt, ['Timestamp'], ['power'], label=flt_legend
1945
+ )
2035
1946
  plots.append(plt)
2036
1947
 
2037
1948
  scatter_overlay = hv.Overlay(plots)
1949
+ hover = HoverTool(
1950
+ tooltips=[
1951
+ ('datetime', '@Timestamp{%Y-%m-%d %H:%M}'),
1952
+ ('power', '@power{0,0.0}'),
1953
+ ],
1954
+ formatters={
1955
+ '@Timestamp': 'datetime',
1956
+ }
1957
+ )
2038
1958
  scatter_overlay.opts(
2039
1959
  hv.opts.Scatter(
2040
1960
  size=5,
2041
1961
  muted_fill_alpha=0,
2042
1962
  fill_alpha=1,
2043
1963
  line_width=0,
2044
- tools=['hover'],
1964
+ tools=[hover],
1965
+ yformatter=NumeralTickFormatter(format='0,0'),
2045
1966
  ),
2046
1967
  hv.opts.Overlay(
2047
1968
  legend_position='bottom',
@@ -2149,8 +2070,9 @@ class CapData(object):
2149
2070
  self.regression_cols['w_vel']: 'mean'}
2150
2071
 
2151
2072
  dfs_to_concat = []
2073
+ agg_names = {}
2152
2074
  for group_id, agg_func in agg_map.items():
2153
- columns_to_aggregate = self.view(group_id, filtered_data=False)
2075
+ columns_to_aggregate = self.loc[group_id]
2154
2076
  if columns_to_aggregate.shape[1] == 1:
2155
2077
  continue
2156
2078
  agg_result = columns_to_aggregate.agg(agg_func, axis=1).to_frame()
@@ -2160,23 +2082,23 @@ class CapData(object):
2160
2082
  col_name = group_id + '_' + agg_func.__name__ + '_agg'
2161
2083
  agg_result.rename(columns={agg_result.columns[0]: col_name}, inplace=True)
2162
2084
  dfs_to_concat.append(agg_result)
2085
+ agg_names[group_id] = col_name
2163
2086
 
2164
2087
  dfs_to_concat.append(self.data)
2165
2088
  # write over data and data_filtered attributes
2166
2089
  self.data = pd.concat(dfs_to_concat, axis=1)
2167
2090
  self.data_filtered = self.data.copy()
2168
2091
 
2169
- # update regression_cols attribute
2092
+ # update regression_cols attribute
2170
2093
  for reg_var, trans_group in self.regression_cols.items():
2171
- if self.rview(reg_var).shape[1] == 1:
2094
+ if self.loc[reg_var].shape[1] == 1:
2172
2095
  continue
2173
- if trans_group in agg_map.keys():
2174
- try:
2175
- agg_col = trans_group + '_' + agg_map[trans_group] + '_agg' # noqa: E501
2176
- except TypeError:
2177
- agg_col = trans_group + '_' + col_name + '_agg'
2178
- print(agg_col)
2179
- self.regression_cols[reg_var] = agg_col
2096
+ if trans_group in agg_names.keys():
2097
+ print(
2098
+ "Regression variable '{}' has been remapped: '{}' to '{}'"
2099
+ .format(reg_var, trans_group, agg_names[trans_group])
2100
+ )
2101
+ self.regression_cols[reg_var] = agg_names[trans_group]
2180
2102
 
2181
2103
  def data_columns_to_excel(self, sort_by_reversed_names=True):
2182
2104
  """
@@ -2485,7 +2407,7 @@ class CapData(object):
2485
2407
  Add option to return plot showing envelope with points not removed
2486
2408
  alpha decreased.
2487
2409
  """
2488
- XandY = self.rview(['poa', 'power'], filtered_data=True)
2410
+ XandY = self.floc[['poa', 'power']]
2489
2411
  if XandY.shape[1] > 2:
2490
2412
  return warnings.warn('Too many columns. Try running '
2491
2413
  'aggregate_sensors before using '
@@ -2528,7 +2450,7 @@ class CapData(object):
2528
2450
  Spec pf column
2529
2451
  Increase options to specify which columns are used in the filter.
2530
2452
  """
2531
- for key in self.trans_keys:
2453
+ for key in self.column_groups.keys():
2532
2454
  if key.find('pf') == 0:
2533
2455
  selection = key
2534
2456
 
@@ -2578,7 +2500,7 @@ class CapData(object):
2578
2500
  power_data = self.get_reg_cols('power')
2579
2501
  elif isinstance(columns, str):
2580
2502
  if columns in self.column_groups.keys():
2581
- power_data = self.view(columns, filtered_data=True)
2503
+ power_data = self.floc[columns]
2582
2504
  multiple_columns = True
2583
2505
  else:
2584
2506
  power_data = pd.DataFrame(self.data_filtered[columns])
@@ -2646,7 +2568,8 @@ class CapData(object):
2646
2568
  self.data_filtered = func(self.data_filtered, *args, **kwargs)
2647
2569
 
2648
2570
  @update_summary
2649
- def filter_sensors(self, perc_diff=None, inplace=True):
2571
+ def filter_sensors(
2572
+ self, perc_diff=None, inplace=True, row_filter=check_all_perc_diff_comb):
2650
2573
  """
2651
2574
  Drop suspicious measurements by comparing values from different sensors.
2652
2575
 
@@ -2682,16 +2605,18 @@ class CapData(object):
2682
2605
  poa_trans_key = regression_cols['poa']
2683
2606
  perc_diff = {poa_trans_key: 0.05}
2684
2607
 
2685
- for key, perc_diff_for_key in perc_diff.items():
2608
+ for key, threshold in perc_diff.items():
2686
2609
  if 'index' in locals():
2687
2610
  # if index has been assigned then take intersection
2688
2611
  sensors_df = df[trans[key]]
2689
- next_index = sensor_filter(sensors_df, perc_diff_for_key)
2612
+ next_index = sensor_filter(
2613
+ sensors_df, threshold, row_filter=row_filter)
2690
2614
  index = index.intersection(next_index) # noqa: F821
2691
2615
  else:
2692
2616
  # if index has not been assigned then assign it
2693
2617
  sensors_df = df[trans[key]]
2694
- index = sensor_filter(sensors_df, perc_diff_for_key)
2618
+ index = sensor_filter(
2619
+ sensors_df, threshold, row_filter=row_filter)
2695
2620
 
2696
2621
  df_out = self.data_filtered.loc[index, :]
2697
2622
 
@@ -2738,7 +2663,7 @@ class CapData(object):
2738
2663
  'load_data clear_sky option.')
2739
2664
  if ghi_col is None:
2740
2665
  ghi_keys = []
2741
- for key in self.trans_keys:
2666
+ for key in self.column_groups.keys():
2742
2667
  defs = key.split('-')
2743
2668
  if len(defs) == 1:
2744
2669
  continue
@@ -2753,7 +2678,7 @@ class CapData(object):
2753
2678
  else:
2754
2679
  meas_ghi = ghi_keys[0]
2755
2680
 
2756
- meas_ghi = self.view(meas_ghi, filtered_data=True)
2681
+ meas_ghi = self.floc[meas_ghi]
2757
2682
  if meas_ghi.shape[1] > 1:
2758
2683
  warnings.warn('Averaging measured GHI data. Pass column name '
2759
2684
  'to ghi_col to use a specific column.')
@@ -2952,8 +2877,7 @@ class CapData(object):
2952
2877
  pandas DataFrame
2953
2878
  If pred=True, then returns a pandas dataframe of results.
2954
2879
  """
2955
- df = self.rview(['poa', 't_amb', 'w_vel'],
2956
- filtered_data=True)
2880
+ df = self.floc[['poa', 't_amb', 'w_vel']]
2957
2881
  df = df.rename(columns={df.columns[0]: 'poa',
2958
2882
  df.columns[1]: 't_amb',
2959
2883
  df.columns[2]: 'w_vel'})
@@ -3041,8 +2965,7 @@ class CapData(object):
3041
2965
  See pandas Grouper documentation for details. Default is left
3042
2966
  labeled and left closed.
3043
2967
  """
3044
- df = self.rview(['poa', 't_amb', 'w_vel', 'power'],
3045
- filtered_data=True)
2968
+ df = self.floc[['poa', 't_amb', 'w_vel', 'power']]
3046
2969
  df = df.rename(columns={df.columns[0]: 'poa',
3047
2970
  df.columns[1]: 't_amb',
3048
2971
  df.columns[2]: 'w_vel',
@@ -3148,7 +3071,7 @@ class CapData(object):
3148
3071
  """
3149
3072
  spatial_uncerts = {}
3150
3073
  for group in column_groups:
3151
- df = self.view(group, filtered_data=True)
3074
+ df = self.floc[group]
3152
3075
  # prevent aggregation from updating column groups?
3153
3076
  # would not need the below line then
3154
3077
  df = df[[col for col in df.columns if 'agg' not in col]]
@@ -3313,6 +3236,19 @@ class CapData(object):
3313
3236
  """
3314
3237
  self.test_complete = self.data_filtered.shape[0] >= pts_required
3315
3238
 
3239
+ def column_groups_to_excel(self, save_to='./column_groups.xlsx'):
3240
+ """Export the column groups attribute to an excel file.
3241
+
3242
+ Parameters
3243
+ ----------
3244
+ save_to : str
3245
+ File path to save column groups to. Should include .xlsx.
3246
+ """
3247
+ pd.DataFrame.from_dict(
3248
+ self.column_groups.data, orient='index'
3249
+ ).stack().to_frame().droplevel(1).to_excel(save_to, header=False)
3250
+
3251
+
3316
3252
  if __name__ == "__main__":
3317
3253
  import doctest
3318
3254
  import pandas as pd # noqa F811