captest 0.12.1__py2.py3-none-any.whl → 0.13.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- captest/__init__.py +1 -0
- captest/_version.py +3 -3
- captest/capdata.py +232 -314
- captest/io.py +65 -23
- captest/plotting.py +492 -0
- captest/util.py +13 -5
- {captest-0.12.1.dist-info → captest-0.13.0.dist-info}/METADATA +1 -1
- captest-0.13.0.dist-info/RECORD +13 -0
- {captest-0.12.1.dist-info → captest-0.13.0.dist-info}/WHEEL +1 -1
- captest-0.12.1.dist-info/RECORD +0 -12
- {captest-0.12.1.dist-info → captest-0.13.0.dist-info}/LICENSE.txt +0 -0
- {captest-0.12.1.dist-info → captest-0.13.0.dist-info}/top_level.txt +0 -0
captest/capdata.py
CHANGED
@@ -40,7 +40,7 @@ from bokeh.io import show
 from bokeh.plotting import figure
 from bokeh.palettes import Category10
 from bokeh.layouts import gridplot
-from bokeh.models import Legend, HoverTool, ColumnDataSource
+from bokeh.models import Legend, HoverTool, ColumnDataSource, NumeralTickFormatter
 
 import param
 
@@ -87,6 +87,7 @@ else:
                   'pvlib package.')
 
 from captest import util
+from captest import plotting
 
 plot_colors_brewer = {'real_pwr': ['#2b8cbe', '#7bccc4', '#bae4bc', '#f0f9e8'],
                       'irr_poa': ['#e31a1c', '#fd8d3c', '#fecc5c', '#ffffb2'],
@@ -423,7 +424,34 @@ def check_all_perc_diff_comb(series, perc_diff):
     return all([perc_difference(x, y) < perc_diff for x, y in c])
 
 
-def sensor_filter(df, perc_diff):
+def abs_diff_from_average(series, threshold):
+    """Check each value in series <= average of other values.
+
+    Drops NaNs from series before calculating difference from average for each value.
+
+    Returns True if there is only one value in the series.
+
+    Parameters
+    ----------
+    series : pd.Series
+        Pandas series of values to check.
+    threshold : numeric
+        Threshold value for absolute difference from average.
+
+    Returns
+    -------
+    bool
+    """
+    series = series.dropna()
+    if len(series) == 1:
+        return True
+    abs_diffs = []
+    for i, val in enumerate(series):
+        abs_diffs.append(abs(val - series.drop(series.index[i]).mean()) <= threshold)
+    return all(abs_diffs)
+
+
+def sensor_filter(df, threshold, row_filter=check_all_perc_diff_comb):
     """
     Check dataframe for rows with inconsistent values.
 
@@ -436,8 +464,7 @@ def sensor_filter(df, perc_diff):
         Percent difference as decimal.
     """
     if df.shape[1] >= 2:
-        bool_ser = df.apply(
-            axis=1)
+        bool_ser = df.apply(row_filter, args=(threshold, ), axis=1)
         return df[bool_ser].index
     elif df.shape[1] == 1:
         return df.index
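Usage sketch (illustrative, not part of the diff) for the `abs_diff_from_average` function and the new `row_filter` hook on `sensor_filter` added above; the example DataFrame and thresholds are assumptions:

import pandas as pd

from captest.capdata import abs_diff_from_average, sensor_filter

# three redundant POA sensors; the 300 W/m^2 reading in the second row is suspect
poa = pd.DataFrame(
    {'poa1': [812.0, 815.0], 'poa2': [810.0, 300.0], 'poa3': [809.0, 813.0]},
    index=pd.date_range('2023-06-01 12:00', periods=2, freq='1min'),
)

# default row filter: keep rows where all pairwise percent differences are below 5%
keep_ix = sensor_filter(poa, 0.05)

# new in 0.13.0: swap in the absolute-difference check, keeping rows where each
# sensor is within 25 W/m^2 of the mean of the other sensors
keep_ix_abs = sensor_filter(poa, 25, row_filter=abs_diff_from_average)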
@@ -1123,7 +1150,7 @@ def determine_pass_or_fail(cap_ratio, tolerance, nameplate):
         Limits for passing and failing test.
     """
     sign = tolerance.split(sep=' ')[0]
-    error =
+    error = float(tolerance.split(sep=' ')[1]) / 100
 
     nameplate_plus_error = nameplate * (1 + error)
     nameplate_minus_error = nameplate * (1 - error)
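A worked example of the tolerance parsing above (the tolerance string shown is an assumption based on the space-separated split):

tolerance = '+/- 5.5'                              # hypothetical tolerance string
sign = tolerance.split(sep=' ')[0]                 # '+/-'
error = float(tolerance.split(sep=' ')[1]) / 100   # 0.055

nameplate = 10000                                  # hypothetical nameplate power
nameplate_plus_error = nameplate * (1 + error)     # 10550.0
nameplate_minus_error = nameplate * (1 - error)    # 9450.0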
@@ -1363,17 +1390,60 @@ def overlay_scatters(measured, expected, expected_label='PVsyst'):
 
 
 def index_capdata(capdata, label, filtered=True):
+    """
+    Like Dataframe.loc but for CapData objects.
+
+    Pass a single label or list of labels to select the columns from the `data` or
+    `data_filtered` DataFrames. The label can be a column name, a column group key, or
+    a regression column key.
+
+    The special label `regcols` will return the columns identified in `regression_cols`.
+
+    Parameters
+    ----------
+    capdata : CapData
+        The CapData object to select from.
+    label : str or list
+        The label or list of labels to select from the `data` or `data_filtered`
+        DataFrames. The label can be a column name, a column group key, or a
+        regression column key. The special label `regcols` will return the columns
+        identified in `regression_cols`.
+    filtered : bool, default True
+        By default the method will return columns from the `data_filtered` DataFrame.
+        Set to False to return columns from the `data` DataFrame.
+
+    Returns
+    --------
+    DataFrame
+    """
     if filtered:
         data = capdata.data_filtered
     else:
         data = capdata.data
+    if label == 'regcols':
+        label = list(capdata.regression_cols.values())
     if isinstance(label, str):
         if label in capdata.column_groups.keys():
-
+            selected_data = data[capdata.column_groups[label]]
         elif label in capdata.regression_cols.keys():
-
+            col_or_grp = capdata.regression_cols[label]
+            if col_or_grp in capdata.column_groups.keys():
+                selected_data = data[capdata.column_groups[col_or_grp]]
+            elif col_or_grp in data.columns:
+                selected_data = data[col_or_grp]
+            else:
+                warnings.warn(
+                    'Group or column "{}" mapped to the "{}" key of regression_cols '
+                    'not found in column_groups keys or columns of CapData.data'.format(
+                        col_or_grp, label
+                    )
+                )
         elif label in data.columns:
-
+            selected_data = data.loc[:, label]
+        if isinstance(selected_data, pd.Series):
+            return selected_data.to_frame()
+        else:
+            return selected_data
     elif isinstance(label, list):
         cols_to_return = []
         for l in label:
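Usage sketch (illustrative, not part of the diff) for `index_capdata`; the CapData instance `cd` and the group key names are hypothetical and assume data has already been loaded and grouped:

from captest.capdata import index_capdata

# cd: a CapData object with data loaded and column_groups populated (assumed)

# a column_groups key returns every column in that group (filtered data by default)
poa_sensors = index_capdata(cd, 'irr_poa_pyranometer')

# a regression_cols key resolves through regression_cols to a group or a single column
power = index_capdata(cd, 'power', filtered=False)

# the special 'regcols' label returns all regression columns at once
reg_data = index_capdata(cd, 'regcols')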
@@ -1420,15 +1490,15 @@ class FilteredLocIndexer(object):
 
 class CapData(object):
     """
-    Class to store capacity test data and
+    Class to store capacity test data and column grouping.
 
     CapData objects store a pandas dataframe of measured or simulated data
-    and a dictionary
+    and a dictionary grouping columns by type of measurement.
 
    The `column_groups` dictionary allows maintaining the original column names
    while also grouping measurements of the same type from different
    sensors. Many of the methods for plotting and filtering data rely on the
-    column groupings
+    column groupings.
 
    Parameters
    ----------
@@ -1446,18 +1516,11 @@ class CapData(object):
        `group_columns` creates an abbreviated name and a list of columns that
        contain measurements of that type. The abbreviated names are the keys
        and the corresponding values are the lists of columns.
-    trans_keys : list
-        Simply a list of the `column_groups` keys.
    regression_cols : dictionary
        Dictionary identifying which columns in `data` or groups of columns as
        identified by the keys of `column_groups` are the independent variables
        of the ASTM Capacity test regression equation. Set using
        `set_regression_cols` or by directly assigning a dictionary.
-    trans_abrev : dictionary
-        Enumerated translation dict keys mapped to original column names.
-        Enumerated translation dict keys are used in plot hover tooltip.
-    col_colors : dictionary
-        Original column names mapped to a color for use in plot function.
    summary_ix : list of tuples
        Holds the row index data modified by the update_summary decorator
        function.
@@ -1482,10 +1545,7 @@ class CapData(object):
         self.data = pd.DataFrame()
         self.data_filtered = None
         self.column_groups = {}
-        self.trans_keys = []
         self.regression_cols = {}
-        self.trans_abrev = {}
-        self.col_colors = {}
         self.summary_ix = []
         self.summary = []
         self.removed = []
@@ -1493,8 +1553,9 @@ class CapData(object):
         self.filter_counts = {}
         self.rc = None
         self.regression_results = None
-        self.regression_formula = (
-
+        self.regression_formula = (
+            'power ~ poa + I(poa * poa) + I(poa * t_amb) + I(poa * w_vel) - 1'
+        )
         self.tolerance = None
         self.pre_agg_cols = None
         self.pre_agg_trans = None
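The reassembled `regression_formula` above is the ASTM-style capacity test model referenced in the class docstring. A generic sketch of fitting it outside captest with statsmodels' formula API (the DataFrame `df` holding power, poa, t_amb, and w_vel columns is assumed):

import statsmodels.formula.api as smf

formula = 'power ~ poa + I(poa * poa) + I(poa * t_amb) + I(poa * w_vel) - 1'
fit = smf.ols(formula=formula, data=df).fit()   # df assumed to hold the four columns
print(fit.params)                               # regression coefficients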
@@ -1535,11 +1596,7 @@ class CapData(object):
         cd_c.data = self.data.copy()
         cd_c.data_filtered = self.data_filtered.copy()
         cd_c.column_groups = copy.copy(self.column_groups)
-        cd_c.trans_keys = copy.copy(self.trans_keys)
         cd_c.regression_cols = copy.copy(self.regression_cols)
-        cd_c.trans_abrev = copy.copy(self.trans_abrev)
-        cd_c.col_colors = copy.copy(self.col_colors)
-        cd_c.col_colors = copy.copy(self.col_colors)
         cd_c.summary_ix = copy.copy(self.summary_ix)
         cd_c.summary = copy.copy(self.summary)
         cd_c.rc = copy.copy(self.rc)
@@ -1552,37 +1609,9 @@ class CapData(object):
 
     def empty(self):
         """Return a boolean indicating if the CapData object contains data."""
-        tests_indicating_empty = [self.data.empty, len(self.
-                                  len(self.column_groups) == 0]
+        tests_indicating_empty = [self.data.empty, len(self.column_groups) == 0]
         return all(tests_indicating_empty)
 
-    def set_plot_attributes(self):
-        """Set column colors used in plot method."""
-        # dframe = self.data
-
-        group_id_regex = {
-            'real_pwr': re.compile(r'real_pwr|pwr|meter_power|active_pwr|active_power', re.IGNORECASE),
-            'irr_poa': re.compile(r'poa|irr_poa|poa_irr', re.IGNORECASE),
-            'irr_ghi': re.compile(r'ghi|irr_ghi|ghi_irr', re.IGNORECASE),
-            'temp_amb': re.compile(r'amb|temp.*amb', re.IGNORECASE),
-            'temp_mod': re.compile(r'bom|temp.*bom|module.*temp.*|temp.*mod.*', re.IGNORECASE),
-            'wind': re.compile(r'wind|w_vel|wspd|wind__', re.IGNORECASE),
-        }
-
-        for group_id, cols_in_group in self.column_groups.items():
-            col_key = None
-            for plot_colors_group_key, regex in group_id_regex.items():
-                if regex.match(group_id):
-                    col_key = plot_colors_group_key
-                    break
-            for i, col in enumerate(cols_in_group):
-                try:
-                    j = i % 4
-                    self.col_colors[col] = plot_colors_brewer[col_key][j]
-                except KeyError:
-                    j = i % 256
-                    self.col_colors[col] = cc.glasbey_dark[j]
-
     def drop_cols(self, columns):
         """
         Drop columns from CapData `data` and `column_groups`.
@@ -1625,7 +1654,10 @@ class CapData(object):
         """
         if reg_vars is None:
             reg_vars = list(self.regression_cols.keys())
-
+        if filtered_data:
+            df = self.floc[reg_vars].copy()
+        else:
+            df = self.loc[reg_vars].copy()
         rename = {df.columns[0]: reg_vars}
 
         if isinstance(reg_vars, list):
@@ -1643,79 +1675,6 @@ class CapData(object):
         df.rename(columns=rename, inplace=True)
         return df
 
-    def view(self, tkey, filtered_data=False):
-        """
-        Convience function returns columns using `column_groups` names.
-
-        Parameters
-        ----------
-        tkey: int or str or list of int or strs
-            String or list of strings from self.trans_keys or int postion or
-            list of int postitions of value in self.trans_keys.
-        """
-        if isinstance(tkey, int):
-            keys = self.column_groups[self.trans_keys[tkey]]
-        elif isinstance(tkey, list) and len(tkey) > 1:
-            keys = []
-            for key in tkey:
-                if isinstance(key, str):
-                    keys.extend(self.column_groups[key])
-                elif isinstance(key, int):
-                    keys.extend(self.column_groups[self.trans_keys[key]])
-        elif tkey in self.trans_keys:
-            keys = self.column_groups[tkey]
-
-        if filtered_data:
-            return self.data_filtered[keys]
-        else:
-            return self.data[keys]
-
-    def rview(self, ind_var, filtered_data=False):
-        """
-        Convience fucntion to return regression independent variable.
-
-        Parameters
-        ----------
-        ind_var: string or list of strings
-            may be 'power', 'poa', 't_amb', 'w_vel', a list of some subset of
-            the previous four strings or 'all'
-        """
-        if ind_var == 'all':
-            keys = list(self.regression_cols.values())
-        elif isinstance(ind_var, list) and len(ind_var) > 1:
-            keys = [self.regression_cols[key] for key in ind_var]
-        elif ind_var in met_keys:
-            ind_var = [ind_var]
-            keys = [self.regression_cols[key] for key in ind_var]
-
-        lst = []
-        for key in keys:
-            if key in self.data.columns:
-                lst.extend([key])
-            else:
-                lst.extend(self.column_groups[key])
-        if filtered_data:
-            return self.data_filtered[lst]
-        else:
-            return self.data[lst]
-
-    def __comb_trans_keys(self, grp):
-        comb_keys = []
-
-        for key in self.trans_keys:
-            if key.find(grp) != -1:
-                comb_keys.append(key)
-
-        cols = []
-        for key in comb_keys:
-            cols.extend(self.column_groups[key])
-
-        grp_comb = grp + '_comb'
-        if grp_comb not in self.trans_keys:
-            self.column_groups[grp_comb] = cols
-            self.trans_keys.extend([grp_comb])
-            print('Added new group: ' + grp_comb)
-
     def review_column_groups(self):
         """Print `column_groups` with nice formatting."""
         if len(self.column_groups) == 0:
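The removed `view`/`rview` helpers are replaced in this release by the label-based `loc`/`floc` indexers used throughout the rest of this diff (see `index_capdata` and `FilteredLocIndexer` above). A rough migration sketch, assuming a loaded CapData instance `cd` and a hypothetical group key:

poa_all = cd.loc['irr_poa_pyranometer']    # was: cd.view('irr_poa_pyranometer')
poa_flt = cd.floc['irr_poa_pyranometer']   # was: cd.view('irr_poa_pyranometer', filtered_data=True)
poa_reg = cd.loc['poa']                    # was: cd.rview('poa')
reg_flt = cd.floc['regcols']               # was: cd.rview('all', filtered_data=True)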
@@ -1746,9 +1705,9 @@ class CapData(object):
         Plots filtered data when true and all data when false.
         """
         if filtered:
-            df = self.
+            df = self.floc[['power', 'poa']]
         else:
-            df = self.
+            df = self.loc[['power', 'poa']]
 
         if df.shape[1] != 2:
             return warnings.warn('Aggregate sensors before using this '
@@ -1785,175 +1744,109 @@ class CapData(object):
         vdims = ['power', 'index']
         if all_reg_columns:
             vdims.extend(list(df.columns.difference(vdims)))
+        hover = HoverTool(
+            tooltips=[
+                ('datetime', '@index{%Y-%m-%d %H:%M}'),
+                ('poa', '@poa{0,0.0}'),
+                ('power', '@power{0,0.0}'),
+            ],
+            formatters={
+                '@index': 'datetime',
+            }
+        )
         poa_vs_kw = hv.Scatter(df, 'poa', vdims).opts(
             size=5,
-            tools=[
+            tools=[hover, 'lasso_select', 'box_select'],
             legend_position='right',
             height=400,
             width=400,
+            selection_fill_color='red',
+            selection_line_color='red',
+            yformatter=NumeralTickFormatter(format='0,0'),
         )
         # layout_scatter = (poa_vs_kw).opts(opt_dict)
         if timeseries:
-
-                tools=[
+            power_vs_time = hv.Scatter(df, 'index', ['power', 'poa']).opts(
+                tools=[hover, 'lasso_select', 'box_select'],
                 height=400,
                 width=800,
+                selection_fill_color='red',
+                selection_line_color='red',
            )
-
-
+            power_col, poa_col = self.loc[['power', 'poa']].columns
+            power_vs_time_underlay = hv.Curve(
+                self.data.rename_axis('index', axis='index'),
+                'index',
+                [power_col, poa_col],
+            ).opts(
+                tools=['lasso_select', 'box_select'],
+                height=400,
+                width=800,
+                line_color='gray',
+                line_width=1,
+                line_alpha=0.4,
+                yformatter=NumeralTickFormatter(format='0,0'),
+            )
+            layout_timeseries = (poa_vs_kw + power_vs_time * power_vs_time_underlay)
+            DataLink(poa_vs_kw, power_vs_time)
             return(layout_timeseries.cols(1))
         else:
             return(poa_vs_kw)
 
-    def plot(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def plot(
+        self,
+        combine=plotting.COMBINE,
+        default_groups=plotting.DEFAULT_GROUPS,
+        width=1500,
+        height=250,
+        **kwargs,
+    ):
+        """
+        Create a dashboard to explore timeseries plots of the data.
+
+        The dashboard contains three tabs: Groups, Layout, and Overlay. The first tab,
+        Groups, presents a column of plots with a separate plot overlaying the measurements
+        for each group of the `column_groups`. The groups plotted are defined by the
+        `default_groups` argument.
+
+        The second tab, Layout, allows manually selecting groups to plot. The button
+        on this tab can be used to replace the column of plots on the Groups tab with
+        the current figure on the Layout tab. Rerun this method after clicking the button
+        to see the new plots in the Groups tab.
+
+        The third tab, Overlay, allows picking a group or any combination of individual
+        tags to overlay on a single plot. The list of groups and tags can be filtered
+        using regular expressions. Adding a text id in the box and clicking Update will
+        add the current overlay to the list of groups on the Layout tab.
 
         Parameters
         ----------
-
-
-
-
-
-
-
-
-
-
-
-
-
-            A new entry is added to `column_groups` with keys following the
-            format 'search str_comb' and the value is a list of column names
-            that contain the search string. The default will combine all
-            irradiance measurements into a group and temperature measurements
-            into a group.
-
-            Pass an empty list to not merge any plots.
-
-            Use 'irr-poa' and 'irr-ghi' to plot clear sky modeled with measured
-            data.
-        subset : list, default None
-            List of the keys of `column_groups` to control the order of to plot
-            only a subset of the plots or control the order of plots.
-        filtered : bool, default False
-            Set to true to plot the filtered data.
-        kwargs
-            Pass additional options to bokeh gridplot. Merge_tools=False will
-            shows the hover tool icon, so it can be turned off.
+        combine : dict, optional
+            Dictionary of group names and regex strings to use to identify groups from
+            column groups and individual tags (columns) to combine into new groups. See the
+            `parse_combine` function for more details.
+        default_groups : list of str, optional
+            List of regex strings to use to identify default groups to plot. See the
+            `plotting.find_default_groups` function for more details.
+        group_width : int, optional
+            The width of the plots on the Groups tab.
+        group_height : int, optional
+            The height of the plots on the Groups tab.
+        **kwargs : optional
+            Additional keyword arguments are passed to the options of the scatter plot.
 
         Returns
         -------
-
-
-
-
-
-
-
-
-
-
-        dframe = self.data
-        dframe.index.name = 'Timestamp'
-
-        names_to_abrev = {val: key for key, val in self.trans_abrev.items()}
-
-        plots = []
-        x_axis = None
-
-        source = ColumnDataSource(dframe)
-
-        hover = HoverTool()
-        hover.tooltips = [
-            ("Name", "$name"),
-            ("Datetime", "@Timestamp{%F %H:%M}"),
-            ("Value", "$y{0,0.00}"),
-        ]
-        hover.formatters = {"@Timestamp": "datetime"}
-
-        tools = 'pan, xwheel_pan, xwheel_zoom, box_zoom, save, reset'
-
-        if isinstance(subset, list):
-            plot_keys = subset
-        else:
-            plot_keys = self.trans_keys
-
-        for j, key in enumerate(plot_keys):
-            df = dframe[self.column_groups[key]]
-            cols = df.columns.tolist()
-
-            if x_axis is None:
-                p = figure(title=key, width=width, height=height,
-                           x_axis_type='datetime', tools=tools)
-                p.tools.append(hover)
-                x_axis = p.x_range
-            if j > 0:
-                p = figure(title=key, width=width, height=height,
-                           x_axis_type='datetime', x_range=x_axis, tools=tools)
-                p.tools.append(hover)
-            legend_items = []
-            for i, col in enumerate(cols):
-                if use_abrev_name:
-                    name = names_to_abrev[col]
-                else:
-                    name = col
-
-                if col.find('csky') == -1:
-                    line_dash = 'solid'
-                else:
-                    line_dash = (5, 2)
-
-                if marker == 'line':
-                    try:
-                        series = p.line('Timestamp', col, source=source,
-                                        line_color=self.col_colors[col],
-                                        line_dash=line_dash,
-                                        name=name)
-                    except KeyError:
-                        series = p.line('Timestamp', col, source=source,
-                                        line_dash=line_dash,
-                                        name=name)
-                elif marker == 'circle':
-                    series = p.circle('Timestamp', col,
-                                      source=source,
-                                      line_color=self.col_colors[col],
-                                      size=2, fill_color="white",
-                                      name=name)
-                if marker == 'line-circle':
-                    series = p.line('Timestamp', col, source=source,
-                                    line_color=self.col_colors[col],
-                                    name=name)
-                    series = p.circle('Timestamp', col,
-                                      source=source,
-                                      line_color=self.col_colors[col],
-                                      size=2, fill_color="white",
-                                      name=name)
-                legend_items.append((col, [series, ]))
-
-            legend = Legend(items=legend_items, location=(40, -5))
-            legend.label_text_font_size = '8pt'
-            if legends:
-                p.add_layout(legend, 'below')
-
-            plots.append(p)
-
-        grid = gridplot(plots, ncols=ncols, **kwargs)
-        return show(grid)
+            Panel tabbed layout
+        """
+        return plotting.plot(
+            self,
+            combine=combine,
+            default_groups=default_groups,
+            group_width=width,
+            group_height=height,
+            **kwargs,
+        )
 
     def scatter_filters(self):
         """
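Usage sketch (illustrative, not part of the diff) for the new dashboard-style `plot` method; `cd` is an assumed CapData instance with grouped data and the regex strings are illustrative only:

# defaults come from captest.plotting (plotting.COMBINE and plotting.DEFAULT_GROUPS)
dashboard = cd.plot(width=1200, height=220)

# or choose which groups land on the Groups tab and how tags are combined
dashboard = cd.plot(
    combine={'irradiance': 'irr'},      # illustrative regex -> new combined group
    default_groups=['irr', 'temp'],     # illustrative regexes selecting default plots
)
dashboard                               # display the Panel tabbed layout in a notebook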
@@ -1966,7 +1859,7 @@ class CapData(object):
         scatters = []
 
         data = self.get_reg_cols(reg_vars=['power', 'poa'], filtered_data=False)
-        data['index'] = self.data.
+        data['index'] = self.data.index
         plt_no_filtering = hv.Scatter(data, 'poa', ['power', 'index']).relabel('all')
         scatters.append(plt_no_filtering)
 
@@ -1986,6 +1879,16 @@ class CapData(object):
             scatters.append(plt)
 
         scatter_overlay = hv.Overlay(scatters)
+        hover = HoverTool(
+            tooltips=[
+                ('datetime', '@index{%Y-%m-%d %H:%M}'),
+                ('poa', '@poa{0,0.0}'),
+                ('power', '@power{0,0.0}'),
+            ],
+            formatters={
+                '@index': 'datetime',
+            }
+        )
         scatter_overlay.opts(
             hv.opts.Scatter(
                 size=5,
@@ -1994,7 +1897,8 @@ class CapData(object):
                 muted_fill_alpha=0,
                 fill_alpha=0.4,
                 line_width=0,
-                tools=[
+                tools=[hover],
+                yformatter=NumeralTickFormatter(format='0,0'),
             ),
             hv.opts.Overlay(
                 legend_position='right',
@@ -2014,8 +1918,8 @@ class CapData(object):
         plots = []
 
         data = self.get_reg_cols(reg_vars='power', filtered_data=False)
-        data.
-        plt_no_filtering
+        data['Timestamp'] = data.index
+        plt_no_filtering = hv.Curve(data, ['Timestamp'], ['power'], label='all')
         plt_no_filtering.opts(
             line_color='black',
             line_width=1,
@@ -2024,10 +1928,10 @@ class CapData(object):
         )
         plots.append(plt_no_filtering)
 
-        d1 =
+        d1 = data.loc[self.removed[0]['index'], ['power', 'Timestamp']]
         plt_first_filter = hv.Scatter(
-
-
+            d1, ['Timestamp'], ['power'], label=self.removed[0]['name']
+        )
         plots.append(plt_first_filter)
 
         for i, filtering_step in enumerate(self.kept):
@@ -2035,18 +1939,30 @@ class CapData(object):
                 break
             else:
                 flt_legend = self.kept[i + 1]['name']
-            d_flt =
-            plt = hv.Scatter(
+            d_flt = data.loc[filtering_step['index'], :]
+            plt = hv.Scatter(
+                d_flt, ['Timestamp'], ['power'], label=flt_legend
+            )
             plots.append(plt)
 
         scatter_overlay = hv.Overlay(plots)
+        hover = HoverTool(
+            tooltips=[
+                ('datetime', '@Timestamp{%Y-%m-%d %H:%M}'),
+                ('power', '@power{0,0.0}'),
+            ],
+            formatters={
+                '@Timestamp': 'datetime',
+            }
+        )
         scatter_overlay.opts(
             hv.opts.Scatter(
                 size=5,
                 muted_fill_alpha=0,
                 fill_alpha=1,
                 line_width=0,
-                tools=[
+                tools=[hover],
+                yformatter=NumeralTickFormatter(format='0,0'),
             ),
             hv.opts.Overlay(
                 legend_position='bottom',
@@ -2154,8 +2070,9 @@ class CapData(object):
                    self.regression_cols['w_vel']: 'mean'}
 
         dfs_to_concat = []
+        agg_names = {}
         for group_id, agg_func in agg_map.items():
-            columns_to_aggregate = self.
+            columns_to_aggregate = self.loc[group_id]
             if columns_to_aggregate.shape[1] == 1:
                 continue
             agg_result = columns_to_aggregate.agg(agg_func, axis=1).to_frame()
@@ -2165,23 +2082,23 @@ class CapData(object):
             col_name = group_id + '_' + agg_func.__name__ + '_agg'
             agg_result.rename(columns={agg_result.columns[0]: col_name}, inplace=True)
             dfs_to_concat.append(agg_result)
+            agg_names[group_id] = col_name
 
         dfs_to_concat.append(self.data)
         # write over data and data_filtered attributes
         self.data = pd.concat(dfs_to_concat, axis=1)
         self.data_filtered = self.data.copy()
 
-        # update regression_cols attribute
+        # update regression_cols attribute
         for reg_var, trans_group in self.regression_cols.items():
-            if self.
+            if self.loc[reg_var].shape[1] == 1:
                 continue
-            if trans_group in
-
-
-
-
-
-                self.regression_cols[reg_var] = agg_col
+            if trans_group in agg_names.keys():
+                print(
+                    "Regression variable '{}' has been remapped: '{}' to '{}'"
+                    .format(reg_var, trans_group, agg_names[trans_group])
+                )
+                self.regression_cols[reg_var] = agg_names[trans_group]
 
     def data_columns_to_excel(self, sort_by_reversed_names=True):
         """
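Sketch (illustrative, not part of the diff) of the remapping printed above; the group keys and the resulting aggregate column name are hypothetical but follow the group_id + aggregation + '_agg' pattern shown, assuming a CapData instance `cd`:

# hypothetical grouping: the 'poa' regression variable mapped to a multi-sensor group
cd.regression_cols['poa']    # 'irr_poa'  (a column_groups key)

# after the sensor aggregation above runs with a mean aggregation, the print reports:
#   Regression variable 'poa' has been remapped: 'irr_poa' to 'irr_poa_mean_agg'
cd.regression_cols['poa']    # 'irr_poa_mean_agg'  (the new aggregate column)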
@@ -2490,7 +2407,7 @@ class CapData(object):
         Add option to return plot showing envelope with points not removed
         alpha decreased.
         """
-        XandY = self.
+        XandY = self.floc[['poa', 'power']]
         if XandY.shape[1] > 2:
             return warnings.warn('Too many columns. Try running '
                                  'aggregate_sensors before using '
@@ -2533,7 +2450,7 @@ class CapData(object):
         Spec pf column
         Increase options to specify which columns are used in the filter.
         """
-        for key in self.
+        for key in self.column_groups.keys():
             if key.find('pf') == 0:
                 selection = key
 
@@ -2583,7 +2500,7 @@ class CapData(object):
             power_data = self.get_reg_cols('power')
         elif isinstance(columns, str):
             if columns in self.column_groups.keys():
-                power_data = self.
+                power_data = self.floc[columns]
                 multiple_columns = True
             else:
                 power_data = pd.DataFrame(self.data_filtered[columns])
@@ -2651,7 +2568,8 @@ class CapData(object):
         self.data_filtered = func(self.data_filtered, *args, **kwargs)
 
     @update_summary
-    def filter_sensors(
+    def filter_sensors(
+            self, perc_diff=None, inplace=True, row_filter=check_all_perc_diff_comb):
         """
         Drop suspicious measurments by comparing values from different sensors.
 
@@ -2687,16 +2605,18 @@ class CapData(object):
             poa_trans_key = regression_cols['poa']
             perc_diff = {poa_trans_key: 0.05}
 
-        for key,
+        for key, threshold in perc_diff.items():
             if 'index' in locals():
                 # if index has been assigned then take intersection
                 sensors_df = df[trans[key]]
-                next_index = sensor_filter(
+                next_index = sensor_filter(
+                    sensors_df, threshold, row_filter=row_filter)
                 index = index.intersection(next_index)  # noqa: F821
             else:
                 # if index has not been assigned then assign it
                 sensors_df = df[trans[key]]
-                index = sensor_filter(
+                index = sensor_filter(
+                    sensors_df, threshold, row_filter=row_filter)
 
         df_out = self.data_filtered.loc[index, :]
 
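Usage sketch (illustrative, not part of the diff) for the reworked `filter_sensors`; `cd` is an assumed CapData instance and the POA group key is hypothetical. Thresholds are given per column-group key, and the row filter can now be swapped:

from captest.capdata import abs_diff_from_average

# default: pairwise percent-difference check, 5% on the POA sensor group
cd.filter_sensors(perc_diff={'irr_poa_pyranometer': 0.05})

# new in 0.13.0: absolute-difference check, threshold in the measurement's own units
cd.filter_sensors(
    perc_diff={'irr_poa_pyranometer': 25},
    row_filter=abs_diff_from_average,
)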
@@ -2743,7 +2663,7 @@ class CapData(object):
                           'load_data clear_sky option.')
         if ghi_col is None:
             ghi_keys = []
-            for key in self.
+            for key in self.column_groups.keys():
                 defs = key.split('-')
                 if len(defs) == 1:
                     continue
@@ -2758,7 +2678,7 @@ class CapData(object):
             else:
                 meas_ghi = ghi_keys[0]
 
-            meas_ghi = self.
+            meas_ghi = self.floc[meas_ghi]
             if meas_ghi.shape[1] > 1:
                 warnings.warn('Averaging measured GHI data. Pass column name '
                               'to ghi_col to use a specific column.')
@@ -2957,8 +2877,7 @@ class CapData(object):
         pandas DataFrame
             If pred=True, then returns a pandas dataframe of results.
         """
-        df = self.
-                filtered_data=True)
+        df = self.floc[['poa', 't_amb', 'w_vel']]
         df = df.rename(columns={df.columns[0]: 'poa',
                                 df.columns[1]: 't_amb',
                                 df.columns[2]: 'w_vel'})
@@ -3046,8 +2965,7 @@ class CapData(object):
             See pandas Grouper doucmentation for details. Default is left
             labeled and left closed.
         """
-        df = self.
-                filtered_data=True)
+        df = self.floc[['poa', 't_amb', 'w_vel', 'power']]
         df = df.rename(columns={df.columns[0]: 'poa',
                                 df.columns[1]: 't_amb',
                                 df.columns[2]: 'w_vel',
@@ -3153,7 +3071,7 @@ class CapData(object):
         """
         spatial_uncerts = {}
         for group in column_groups:
-            df = self.
+            df = self.floc[group]
             # prevent aggregation from updating column groups?
             # would not need the below line then
             df = df[[col for col in df.columns if 'agg' not in col]]