captest 0.11.2__py2.py3-none-any.whl → 0.13.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
captest/io.py CHANGED
@@ -1,4 +1,5 @@
  # this file is formatted with black
+ import copy
  import dateutil
  import datetime
  from pathlib import Path
@@ -19,6 +20,7 @@ from captest import util
  def flatten_multi_index(columns):
      return ["_".join(col_name) for col_name in columns.to_list()]

+
  def load_excel_column_groups(path):
      """
      Load column groups from an excel file.
@@ -45,9 +47,10 @@ def load_excel_column_groups(path):
      dict
          Dictionary mapping column group names to lists of column names.
      """
-     df = pd.read_excel(path, header=None).fillna(method="ffill")
+     df = pd.read_excel(path, header=None).ffill(axis='index')
      return df.groupby(0)[1].apply(list).to_dict()

+
  def load_pvsyst(
      path,
      name="pvsyst",
@@ -58,6 +61,9 @@ def load_pvsyst(
      """
      Load data from a PVsyst energy production model.

+     Will load day first or month first dates. Expects files that use a comma as a
+     separator rather than a semicolon.
+
      Parameters
      ----------
      path : str
@@ -70,7 +76,8 @@ def load_pvsyst(
          By default sets power to E_Grid, poa to GlobInc, t_amb to T Amb, and w_vel to
          WindVel. Set to False to not set regression columns on load.
      **kwargs
-         Use to pass additional kwargs to pandas read_csv.
+         Use to pass additional kwargs to pandas read_csv. Pass sep=';' to load files
+         that use semicolons instead of commas as the separator.

      Returns
      -------
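
Note: per the updated docstring, the separator can now be forwarded to pandas read_csv. A hedged usage sketch (the file path is hypothetical):

    from captest.io import load_pvsyst

    pvsyst = load_pvsyst("./data/pvsyst_hourly.csv")           # comma-separated
    pvsyst = load_pvsyst("./data/pvsyst_hourly.csv", sep=";")  # semicolon-separated
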
@@ -107,8 +114,33 @@ def load_pvsyst(
              break

      pvraw.columns = pvraw.columns.droplevel(1)
-     dates = pvraw.loc[:, "date"]
      try:
+         dates = pvraw.loc[:, "date"]
+     except KeyError:
+         warnings.warn(
+             "No 'date' column found in the PVsyst data. This may be due to "
+             "the separator being a semicolon ';' rather than a comma ','. "
+             "If this is the case, try passing sep=';' when calling load_pvsyst. "
+             "Otherwise the date column may actually be missing. Exception:"
+         )
+         raise
+     # PVsyst creates dates like '01/01/90 00:00' i.e. January 1st, 1990.
+     # Opening the PVsyst output in excel will likely result in the dates modified to
+     # 1/1/1990 0:00. The strftime format specified won't load the excel modified dates
+     # so these are caught by checking for consistent length and reformatted
+     if not all(dates.str.len() == 14):
+         date_parts = dates.str.split(' ').str[0].str.split('/')
+         time_parts = dates.str.split(' ').str[1].str.split(':')
+         dates = (
+             date_parts.str[0].str.zfill(2) + '/' +
+             date_parts.str[1].str.zfill(2) + '/' +
+             '90 ' +
+             time_parts.str[0].str.zfill(2) + ':' +
+             time_parts.str[1]
+         )
+     try:
+         # mm/dd/yy hh:mm, lower case y gives
+         # Year without century as a zero-padded decimal number. e.g. 00, 01, …, 99
          dt_index = pd.to_datetime(dates, format="%m/%d/%y %H:%M")
      except ValueError:
          warnings.warn(
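
Note: the block added above pads Excel-shortened timestamps back to the fixed 14-character '01/01/90 00:00' form before parsing; the year is hard-coded to '90' because PVsyst output uses 1990. The same string handling in isolation (example timestamps are made up):

    import pandas as pd

    dates = pd.Series(["1/1/1990 0:00", "01/01/90 00:00"])  # Excel-modified and original
    if not all(dates.str.len() == 14):
        date_parts = dates.str.split(" ").str[0].str.split("/")
        time_parts = dates.str.split(" ").str[1].str.split(":")
        dates = (
            date_parts.str[0].str.zfill(2) + "/"
            + date_parts.str[1].str.zfill(2) + "/"
            + "90 "
            + time_parts.str[0].str.zfill(2) + ":"
            + time_parts.str[1]
        )
    print(pd.to_datetime(dates, format="%m/%d/%y %H:%M"))  # both parse to 1990-01-01
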
@@ -131,7 +163,6 @@ def load_pvsyst(
          cd.data["E_Grid"] = cd.data["E_Grid"] / egrid_unit_adj_factor
      cd.data_filtered = cd.data.copy()
      cd.column_groups = cg.group_columns(cd.data)
-     cd.trans_keys = list(cd.column_groups.keys())
      if set_regression_columns:
          cd.set_regression_cols(
              power="E_Grid", poa="GlobInc", t_amb="T_Amb", w_vel="WindVel"
@@ -139,15 +170,13 @@ def load_pvsyst(
      return cd


-
-
  def file_reader(path, **kwargs):
      """
      Read measured solar data from a csv file.

      Utilizes pandas read_csv to import measure solar data from a csv file.
-     Attempts a few diferent encodings, trys to determine the header end
-     by looking for a date in the first column, and concantenates column
+     Attempts a few different encodings, tries to determine the header end
+     by looking for a date in the first column, and concatenates column
      headings to a single string.

      Parameters
@@ -161,16 +190,20 @@ def file_reader(path, **kwargs):
      -------
      pandas DataFrame
      """
+     default_kwargs = {
+         'index_col': 0,
+         'parse_dates': True,
+         'skip_blank_lines': True,
+         'low_memory': False,
+     }
+     for key, value in default_kwargs.items():
+         kwargs.setdefault(key, value)
      encodings = ["utf-8", "latin1", "iso-8859-1", "cp1252"]
      for encoding in encodings:
+         kwargs['encoding'] = encoding
          try:
              data_file = pd.read_csv(
                  path,
-                 encoding=encoding,
-                 index_col=0,
-                 parse_dates=True,
-                 skip_blank_lines=True,
-                 low_memory=False,
                  **kwargs,
              )
          except UnicodeDecodeError:
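
Note: moving the hard-coded read_csv arguments into setdefault calls means any caller-supplied kwarg now wins over the defaults, while encoding is still managed by the retry loop (so a caller-supplied encoding is overwritten). A sketch, with a hypothetical file path:

    from captest.io import file_reader

    # index_col=1 overrides the default of 0; na_values passes straight
    # through to pandas read_csv.
    df = file_reader("./data/meas_data.csv", index_col=1, na_values=["#N/A"])
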
@@ -178,6 +211,12 @@ def file_reader(path, **kwargs):
          else:
              break
      data_file.dropna(how="all", axis=0, inplace=True)
+     if data_file.index.equals(pd.Index(np.arange(len(data_file.index)))):
+         kwargs['index_col'] = 1
+         data_file = pd.read_csv(
+             path,
+             **kwargs,
+         )
      if not isinstance(data_file.index[0], pd.Timestamp):
          for i, _indice in enumerate(data_file.index):
              try:
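
Note: the retry added above catches files whose first column is a plain row counter. After the first read, an index equal to 0..n-1 suggests the timestamps live in the second column, so the file is re-read with index_col=1. The detection on its own:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": [10, 20, 30]})  # default RangeIndex 0, 1, 2
    print(df.index.equals(pd.Index(np.arange(len(df.index)))))  # True -> re-read
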
@@ -189,18 +228,11 @@ def file_reader(path, **kwargs):
              except ValueError:
                  continue
          header = list(np.arange(header_end))
+         kwargs.setdefault('header', header)
          data_file = pd.read_csv(
              path,
-             encoding=encoding,
-             header=header,
-             index_col=0,
-             parse_dates=True,
-             skip_blank_lines=True,
-             low_memory=False,
              **kwargs,
          )
-
-     data_file = data_file.apply(pd.to_numeric)
      if isinstance(data_file.columns, pd.MultiIndex):
          data_file.columns = flatten_multi_index(data_file.columns)
      data_file = data_file.rename(columns=(lambda x: x.strip()))
@@ -218,6 +250,7 @@ class DataLoader:
      sys: Optional[dict] = field(default=None)
      file_reader: object = file_reader
      files_to_load: Optional[list] = field(default=None)
+     failed_to_load: Optional[list] = field(default=None)

      def __setattr__(self, key, value):
          if key == "path":
@@ -256,7 +289,6 @@ class DataLoader:
              current_file, missing_intervals, freq_str = util.reindex_datetime(
                  file,
                  report=False,
-                 add_index_col=True,
              )
              reindexed_dfs[name] = current_file
              file_frequencies.append(freq_str)
@@ -315,43 +347,86 @@ class DataLoader:
          data = data.apply(pd.to_numeric, errors="coerce")
          return data

-     def load(self, extension="csv", **kwargs):
+     def load(self, extension="csv", verbose=True, print_errors=False, **kwargs):
          """
          Load file(s) of timeseries data from SCADA / DAS systems.

-         This is a convience function to generate an instance of DataLoader
-         and call the `load` method.
+         Set `path` to the path to a file to load a single file. Set `path` to the path
+         to a directory of files to load all the files in the directory ending in "csv".
+         Or, set `files_to_load` to a list of specific files to load.

-         A single file or multiple files can be loaded. Multiple files will be joined together
-         and may include files with different column headings. When multiple files with
-         matching column headings are loaded, the individual files will be reindexed and
-         then joined. Missing time intervals within the individual files will be filled,
+         Multiple files will be joined together and may include files with different
+         column headings. When multiple files with matching column headings are loaded,
+         the individual files will be reindexed and then joined.
+
+         Missing time intervals within the individual files will be filled,
          but missing time intervals between the individual files will not be filled.

+         When loading multiple files they will be stored in `loaded_files`, a dictionary,
+         mapping the file names to a dataframe for each file.
+
          Parameters
          ----------
          extension : str, default "csv"
              Change the extension to allow loading different filetypes. Must also set
              the `file_reader` attribute to a function that will read that type of file.
+             Do not include a period ".".
+         verbose : bool, default True
+             By default prints path of each file attempted to load and then confirmation
+             it was loaded or states it failed to load. Is only relevant if `path` is
+             set to a directory not a file. Set to False to not print out any file
+             loading status.
+         print_errors : bool, default False
+             Set to true to print error if file fails to load.
+         **kwargs
+             Are passed through to the file_reader callable, which by default will pass
+             them on to pandas.read_csv.
+
+         Returns
+         -------
+         None
+             Resulting DataFrame of data is stored to the `data` attribute.
          """
          if self.path.is_file():
-             self.data = self.file_reader(self.path)
+             self.data = self.file_reader(self.path, **kwargs)
          elif self.path.is_dir():
-             if self.files_to_load is not None:
-                 self.loaded_files = {
-                     file.stem: self.file_reader(file) for file in self.files_to_load
-                 }
-             else:
+             if self.files_to_load is None:
                  self.set_files_to_load(extension=extension)
-             self.loaded_files = {
-                 file.stem: self.file_reader(file, **kwargs) for file in self.files_to_load
-             }
-             (
-                 self.loaded_files,
-                 self.common_freq,
-                 self.file_frequencies,
-             ) = self._reindex_loaded_files()
-             data = self._join_files()
+             self.loaded_files = dict()
+             failed_to_load_count = 0
+             for file in self.files_to_load:
+                 try:
+                     if verbose:
+                         print('trying to load {}'.format(file))
+                     self.loaded_files[file.stem] = self.file_reader(file, **kwargs)
+                     if verbose:
+                         print(' loaded {}'.format(file))
+                 except Exception as err:
+                     if self.failed_to_load is None:
+                         self.failed_to_load = []
+                     self.failed_to_load.append(file)
+                     print(' **FAILED to load {}'.format(file))
+                     print(
+                         ' To review full stack traceback run \n'
+                         ' meas.data_loader.file_reader(meas.data_loader'
+                         '.failed_to_load[{}])'.format(failed_to_load_count)
+                     )
+                     if print_errors:
+                         print(err)
+                     failed_to_load_count += 1
+                     continue
+             if len(self.loaded_files) == 0:
+                 warnings.warn(
+                     "No files were loaded. Check that file_reader is working")
+             elif len(self.loaded_files) > 1:
+                 (
+                     self.loaded_files,
+                     self.common_freq,
+                     self.file_frequencies,
+                 ) = self._reindex_loaded_files()
+                 data = self._join_files()
+             elif len(self.loaded_files) == 1:
+                 data = list(self.loaded_files.values())[0]
              data.index.name = "Timestamp"
              self.data = data
          else:
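
Note: with the rewritten loop, one unreadable file no longer aborts the whole load; failed paths are collected on failed_to_load. A hedged usage sketch (the directory path is hypothetical):

    from captest.io import DataLoader

    dl = DataLoader(path="./data/")
    dl.load(verbose=True, print_errors=True)

    # Re-run a single failed file to get the full traceback.
    if dl.failed_to_load:
        dl.file_reader(dl.failed_to_load[0])
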
@@ -380,12 +455,13 @@ def load_data(
      reindex=True,
      site=None,
      column_groups_template=False,
+     verbose=False,
      **kwargs,
  ):
      """
      Load file(s) of timeseries data from SCADA / DAS systems.

-     This is a convience function to generate an instance of DataLoader
+     This is a convenience function to generate an instance of DataLoader
      and call the `load` method.

      A single file or multiple files can be loaded. Multiple files will be joined together
@@ -397,7 +473,7 @@ def load_data(
          Path to either a single file to load or a directory of files to load.
      group_columns : function or str, default columngroups.group_columns
          Function to use to group the columns of the loaded data. Function should accept
-         a DataFrame and return a dictionary with keys that are ids and valeus that are
+         a DataFrame and return a dictionary with keys that are ids and values that are
          lists of column names. Will be set to the `group_columns` attribute of the
          CapData.DataLoader object.
          Provide a string to load column grouping from a json, yaml, or excel file. The
@@ -414,32 +490,34 @@ def load_data(
      sort : bool, default True
          By default sorts the data by the datetime index from old to new.
      drop_duplicates : bool, default True
-         By default drops rows of the joined data where all the columns are duplicats
+         By default drops rows of the joined data where all the columns are duplicates
          of another row. Keeps the first instance of the duplicated values. This is
-         helpful if individual datafiles have overlaping rows with the same data.
+         helpful if individual data files have overlapping rows with the same data.
      reindex : bool, default True
          By default will create a new index for the data using the earliest datetime,
          latest datetime, and the most frequent time interval ensuring there are no
          missing intervals.
-     site : dict, default None
-         Pass a dictionary containing site data, which will be used to generate
-         modeled clear sky ghi and poa values. The clear sky irradiance values are
-         added to the data and the column_groups attribute is updated to include these
-         two irradiance columns. The site data dictionary should be
-         {sys: {system data}, loc: {location data}}. See the capdata.csky documentation
-         for the format of the system data and location data.
+     site : dict or str, default None
+         Pass a dictionary or path to a json or yaml file containing site data, which
+         will be used to generate modeled clear sky ghi and poa values. The clear sky
+         irradiance values are added to the data and the column_groups attribute is
+         updated to include these two irradiance columns. The site data dictionary should
+         be {sys: {system data}, loc: {location data}}. See the capdata.csky
+         documentation for the format of the system data and location data.
      column_groups_template : bool, default False
          If True, will call `CapData.data_columns_to_excel` to save a file to use to
          manually create column groupings at `path`.
+     verbose : bool, default False
+         Set to True to print status of file loading.
      **kwargs
-         Passed to `DataLoader.load` Options include: sort, drop_duplicates, reindex,
-         extension. See `DataLoader` for complete documentation.
+         Passed to `DataLoader.load`, which passes them to the `file_reader` function.
+         The default `file_reader` function passes them to pandas.read_csv.
      """
      dl = DataLoader(
          path=path,
          file_reader=file_reader,
      )
-     dl.load(**kwargs)
+     dl.load(verbose=verbose, **kwargs)

      if sort:
          dl.sort_data()
@@ -454,7 +532,7 @@ def load_data(
      cd.data_loader = dl
      # group columns
      if callable(group_columns):
-         cd.column_groups = group_columns(cd.data)
+         cd.column_groups = cg.ColumnGroups(group_columns(cd.data))
      elif isinstance(group_columns, str):
          p = Path(group_columns)
          if p.suffix == ".json":
@@ -464,12 +542,20 @@ def load_data(
          elif (p.suffix == '.xlsx') or (p.suffix == '.xls'):
              cd.column_groups = cg.ColumnGroups(load_excel_column_groups(group_columns))
      if site is not None:
-         cd.data = csky(cd.data, loc=site['loc'], sys=site['sys'])
-         cd.data_filtered = cd.data.copy()
-         cd.column_groups['irr-poa-clear_sky'] = ['poa_mod_csky']
-         cd.column_groups['irr-ghi-clear_sky'] = ['ghi_mod_csky']
+         if isinstance(site, str):
+             path_to_site = Path(site)
+             if path_to_site.is_file():
+                 if path_to_site.suffix == ".json":
+                     site = util.read_json(site)
+                 if (path_to_site.suffix == ".yaml") or (path_to_site.suffix == ".yml"):
+                     site = util.read_yaml(site)
+         cd.site = copy.deepcopy(site)
+         if isinstance(site, dict):
+             cd.data = csky(cd.data, loc=site['loc'], sys=site['sys'])
+             cd.data_filtered = cd.data.copy()
+             cd.column_groups['irr-poa-clear_sky'] = ['poa_mod_csky']
+             cd.column_groups['irr-ghi-clear_sky'] = ['ghi_mod_csky']
      cd.trans_keys = list(cd.column_groups.keys())
-     cd.set_plot_attributes()
      if column_groups_template:
          cd.data_columns_to_excel()
      return cd
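
Note: taken together, load_data can now resolve site from a json or yaml file and stores a copy on the returned CapData. A sketch with hypothetical paths:

    from captest.io import load_data

    # site may be a dict or a path to a file shaped like
    # {"sys": {...}, "loc": {...}}.
    meas = load_data("./data/", site="./data/site.json", verbose=True)
    print(meas.data[["poa_mod_csky", "ghi_mod_csky"]].head())
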