ecopipeline 0.6.9__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {ecopipeline-0.6.9/src/ecopipeline.egg-info → ecopipeline-0.7.0}/PKG-INFO +1 -1
  2. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/setup.cfg +1 -1
  3. ecopipeline-0.7.0/src/ecopipeline/load/__init__.py +3 -0
  4. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/load/load.py +21 -0
  5. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/transform/__init__.py +10 -3
  6. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/transform/transform.py +97 -1
  7. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/utils/ConfigManager.py +11 -4
  8. {ecopipeline-0.6.9 → ecopipeline-0.7.0/src/ecopipeline.egg-info}/PKG-INFO +1 -1
  9. ecopipeline-0.6.9/src/ecopipeline/load/__init__.py +0 -2
  10. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/LICENSE +0 -0
  11. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/README.md +0 -0
  12. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/pyproject.toml +0 -0
  13. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/setup.py +0 -0
  14. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/__init__.py +0 -0
  15. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/extract/__init__.py +0 -0
  16. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/extract/extract.py +0 -0
  17. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/transform/bayview.py +0 -0
  18. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/transform/lbnl.py +0 -0
  19. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/utils/__init__.py +0 -0
  20. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/utils/unit_convert.py +0 -0
  21. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline.egg-info/SOURCES.txt +0 -0
  22. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline.egg-info/dependency_links.txt +0 -0
  23. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline.egg-info/requires.txt +0 -0
  24. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ecopipeline
- Version: 0.6.9
+ Version: 0.7.0
  Summary: Contains functions for use in Ecotope Datapipelines
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
@@ -1,6 +1,6 @@
  [metadata]
  name = ecopipeline
- version = 0.6.9
+ version = 0.7.0
  authors = ["Carlos Bello, <bellocarlos@seattleu.edu>, Emil Fahrig <fahrigemil@seattleu.edu>, Casey Mang <cmang@seattleu.edu>, Julian Harris <harrisjulian@seattleu.edu>, Roger Tram <rtram@seattleu.edu>, Nolan Price <nolan@ecotope.com>"]
  description = Contains functions for use in Ecotope Datapipelines
  long_description = file: README.md
@@ -0,0 +1,3 @@
+ from .load import check_table_exists, create_new_table, load_overwrite_database, load_event_table, report_data_loss, load_data_statistics
+ __all__ = ["check_table_exists", "create_new_table", "load_overwrite_database", "load_event_table", "report_data_loss",
+            "load_data_statistics"]
@@ -460,6 +460,27 @@ def report_data_loss(config : ConfigManager, site_name : str = None):
      cursor.close()
      return True

+ def load_data_statistics(config : ConfigManager, daily_stats_df : pd.DataFrame, config_daily_indicator : str = "day"):
+     """
+     Logs data statistics for the site in a table named "{daily table name}_stats".
+
+     Parameters
+     ----------
+     config : ecopipeline.ConfigManager
+         The ConfigManager object that holds configuration data for the pipeline.
+     daily_stats_df : pd.DataFrame
+         Dataframe created by the create_data_statistics_df() function in ecopipeline.transform.
+     config_daily_indicator : str
+         The indicator of the daily table name in the config.ini file of the data pipeline.
+
+     Returns
+     -------
+     bool:
+         A boolean value indicating whether the data was successfully written to the database.
+     """
+     table_name = f"{config.get_table_name(config_daily_indicator)}_stats"
+     return load_overwrite_database(config, daily_stats_df, config.get_db_table_info([]), config_daily_indicator, table_name=table_name)
+
  def _generate_mysql_update_event_table(row, id):
      statement = f"UPDATE site_events SET "
      statment_elems = []
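
The new load_data_statistics ties the 0.7.0 additions together: it derives the destination table name from the daily table's name and hands the statistics frame to load_overwrite_database. A minimal usage sketch, assuming a standard pipeline config.ini with a "day" section; the ConfigManager constructor argument and the column name are illustrative assumptions, not part of this diff:

    import pandas as pd
    from ecopipeline import ConfigManager
    from ecopipeline.transform import create_data_statistics_df
    from ecopipeline.load import load_data_statistics

    # Toy minute data standing in for the pipeline's extract output
    idx = pd.date_range("2024-01-01", periods=3 * 24 * 60, freq="min")
    minute_df = pd.DataFrame({"PowerIn_kW": 1.0}, index=idx)

    config = ConfigManager("config.ini")             # constructor args assumed
    stats_df = create_data_statistics_df(minute_df)  # per-day gap statistics
    ok = load_data_statistics(config, stats_df)      # writes "<daily table name>_stats"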
@@ -1,9 +1,16 @@
- from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days,convert_c_to_f,convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage,generate_event_log_df,convert_time_zone, shift_accumulative_columns,heat_output_calc, add_relative_humidity, apply_equipment_cop_derate
- from .lbnl import nclarity_filter_new, site_specific, condensate_calculations, gas_valve_diff, gather_outdoor_conditions, aqsuite_prep_time, nclarity_csv_to_df, _add_date, add_local_time, aqsuite_filter_new, get_refrig_charge, elev_correction, change_ID_to_HVAC, get_hvac_state, get_cop_values, get_cfm_values, replace_humidity, create_fan_curves, lbnl_temperature_conversions, lbnl_pressure_conversions, lbnl_sat_calculations, get_site_cfm_info, get_site_info, merge_indexlike_rows
+ from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, \
+     aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days, \
+     convert_c_to_f, convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage, generate_event_log_df, convert_time_zone, shift_accumulative_columns, \
+     heat_output_calc, add_relative_humidity, apply_equipment_cop_derate, create_data_statistics_df, delete_erroneous_from_time_pt
+ from .lbnl import nclarity_filter_new, site_specific, condensate_calculations, gas_valve_diff, gather_outdoor_conditions, aqsuite_prep_time, \
+     nclarity_csv_to_df, _add_date, add_local_time, aqsuite_filter_new, get_refrig_charge, elev_correction, change_ID_to_HVAC, get_hvac_state, \
+     get_cop_values, get_cfm_values, replace_humidity, create_fan_curves, lbnl_temperature_conversions, lbnl_pressure_conversions, \
+     lbnl_sat_calculations, get_site_cfm_info, get_site_info, merge_indexlike_rows
  from .bayview import calculate_cop_values, aggregate_values, get_energy_by_min, verify_power_energy, get_temp_zones120, get_storage_gals120
  __all__ = ["rename_sensors", "avg_duplicate_times", "remove_outliers", "ffill_missing", "nullify_erroneous", "sensor_adjustment", "round_time", "aggregate_df", "join_to_hourly", "concat_last_row", "join_to_daily",
             "cop_method_1", "cop_method_2", "create_summary_tables", "remove_partial_days", "nclarity_filter_new", "site_specific", "condensate_calculations", "gas_valve_diff", "gather_outdoor_conditions", "aqsuite_prep_time",
             "nclarity_csv_to_df", "_add_date", "add_local_time", "aqsuite_filter_new", "get_refrig_charge", "elev_correction", "change_ID_to_HVAC", "get_hvac_state", "get_cop_values", "get_cfm_values", "replace_humidity",
             "create_fan_curves", "lbnl_temperature_conversions", "lbnl_pressure_conversions", "lbnl_sat_calculations", "get_site_cfm_info", "get_site_info", "merge_indexlike_rows", "calculate_cop_values", "aggregate_values",
             "get_energy_by_min", "verify_power_energy", "get_temp_zones120", "get_storage_gals120", "convert_c_to_f", "convert_l_to_g", "convert_on_off_col_to_bool", "flag_dhw_outage", "generate_event_log_df", "convert_time_zone",
-            "shift_accumulative_columns", "heat_output_calc", "add_relative_humidity", "apply_equipment_cop_derate"]
+            "shift_accumulative_columns", "heat_output_calc", "add_relative_humidity", "apply_equipment_cop_derate", "create_data_statistics_df",
+            "delete_erroneous_from_time_pt"]
@@ -306,6 +306,35 @@ def ffill_missing(original_df: pd.DataFrame, config : ConfigManager, previous_fi
      df.apply(_ffill, args=(ffill_df,previous_fill))
      return df

+ def delete_erroneous_from_time_pt(df: pd.DataFrame, time_point : pd.Timestamp, column_names : list, new_value = None) -> pd.DataFrame:
+     """
+     Takes a pandas dataframe and deletes specified erroneous values at a specified time point.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         Timestamp-indexed pandas dataframe that needs to have an erroneous value removed.
+     time_point : pd.Timestamp
+         The timestamp index at which the erroneous value occurs.
+     column_names : list
+         List of column names, as strings, that contain erroneous values at this timestamp.
+     new_value : any
+         New value to populate the erroneous columns at this timestamp with. If set to None, the value is replaced with NaN.
+
+     Returns
+     -------
+     pd.DataFrame:
+         Pandas dataframe with the erroneous values replaced with the new value.
+     """
+     if new_value is None:
+         new_value = float('NaN')  # Replace with NaN if new_value is not provided
+
+     if time_point in df.index:
+         for col in column_names:
+             df.loc[time_point, col] = new_value
+
+     return df
+
  # TODO test this
  def nullify_erroneous(original_df: pd.DataFrame, config : ConfigManager) -> pd.DataFrame:
      """
@@ -998,7 +1027,7 @@ def join_to_daily(daily_data: pd.DataFrame, cop_data: pd.DataFrame) -> pd.DataFr
      out_df = daily_data.join(cop_data)
      return out_df

- def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int = 16):
+ def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int = 16) -> pd.DataFrame:
      """
      Function derates equipment COP based on R value
      R12 - R16 : 12 %
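
Only the signature change and the R12 - R16 tier (12 %) are visible here; the rest of the derate table sits in the unchanged function body, whose tail appears in the next hunk. A hedged sketch of the call, assuming the 12 % tier corresponds to a multiplier of 0.88 as the df[equip_cop_col] * derate line in the next hunk suggests:

    import pandas as pd
    from ecopipeline.transform import apply_equipment_cop_derate

    df = pd.DataFrame({"EquipCOP": [3.0, 2.5]})  # toy column name, not from the package
    # With r_val=16 in the R12 - R16 tier, a 12 % derate would mean
    # derate = 0.88, so a COP of 3.0 becomes roughly 2.64
    df = apply_equipment_cop_derate(df, "EquipCOP", r_val=16)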
@@ -1041,3 +1070,70 @@ def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int
 
      df[equip_cop_col] = df[equip_cop_col] * derate
      return df
+
+ def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Function must be called on the raw minute data df after the rename_varriables() and before the ffill_missing() function has been called.
+     The function returns a dataframe indexed by day. Each column will be expanded into 3 columns, appended with '_missing_mins', '_avg_gap', and
+     '_max_gap' respectively. The columns will carry the following statistics:
+         _missing_mins -> the number of minutes in the day that have no reported data value for the column
+         _avg_gap -> the average gap (in minutes) between collected data values that day
+         _max_gap -> the maximum gap (in minutes) between collected data values that day
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         Minute data df after the rename_varriables() and before the ffill_missing() function has been called.
+
+     Returns
+     -------
+     daily_data_stats : pd.DataFrame
+         New dataframe with the columns described in the function's description.
+     """
+     min_time = df.index.min()
+     start_day = min_time.floor('D')
+
+     # If min_time is not exactly at the start of the day, move to the next day
+     if min_time != start_day:
+         start_day = start_day + pd.tseries.offsets.Day(1)
+
+     # Build a complete minutely timestamp index over the full date range
+     full_index = pd.date_range(start=start_day,
+                                end=df.index.max().floor('D') - pd.Timedelta(minutes=1),
+                                freq='T')
+
+     # Reindex to include any completely missing minutes
+     df_full = df.reindex(full_index)
+
+     # Resample daily to count missing values per column
+     total_missing = df_full.isna().resample('D').sum().astype(int)
+
+     # Function to calculate max consecutive missing values
+     def max_consecutive_nans(x):
+         is_na = x.isna()
+         groups = (is_na != is_na.shift()).cumsum()
+         return is_na.groupby(groups).sum().max() or 0
+
+     # Function to calculate average consecutive missing values
+     def avg_consecutive_nans(x):
+         is_na = x.isna()
+         groups = (is_na != is_na.shift()).cumsum()
+         gap_lengths = is_na.groupby(groups).sum()
+         gap_lengths = gap_lengths[gap_lengths > 0]
+         if len(gap_lengths) == 0:
+             return 0
+         return gap_lengths.mean()
+
+     # Apply daily, per column
+     max_consec_missing = df_full.resample('D').apply(lambda day: day.apply(max_consecutive_nans))
+     avg_consec_missing = df_full.resample('D').apply(lambda day: day.apply(avg_consecutive_nans))
+
+     # Rename columns to include a suffix
+     total_missing = total_missing.add_suffix('_missing_mins')
+     max_consec_missing = max_consec_missing.add_suffix('_max_gap')
+     avg_consec_missing = avg_consec_missing.add_suffix('_avg_gap')
+
+     # Concatenate along columns (axis=1)
+     combined_df = pd.concat([total_missing, max_consec_missing, avg_consec_missing], axis=1)
+
+     return combined_df
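
A worked toy example of the new statistics frame; the index and column name are illustrative. Note that the full minutely index stops one minute before the floor of the last timestamp's day, so only fully elapsed days before the last day appear in the output:

    import pandas as pd
    from ecopipeline.transform import create_data_statistics_df

    # Two full days of minute data with a 5-minute gap punched into Jan 1
    idx = pd.date_range("2024-01-01", periods=2 * 24 * 60, freq="min")
    df = pd.DataFrame({"PowerIn_kW": 1.0}, index=idx)
    df.loc["2024-01-01 03:10":"2024-01-01 03:14", "PowerIn_kW"] = float("nan")

    stats = create_data_statistics_df(df)
    # stats is indexed by day, here covering only Jan 1, with columns
    # PowerIn_kW_missing_mins, PowerIn_kW_max_gap, and PowerIn_kW_avg_gap
    # (5, 5, and 5.0 respectively for the single gap above)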
@@ -134,15 +134,22 @@ class ConfigManager:
          tables.
          """

-         configure = configparser.ConfigParser()
-         configure.read(self.config_directory)
-
-         db_table_info = {header: {"table_name": configure.get(header, 'table_name')} for header in table_headers}
+         db_table_info = {}
+         if len(table_headers) > 0:
+             configure = configparser.ConfigParser()
+             configure.read(self.config_directory)
+             db_table_info = {header: {"table_name": configure.get(header, 'table_name')} for header in table_headers}
          db_table_info["database"] = self.db_connection_info["database"]

          print(f"Successfully fetched configuration information from file path {self.config_directory}.")
          return db_table_info

+     def get_table_name(self, header):
+         configure = configparser.ConfigParser()
+         configure.read(self.config_directory)
+
+         return configure.get(header, 'table_name')
+
      def get_db_name(self):
          """
          returns name of database that data will be uploaded to
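
The new get_table_name helper reads a single table_name from one section of config.ini, which is how load_data_statistics derives its stats table name, while get_db_table_info now tolerates an empty header list. A minimal sketch; the config.ini fragment and constructor argument are assumptions for illustration:

    from ecopipeline import ConfigManager

    # Assumed config.ini fragment:
    #   [day]
    #   table_name = site_daily
    config = ConfigManager("config.ini")          # constructor args assumed
    daily_table = config.get_table_name("day")    # -> "site_daily"
    stats_table = f"{daily_table}_stats"          # as load_data_statistics does

    # With an empty header list, the section loop is skipped and only the
    # database name is returned
    info = config.get_db_table_info([])           # {"database": "<db name>"}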
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ecopipeline
- Version: 0.6.9
+ Version: 0.7.0
  Summary: Contains functions for use in Ecotope Datapipelines
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
@@ -1,2 +0,0 @@
- from .load import check_table_exists, create_new_table, load_overwrite_database, load_event_table, report_data_loss
- __all__ = ["check_table_exists", "create_new_table", "load_overwrite_database", "load_event_table", "report_data_loss"]