ecopipeline-0.6.10-py3-none-any.whl → ecopipeline-0.7.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ecopipeline/load/__init__.py CHANGED
@@ -1,2 +1,3 @@
- from .load import check_table_exists, create_new_table, load_overwrite_database, load_event_table, report_data_loss
- __all__ = ["check_table_exists", "create_new_table", "load_overwrite_database", "load_event_table", "report_data_loss"]
+ from .load import check_table_exists, create_new_table, load_overwrite_database, load_event_table, report_data_loss, load_data_statistics
+ __all__ = ["check_table_exists", "create_new_table", "load_overwrite_database", "load_event_table", "report_data_loss",
+            "load_data_statistics"]
ecopipeline/load/load.py CHANGED
@@ -315,7 +315,7 @@ def load_event_table(config : ConfigManager, event_df: pd.DataFrame, site_name :
      site_name = config.get_site_name()
      column_names = f"start_time_pt,site_name"
      column_types = ["datetime","varchar(25)","datetime",
-                     "ENUM('HW_OUTAGE', 'HW_LOSS','PIPELINE_STATUS', 'MISC_EVENT', 'PIPELINE_UPLOAD', 'DATA_LOSS', 'DATA_LOSS_COP', 'SITE_VISIT', 'COMMISIONING', 'SYSTEM_MAINTENENCE', 'POWER_OUTAGE', 'EQUIPMENT_MALFUNCTION','PARTIAL_OCCUPANCY','INSTALLATION_ERROR','SETPOINT_ADJUSTMENT')",
+                     "ENUM('MISC_EVENT','DATA_LOSS','DATA_LOSS_COP','SITE_VISIT','SYSTEM_MAINTENANCE','EQUIPMENT_MALFUNCTION','PARTIAL_OCCUPANCY','INSTALLATION_ERROR','ALARM','MV_COMMISSIONED','PLANT_COMMISSIONED','INSTALLATION_ERROR_COP','SOO_PERIOD','SOO_PERIOD_COP','SYSTEM_TESTING')",
                      "varchar(200)"]
      column_list = ['end_time_pt','event_type', 'event_detail']
      if not set(column_list).issubset(event_df.columns):
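
The 0.7.1 ENUM replaces the old event types (HW_OUTAGE, POWER_OUTAGE, SETPOINT_ADJUSTMENT, and others) with values such as ALARM, MV_COMMISSIONED, and SOO_PERIOD. A minimal sketch, not taken from the package, of an event_df that passes the column check above, assuming a configured ConfigManager named config:

import pandas as pd

# Hypothetical event dataframe; event_type must be one of the new 0.7.1 ENUM values,
# and start_time_pt serves as the index while the other three are columns.
event_df = pd.DataFrame({
    'start_time_pt': [pd.Timestamp('2024-05-01 00:00')],
    'end_time_pt':   [pd.Timestamp('2024-05-01 23:59')],
    'event_type':    ['ALARM'],
    'event_detail':  ['Hot water outage flagged by pipeline'],
}).set_index('start_time_pt')

load_event_table(config, event_df)  # site_name defaults to config.get_site_name()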
@@ -460,6 +460,27 @@ def report_data_loss(config : ConfigManager, site_name : str = None):
      cursor.close()
      return True
 
+ def load_data_statistics(config : ConfigManager, daily_stats_df : pd.DataFrame, config_daily_indicator : str = "day"):
+     """
+     Logs data statistics for the site in a table with name "{daily table name}_stats"
+
+     Parameters
+     ----------
+     config : ecopipeline.ConfigManager
+         The ConfigManager object that holds configuration data for the pipeline.
+     daily_stats_df : pd.DataFrame
+         dataframe created by the create_data_statistics_df() function in ecopipeline.transform
+     config_daily_indicator : str
+         the indicator of the daily_table name in the config.ini file of the data pipeline
+
+     Returns
+     -------
+     bool:
+         A boolean value indicating if the data was successfully written to the database.
+     """
+     table_name = f"{config.get_table_name(config_daily_indicator)}_stats"
+     return load_overwrite_database(config, daily_stats_df, config.get_db_table_info([]), config_daily_indicator, table_name=table_name)
+
  def _generate_mysql_update_event_table(row, id):
      statement = f"UPDATE site_events SET "
      statment_elems = []
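
Paired with create_data_statistics_df() (added to transform.py below), the new loader persists per-day data-quality statistics in one call. A hedged usage sketch, assuming a config.ini with a "day" section, a configured ConfigManager named config, and a raw minute dataframe minute_df:

from ecopipeline.transform import create_data_statistics_df
from ecopipeline.load import load_data_statistics

# minute_df: raw minute data, after renaming and before ffill_missing()
daily_stats_df = create_data_statistics_df(minute_df)
ok = load_data_statistics(config, daily_stats_df)  # writes to "{daily table name}_stats"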
ecopipeline/transform/__init__.py CHANGED
@@ -1,9 +1,16 @@
- from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days,convert_c_to_f,convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage,generate_event_log_df,convert_time_zone, shift_accumulative_columns,heat_output_calc, add_relative_humidity, apply_equipment_cop_derate, delete_erroneous_from_time_pt
- from .lbnl import nclarity_filter_new, site_specific, condensate_calculations, gas_valve_diff, gather_outdoor_conditions, aqsuite_prep_time, nclarity_csv_to_df, _add_date, add_local_time, aqsuite_filter_new, get_refrig_charge, elev_correction, change_ID_to_HVAC, get_hvac_state, get_cop_values, get_cfm_values, replace_humidity, create_fan_curves, lbnl_temperature_conversions, lbnl_pressure_conversions, lbnl_sat_calculations, get_site_cfm_info, get_site_info, merge_indexlike_rows
+ from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, \
+     aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days, \
+     convert_c_to_f,convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage,generate_event_log_df,convert_time_zone, shift_accumulative_columns, \
+     heat_output_calc, add_relative_humidity, apply_equipment_cop_derate, create_data_statistics_df, delete_erroneous_from_time_pt
+ from .lbnl import nclarity_filter_new, site_specific, condensate_calculations, gas_valve_diff, gather_outdoor_conditions, aqsuite_prep_time, \
+     nclarity_csv_to_df, _add_date, add_local_time, aqsuite_filter_new, get_refrig_charge, elev_correction, change_ID_to_HVAC, get_hvac_state, \
+     get_cop_values, get_cfm_values, replace_humidity, create_fan_curves, lbnl_temperature_conversions, lbnl_pressure_conversions, \
+     lbnl_sat_calculations, get_site_cfm_info, get_site_info, merge_indexlike_rows
  from .bayview import calculate_cop_values, aggregate_values, get_energy_by_min, verify_power_energy, get_temp_zones120, get_storage_gals120
  __all__ = ["rename_sensors", "avg_duplicate_times", "remove_outliers", "ffill_missing", "nullify_erroneous", "sensor_adjustment", "round_time", "aggregate_df", "join_to_hourly", "concat_last_row", "join_to_daily",
             "cop_method_1", "cop_method_2", "create_summary_tables", "remove_partial_days", "nclarity_filter_new", "site_specific", "condensate_calculations", "gas_valve_diff", "gather_outdoor_conditions", "aqsuite_prep_time",
             "nclarity_csv_to_df", "_add_date", "add_local_time", "aqsuite_filter_new", "get_refrig_charge", "elev_correction", "change_ID_to_HVAC", "get_hvac_state", "get_cop_values", "get_cfm_values", "replace_humidity",
             "create_fan_curves", "lbnl_temperature_conversions", "lbnl_pressure_conversions", "lbnl_sat_calculations", "get_site_cfm_info", "get_site_info", "merge_indexlike_rows", "calculate_cop_values", "aggregate_values",
             "get_energy_by_min", "verify_power_energy", "get_temp_zones120", "get_storage_gals120","convert_c_to_f","convert_l_to_g", "convert_on_off_col_to_bool", "flag_dhw_outage","generate_event_log_df","convert_time_zone",
-            "shift_accumulative_columns","heat_output_calc", "add_relative_humidity","apply_equipment_cop_derate","delete_erroneous_from_time_pt"]
+            "shift_accumulative_columns","heat_output_calc", "add_relative_humidity","apply_equipment_cop_derate","create_data_statistics_df",
+            "delete_erroneous_from_time_pt"]
ecopipeline/transform/transform.py CHANGED
@@ -717,7 +717,7 @@ def flag_dhw_outage(df: pd.DataFrame, daily_df : pd.DataFrame, dhw_outlet_column
      Returns
      -------
      event_df : pd.DataFrame
-         Dataframe with 'HW_OUTAGE' events on the days in which there was a DHW Outage.
+         Dataframe with 'ALARM' events on the days in which there was a DHW Outage.
      """
      # TODO edge case for outage that spans over a day
      events = {
@@ -739,7 +739,7 @@ def flag_dhw_outage(df: pd.DataFrame, daily_df : pd.DataFrame, dhw_outlet_column
              adjusted_time = first_true_index - pd.Timedelta(minutes=consecutive_minutes-1)
              events['start_time_pt'].append(day)
              events['end_time_pt'].append(next_day - pd.Timedelta(minutes=1))
-             events['event_type'].append("HW_OUTAGE")
+             events['event_type'].append("ALARM")
              events['event_detail'].append(f"Hot Water Outage Occured (first one starting at {adjusted_time.strftime('%H:%M')})")
      event_df = pd.DataFrame(events)
      event_df.set_index('start_time_pt', inplace=True)
@@ -1027,7 +1027,7 @@ def join_to_daily(daily_data: pd.DataFrame, cop_data: pd.DataFrame) -> pd.DataFr
      out_df = daily_data.join(cop_data)
      return out_df
 
- def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int = 16):
+ def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int = 16) -> pd.DataFrame:
      """
      Function derates equipment COP based on R value
      R12 - R16 : 12 %
@@ -1070,3 +1070,70 @@ def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int
 
      df[equip_cop_col] = df[equip_cop_col] * derate
      return df
+
+ def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Function must be called on the raw minute data df after rename_varriables() and before ffill_missing() has been called.
+     The function returns a dataframe indexed by day. Each column will be expanded into 3 columns, appended with '_missing_mins', '_avg_gap', and
+     '_max_gap' respectively. The columns will carry the following statistics:
+         _missing_mins -> the number of minutes in the day that have no reported data value for the column
+         _avg_gap -> the average gap (in minutes) between collected data values that day
+         _max_gap -> the maximum gap (in minutes) between collected data values that day
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         minute data df after rename_varriables() and before ffill_missing() has been called
+
+     Returns
+     -------
+     daily_data_stats : pd.DataFrame
+         new dataframe with the columns described in the function's description
+     """
+     min_time = df.index.min()
+     start_day = min_time.floor('D')
+
+     # If min_time is not exactly at the start of the day, move to the next day
+     if min_time != start_day:
+         start_day = start_day + pd.tseries.offsets.Day(1)
+
+     # Build a complete minutely timestamp index over the full date range
+     full_index = pd.date_range(start=start_day,
+                                end=df.index.max().floor('D') - pd.Timedelta(minutes=1),
+                                freq='T')
+
+     # Reindex to include any completely missing minutes
+     df_full = df.reindex(full_index)
+
+     # Resample daily to count missing values per column
+     total_missing = df_full.isna().resample('D').sum().astype(int)
+
+     # Function to calculate max consecutive missing values
+     def max_consecutive_nans(x):
+         is_na = x.isna()
+         groups = (is_na != is_na.shift()).cumsum()
+         return is_na.groupby(groups).sum().max() or 0
+
+     # Function to calculate average consecutive missing values
+     def avg_consecutive_nans(x):
+         is_na = x.isna()
+         groups = (is_na != is_na.shift()).cumsum()
+         gap_lengths = is_na.groupby(groups).sum()
+         gap_lengths = gap_lengths[gap_lengths > 0]
+         if len(gap_lengths) == 0:
+             return 0
+         return gap_lengths.mean()
+
+     # Apply daily, per column
+     max_consec_missing = df_full.resample('D').apply(lambda day: day.apply(max_consecutive_nans))
+     avg_consec_missing = df_full.resample('D').apply(lambda day: day.apply(avg_consecutive_nans))
+
+     # Rename columns to include a suffix
+     total_missing = total_missing.add_suffix('_missing_mins')
+     max_consec_missing = max_consec_missing.add_suffix('_max_gap')
+     avg_consec_missing = avg_consec_missing.add_suffix('_avg_gap')
+
+     # Concatenate along columns (axis=1)
+     combined_df = pd.concat([total_missing, max_consec_missing, avg_consec_missing], axis=1)
+
+     return combined_df
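
To make the derived columns concrete, here is an illustrative run on synthetic minute data (not taken from the package) with one 30-minute gap; note the function drops the final day of the index as potentially partial:

import numpy as np
import pandas as pd

idx = pd.date_range('2024-05-01 00:00', '2024-05-03 23:59', freq='T')
df = pd.DataFrame({'PowerIn_kW': 1.0}, index=idx)
df.loc['2024-05-01 10:00':'2024-05-01 10:29', 'PowerIn_kW'] = np.nan  # one 30-minute gap

stats = create_data_statistics_df(df)
# Columns: PowerIn_kW_missing_mins, PowerIn_kW_max_gap, PowerIn_kW_avg_gap
# 2024-05-01 -> 30, 30, 30.0 ; 2024-05-02 -> 0, 0, 0 (May 3 is dropped as the last day)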
ecopipeline/utils/ConfigManager.py CHANGED
@@ -134,15 +134,22 @@ class ConfigManager:
          tables.
          """
 
-         configure = configparser.ConfigParser()
-         configure.read(self.config_directory)
-
-         db_table_info = {header: {"table_name": configure.get(header, 'table_name')} for header in table_headers}
+         db_table_info = {}
+         if len(table_headers) > 0:
+             configure = configparser.ConfigParser()
+             configure.read(self.config_directory)
+             db_table_info = {header: {"table_name": configure.get(header, 'table_name')} for header in table_headers}
          db_table_info["database"] = self.db_connection_info["database"]
 
          print(f"Successfully fetched configuration information from file path {self.config_directory}.")
          return db_table_info
 
+     def get_table_name(self, header):
+         configure = configparser.ConfigParser()
+         configure.read(self.config_directory)
+
+         return configure.get(header, 'table_name')
+
      def get_db_name(self):
          """
          returns name of database that data will be uploaded to
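
A hedged sketch of the two ConfigManager behaviors this hunk introduces; the constructor arguments and the "day" section name are assumptions, not taken from the package:

config = ConfigManager("path/to/config.ini")  # hypothetical construction

# New in 0.7.1: read a single table_name straight from a config.ini section.
daily_table = config.get_table_name("day")

# Also new: an empty header list no longer queries config.ini at all;
# the result carries only the database entry from the connection info.
info = config.get_db_table_info([])  # {"database": <name from db_connection_info>}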
ecopipeline-0.7.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ecopipeline
- Version: 0.6.10
+ Version: 0.7.1
  Summary: Contains functions for use in Ecotope Datapipelines
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
ecopipeline-0.7.1.dist-info/RECORD ADDED
@@ -0,0 +1,17 @@
+ ecopipeline/__init__.py,sha256=vCRzwd781ciCSXMP1ycM_BXAqxj3KVaNKIjsLOPcbwc,171
+ ecopipeline/extract/__init__.py,sha256=3u_CUMdCguVewU3kN8x6xhVNyo1-p-gwTrhjOh7Psqg,645
+ ecopipeline/extract/extract.py,sha256=heWcWTeRVTRITh_1sHVnkaKOOi5PwUOEVIi4k5tw2Z8,43384
+ ecopipeline/load/__init__.py,sha256=NLa_efQJZ8aP-J0Y5xx9DP7mtfRH9jY6Jz1ZMZN_BAA,292
+ ecopipeline/load/load.py,sha256=eNps857QRf0DZw_s90ShYbPMAyfOwbQJJfdKqYl7g-Y,22469
+ ecopipeline/transform/__init__.py,sha256=hYb4F64fXdXtjBSYCqv6gLFBwKZjjnl0z7s291pFE98,2505
+ ecopipeline/transform/bayview.py,sha256=TP24dnTsUD95X-f6732egPZKjepFLJgDm9ImGr-fppY,17899
+ ecopipeline/transform/lbnl.py,sha256=EQ54G4rJXaZ7pwVusKcdK2KBehSdCsNo2ybphtMGs7o,33400
+ ecopipeline/transform/transform.py,sha256=l1jw6uQ9Bqcb8tCydpxO3WEE5t_B3CKxfhohrlwCiXA,48944
+ ecopipeline/utils/ConfigManager.py,sha256=E7e2RC2FveigjREMfeaiFu9fNQ8_b0xlWDfLCywS92k,10088
+ ecopipeline/utils/__init__.py,sha256=ccWUR0m7gD9DfcgsxBCLOfi4lho6RdYuB2Ugy_g6ZdQ,28
+ ecopipeline/utils/unit_convert.py,sha256=VFh1we2Y8KV3u21BeWb-U3TlZJXo83q5vdxxkpgcuME,3064
+ ecopipeline-0.7.1.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ ecopipeline-0.7.1.dist-info/METADATA,sha256=Jr0oFXTGHK2XGj0Nx6fnmp3NIba-Znjhxf9TinY1Sx4,2329
+ ecopipeline-0.7.1.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
+ ecopipeline-0.7.1.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
+ ecopipeline-0.7.1.dist-info/RECORD,,
ecopipeline-0.7.1.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (78.1.0)
+ Generator: setuptools (79.0.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
 
ecopipeline-0.6.10.dist-info/RECORD DELETED
@@ -1,17 +0,0 @@
- ecopipeline/__init__.py,sha256=vCRzwd781ciCSXMP1ycM_BXAqxj3KVaNKIjsLOPcbwc,171
- ecopipeline/extract/__init__.py,sha256=3u_CUMdCguVewU3kN8x6xhVNyo1-p-gwTrhjOh7Psqg,645
- ecopipeline/extract/extract.py,sha256=heWcWTeRVTRITh_1sHVnkaKOOi5PwUOEVIi4k5tw2Z8,43384
- ecopipeline/load/__init__.py,sha256=oDAVF8AhK_qugqegjW7jK16p-nb9QzKhiNQOkEBniKM,235
- ecopipeline/load/load.py,sha256=X7JIakIxyjzZbLuUjJ991kcQpyK4cFEZ0Lk36eXBEfI,21506
- ecopipeline/transform/__init__.py,sha256=7HuovqGHqrw0bZmeSCPMq1SkSRxJY8QBOBhR7y1JdBw,2400
- ecopipeline/transform/bayview.py,sha256=TP24dnTsUD95X-f6732egPZKjepFLJgDm9ImGr-fppY,17899
- ecopipeline/transform/lbnl.py,sha256=EQ54G4rJXaZ7pwVusKcdK2KBehSdCsNo2ybphtMGs7o,33400
- ecopipeline/transform/transform.py,sha256=kcJl6gzOmPx5K5pzcfQq17a5zInW4XfyjtwLxOMNlr4,46004
- ecopipeline/utils/ConfigManager.py,sha256=t4sfTjGO0g5P50XBQqGVFWaXfAlW1GMDh1DLoBuFGks,9826
- ecopipeline/utils/__init__.py,sha256=ccWUR0m7gD9DfcgsxBCLOfi4lho6RdYuB2Ugy_g6ZdQ,28
- ecopipeline/utils/unit_convert.py,sha256=VFh1we2Y8KV3u21BeWb-U3TlZJXo83q5vdxxkpgcuME,3064
- ecopipeline-0.6.10.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- ecopipeline-0.6.10.dist-info/METADATA,sha256=7Ru_udzflx21RI3z8s7ZWrXh8bB2-5xXxYNsihAZIlY,2330
- ecopipeline-0.6.10.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
- ecopipeline-0.6.10.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
- ecopipeline-0.6.10.dist-info/RECORD,,