ecopipeline 0.6.9__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {ecopipeline-0.6.9/src/ecopipeline.egg-info → ecopipeline-0.7.0}/PKG-INFO +1 -1
  2. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/setup.cfg +1 -1
  3. ecopipeline-0.7.0/src/ecopipeline/load/__init__.py +3 -0
  4. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/load/load.py +21 -0
  5. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/transform/__init__.py +10 -3
  6. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/transform/transform.py +97 -1
  7. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/utils/ConfigManager.py +11 -4
  8. {ecopipeline-0.6.9 → ecopipeline-0.7.0/src/ecopipeline.egg-info}/PKG-INFO +1 -1
  9. ecopipeline-0.6.9/src/ecopipeline/load/__init__.py +0 -2
  10. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/LICENSE +0 -0
  11. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/README.md +0 -0
  12. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/pyproject.toml +0 -0
  13. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/setup.py +0 -0
  14. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/__init__.py +0 -0
  15. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/extract/__init__.py +0 -0
  16. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/extract/extract.py +0 -0
  17. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/transform/bayview.py +0 -0
  18. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/transform/lbnl.py +0 -0
  19. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/utils/__init__.py +0 -0
  20. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline/utils/unit_convert.py +0 -0
  21. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline.egg-info/SOURCES.txt +0 -0
  22. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline.egg-info/dependency_links.txt +0 -0
  23. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline.egg-info/requires.txt +0 -0
  24. {ecopipeline-0.6.9 → ecopipeline-0.7.0}/src/ecopipeline.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ecopipeline
- Version: 0.6.9
+ Version: 0.7.0
  Summary: Contains functions for use in Ecotope Datapipelines
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
@@ -1,6 +1,6 @@
  [metadata]
  name = ecopipeline
- version = 0.6.9
+ version = 0.7.0
  authors = ["Carlos Bello, <bellocarlos@seattleu.edu>, Emil Fahrig <fahrigemil@seattleu.edu>, Casey Mang <cmang@seattleu.edu>, Julian Harris <harrisjulian@seattleu.edu>, Roger Tram <rtram@seattleu.edu>, Nolan Price <nolan@ecotope.com>"]
  description = Contains functions for use in Ecotope Datapipelines
  long_description = file: README.md
@@ -0,0 +1,3 @@
+ from .load import check_table_exists, create_new_table, load_overwrite_database, load_event_table, report_data_loss, load_data_statistics
+ __all__ = ["check_table_exists", "create_new_table", "load_overwrite_database", "load_event_table", "report_data_loss",
+            "load_data_statistics"]
@@ -460,6 +460,27 @@ def report_data_loss(config : ConfigManager, site_name : str = None):
      cursor.close()
      return True

+ def load_data_statistics(config : ConfigManager, daily_stats_df : pd.DataFrame, config_daily_indicator : str = "day"):
+     """
+     Logs data statistics for the site in a table named "{daily table name}_stats".
+
+     Parameters
+     ----------
+     config : ecopipeline.ConfigManager
+         The ConfigManager object that holds configuration data for the pipeline.
+     daily_stats_df : pd.DataFrame
+         Dataframe created by the create_data_statistics_df() function in ecopipeline.transform.
+     config_daily_indicator : str
+         The indicator of the daily table name in the config.ini file of the data pipeline.
+
+     Returns
+     -------
+     bool:
+         A boolean value indicating whether the data was successfully written to the database.
+     """
+     table_name = f"{config.get_table_name(config_daily_indicator)}_stats"
+     return load_overwrite_database(config, daily_stats_df, config.get_db_table_info([]), config_daily_indicator, table_name=table_name)
+
  def _generate_mysql_update_event_table(row, id):
      statement = f"UPDATE site_events SET "
      statment_elems = []
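
The new load_data_statistics ties the 0.7.0 additions together: it derives the destination table name from the daily table's name and hands the statistics frame to load_overwrite_database. A minimal usage sketch, assuming a standard pipeline config.ini with a "day" section; the ConfigManager constructor argument and the column name are illustrative assumptions, not part of this diff:

    import pandas as pd
    from ecopipeline import ConfigManager
    from ecopipeline.transform import create_data_statistics_df
    from ecopipeline.load import load_data_statistics

    # Toy minute data standing in for the pipeline's extract output
    idx = pd.date_range("2024-01-01", periods=3 * 24 * 60, freq="min")
    minute_df = pd.DataFrame({"PowerIn_kW": 1.0}, index=idx)

    config = ConfigManager("config.ini")             # constructor args assumed
    stats_df = create_data_statistics_df(minute_df)  # per-day gap statistics
    ok = load_data_statistics(config, stats_df)      # writes "<daily table name>_stats"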
@@ -1,9 +1,16 @@
- from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days,convert_c_to_f,convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage,generate_event_log_df,convert_time_zone, shift_accumulative_columns,heat_output_calc, add_relative_humidity, apply_equipment_cop_derate
- from .lbnl import nclarity_filter_new, site_specific, condensate_calculations, gas_valve_diff, gather_outdoor_conditions, aqsuite_prep_time, nclarity_csv_to_df, _add_date, add_local_time, aqsuite_filter_new, get_refrig_charge, elev_correction, change_ID_to_HVAC, get_hvac_state, get_cop_values, get_cfm_values, replace_humidity, create_fan_curves, lbnl_temperature_conversions, lbnl_pressure_conversions, lbnl_sat_calculations, get_site_cfm_info, get_site_info, merge_indexlike_rows
+ from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, \
+     aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days, \
+     convert_c_to_f, convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage, generate_event_log_df, convert_time_zone, shift_accumulative_columns, \
+     heat_output_calc, add_relative_humidity, apply_equipment_cop_derate, create_data_statistics_df, delete_erroneous_from_time_pt
+ from .lbnl import nclarity_filter_new, site_specific, condensate_calculations, gas_valve_diff, gather_outdoor_conditions, aqsuite_prep_time, \
+     nclarity_csv_to_df, _add_date, add_local_time, aqsuite_filter_new, get_refrig_charge, elev_correction, change_ID_to_HVAC, get_hvac_state, \
+     get_cop_values, get_cfm_values, replace_humidity, create_fan_curves, lbnl_temperature_conversions, lbnl_pressure_conversions, \
+     lbnl_sat_calculations, get_site_cfm_info, get_site_info, merge_indexlike_rows
  from .bayview import calculate_cop_values, aggregate_values, get_energy_by_min, verify_power_energy, get_temp_zones120, get_storage_gals120
  __all__ = ["rename_sensors", "avg_duplicate_times", "remove_outliers", "ffill_missing", "nullify_erroneous", "sensor_adjustment", "round_time", "aggregate_df", "join_to_hourly", "concat_last_row", "join_to_daily",
             "cop_method_1", "cop_method_2", "create_summary_tables", "remove_partial_days", "nclarity_filter_new", "site_specific", "condensate_calculations", "gas_valve_diff", "gather_outdoor_conditions", "aqsuite_prep_time",
             "nclarity_csv_to_df", "_add_date", "add_local_time", "aqsuite_filter_new", "get_refrig_charge", "elev_correction", "change_ID_to_HVAC", "get_hvac_state", "get_cop_values", "get_cfm_values", "replace_humidity",
             "create_fan_curves", "lbnl_temperature_conversions", "lbnl_pressure_conversions", "lbnl_sat_calculations", "get_site_cfm_info", "get_site_info", "merge_indexlike_rows", "calculate_cop_values", "aggregate_values",
             "get_energy_by_min", "verify_power_energy", "get_temp_zones120", "get_storage_gals120", "convert_c_to_f", "convert_l_to_g", "convert_on_off_col_to_bool", "flag_dhw_outage", "generate_event_log_df", "convert_time_zone",
-            "shift_accumulative_columns", "heat_output_calc", "add_relative_humidity", "apply_equipment_cop_derate"]
+            "shift_accumulative_columns", "heat_output_calc", "add_relative_humidity", "apply_equipment_cop_derate", "create_data_statistics_df",
+            "delete_erroneous_from_time_pt"]
@@ -306,6 +306,35 @@ def ffill_missing(original_df: pd.DataFrame, config : ConfigManager, previous_fi
      df.apply(_ffill, args=(ffill_df,previous_fill))
      return df

+ def delete_erroneous_from_time_pt(df: pd.DataFrame, time_point : pd.Timestamp, column_names : list, new_value = None) -> pd.DataFrame:
+     """
+     Takes a pandas dataframe and deletes specified erroneous values at a specified time point.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         Timestamp-indexed pandas dataframe that needs to have an erroneous value removed.
+     time_point : pd.Timestamp
+         The timestamp index at which the erroneous value occurs.
+     column_names : list
+         List of column names, as strings, that contain erroneous values at this timestamp.
+     new_value : any
+         New value to populate the erroneous columns at this timestamp with. If set to None, the value is replaced with NaN.
+
+     Returns
+     -------
+     pd.DataFrame:
+         Pandas dataframe with the erroneous values replaced with the new value.
+     """
+     if new_value is None:
+         new_value = float('NaN')  # Replace with NaN if new_value is not provided
+
+     if time_point in df.index:
+         for col in column_names:
+             df.loc[time_point, col] = new_value
+
+     return df
+
  # TODO test this
  def nullify_erroneous(original_df: pd.DataFrame, config : ConfigManager) -> pd.DataFrame:
      """
@@ -998,7 +1027,7 @@ def join_to_daily(daily_data: pd.DataFrame, cop_data: pd.DataFrame) -> pd.DataFr
      out_df = daily_data.join(cop_data)
      return out_df

- def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int = 16):
+ def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int = 16) -> pd.DataFrame:
      """
      Function derates equipment COP based on R value
      R12 - R16 : 12 %
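
Only the signature change and the R12 - R16 tier (12 %) are visible here; the rest of the derate table sits in the unchanged function body, whose tail appears in the next hunk. A hedged sketch of the call, assuming the 12 % tier corresponds to a multiplier of 0.88 as the df[equip_cop_col] * derate line in the next hunk suggests:

    import pandas as pd
    from ecopipeline.transform import apply_equipment_cop_derate

    df = pd.DataFrame({"EquipCOP": [3.0, 2.5]})  # toy column name, not from the package
    # With r_val=16 in the R12 - R16 tier, a 12 % derate would mean
    # derate = 0.88, so a COP of 3.0 becomes roughly 2.64
    df = apply_equipment_cop_derate(df, "EquipCOP", r_val=16)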
@@ -1041,3 +1070,70 @@ def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int
 
      df[equip_cop_col] = df[equip_cop_col] * derate
      return df
+
+ def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Function must be called on the raw minute data df after the rename_varriables() and before the ffill_missing() function has been called.
+     The function returns a dataframe indexed by day. Each column will be expanded into 3 columns, appended with '_missing_mins', '_avg_gap', and
+     '_max_gap' respectively. The columns will carry the following statistics:
+         _missing_mins -> the number of minutes in the day that have no reported data value for the column
+         _avg_gap -> the average gap (in minutes) between collected data values that day
+         _max_gap -> the maximum gap (in minutes) between collected data values that day
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         Minute data df after the rename_varriables() and before the ffill_missing() function has been called.
+
+     Returns
+     -------
+     daily_data_stats : pd.DataFrame
+         New dataframe with the columns described in the function's description.
+     """
+     min_time = df.index.min()
+     start_day = min_time.floor('D')
+
+     # If min_time is not exactly at the start of the day, move to the next day
+     if min_time != start_day:
+         start_day = start_day + pd.tseries.offsets.Day(1)
+
+     # Build a complete minutely timestamp index over the full date range
+     full_index = pd.date_range(start=start_day,
+                                end=df.index.max().floor('D') - pd.Timedelta(minutes=1),
+                                freq='T')
+
+     # Reindex to include any completely missing minutes
+     df_full = df.reindex(full_index)
+
+     # Resample daily to count missing values per column
+     total_missing = df_full.isna().resample('D').sum().astype(int)
+
+     # Function to calculate max consecutive missing values
+     def max_consecutive_nans(x):
+         is_na = x.isna()
+         groups = (is_na != is_na.shift()).cumsum()
+         return is_na.groupby(groups).sum().max() or 0
+
+     # Function to calculate average consecutive missing values
+     def avg_consecutive_nans(x):
+         is_na = x.isna()
+         groups = (is_na != is_na.shift()).cumsum()
+         gap_lengths = is_na.groupby(groups).sum()
+         gap_lengths = gap_lengths[gap_lengths > 0]
+         if len(gap_lengths) == 0:
+             return 0
+         return gap_lengths.mean()
+
+     # Apply daily, per column
+     max_consec_missing = df_full.resample('D').apply(lambda day: day.apply(max_consecutive_nans))
+     avg_consec_missing = df_full.resample('D').apply(lambda day: day.apply(avg_consecutive_nans))
+
+     # Rename columns to include a suffix
+     total_missing = total_missing.add_suffix('_missing_mins')
+     max_consec_missing = max_consec_missing.add_suffix('_max_gap')
+     avg_consec_missing = avg_consec_missing.add_suffix('_avg_gap')
+
+     # Concatenate along columns (axis=1)
+     combined_df = pd.concat([total_missing, max_consec_missing, avg_consec_missing], axis=1)
+
+     return combined_df
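
A worked toy example of the new statistics frame; the index and column name are illustrative. Note that the full minutely index stops one minute before the floor of the last timestamp's day, so only fully elapsed days before the last day appear in the output:

    import pandas as pd
    from ecopipeline.transform import create_data_statistics_df

    # Two full days of minute data with a 5-minute gap punched into Jan 1
    idx = pd.date_range("2024-01-01", periods=2 * 24 * 60, freq="min")
    df = pd.DataFrame({"PowerIn_kW": 1.0}, index=idx)
    df.loc["2024-01-01 03:10":"2024-01-01 03:14", "PowerIn_kW"] = float("nan")

    stats = create_data_statistics_df(df)
    # stats is indexed by day, here covering only Jan 1, with columns
    # PowerIn_kW_missing_mins, PowerIn_kW_max_gap, and PowerIn_kW_avg_gap
    # (5, 5, and 5.0 respectively for the single gap above)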
@@ -134,15 +134,22 @@ class ConfigManager:
          tables.
          """

-         configure = configparser.ConfigParser()
-         configure.read(self.config_directory)
-
-         db_table_info = {header: {"table_name": configure.get(header, 'table_name')} for header in table_headers}
+         db_table_info = {}
+         if len(table_headers) > 0:
+             configure = configparser.ConfigParser()
+             configure.read(self.config_directory)
+             db_table_info = {header: {"table_name": configure.get(header, 'table_name')} for header in table_headers}
          db_table_info["database"] = self.db_connection_info["database"]

          print(f"Successfully fetched configuration information from file path {self.config_directory}.")
          return db_table_info

+     def get_table_name(self, header):
+         configure = configparser.ConfigParser()
+         configure.read(self.config_directory)
+
+         return configure.get(header, 'table_name')
+
      def get_db_name(self):
          """
          returns name of database that data will be uploaded to
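
The new get_table_name helper reads a single table_name from one section of config.ini, which is how load_data_statistics derives its stats table name, while get_db_table_info now tolerates an empty header list. A minimal sketch; the config.ini fragment and constructor argument are assumptions for illustration:

    from ecopipeline import ConfigManager

    # Assumed config.ini fragment:
    #   [day]
    #   table_name = site_daily
    config = ConfigManager("config.ini")          # constructor args assumed
    daily_table = config.get_table_name("day")    # -> "site_daily"
    stats_table = f"{daily_table}_stats"          # as load_data_statistics does

    # With an empty header list, the section loop is skipped and only the
    # database name is returned
    info = config.get_db_table_info([])           # {"database": "<db name>"}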
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ecopipeline
- Version: 0.6.9
+ Version: 0.7.0
  Summary: Contains functions for use in Ecotope Datapipelines
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
@@ -1,2 +0,0 @@
- from .load import check_table_exists, create_new_table, load_overwrite_database, load_event_table, report_data_loss
- __all__ = ["check_table_exists", "create_new_table", "load_overwrite_database", "load_event_table", "report_data_loss"]