ecopipeline 0.6.10__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (24)
  1. {ecopipeline-0.6.10/src/ecopipeline.egg-info → ecopipeline-0.7.0}/PKG-INFO +1 -1
  2. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/setup.cfg +1 -1
  3. ecopipeline-0.7.0/src/ecopipeline/load/__init__.py +3 -0
  4. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/load/load.py +21 -0
  5. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/transform/__init__.py +10 -3
  6. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/transform/transform.py +68 -1
  7. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/utils/ConfigManager.py +11 -4
  8. {ecopipeline-0.6.10 → ecopipeline-0.7.0/src/ecopipeline.egg-info}/PKG-INFO +1 -1
  9. ecopipeline-0.6.10/src/ecopipeline/load/__init__.py +0 -2
  10. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/LICENSE +0 -0
  11. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/README.md +0 -0
  12. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/pyproject.toml +0 -0
  13. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/setup.py +0 -0
  14. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/__init__.py +0 -0
  15. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/extract/__init__.py +0 -0
  16. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/extract/extract.py +0 -0
  17. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/transform/bayview.py +0 -0
  18. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/transform/lbnl.py +0 -0
  19. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/utils/__init__.py +0 -0
  20. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/utils/unit_convert.py +0 -0
  21. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline.egg-info/SOURCES.txt +0 -0
  22. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline.egg-info/dependency_links.txt +0 -0
  23. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline.egg-info/requires.txt +0 -0
  24. {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline.egg-info/top_level.txt +0 -0
{ecopipeline-0.6.10/src/ecopipeline.egg-info → ecopipeline-0.7.0}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ecopipeline
- Version: 0.6.10
+ Version: 0.7.0
  Summary: Contains functions for use in Ecotope Datapipelines
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
{ecopipeline-0.6.10 → ecopipeline-0.7.0}/setup.cfg
@@ -1,6 +1,6 @@
  [metadata]
  name = ecopipeline
- version = 0.6.10
+ version = 0.7.0
  authors = ["Carlos Bello, <bellocarlos@seattleu.edu>, Emil Fahrig <fahrigemil@seattleu.edu>, Casey Mang <cmang@seattleu.edu>, Julian Harris <harrisjulian@seattleu.edu>, Roger Tram <rtram@seattleu.edu>, Nolan Price <nolan@ecotope.com>"]
  description = Contains functions for use in Ecotope Datapipelines
  long_description = file: README.md
ecopipeline-0.7.0/src/ecopipeline/load/__init__.py (new file)
@@ -0,0 +1,3 @@
+ from .load import check_table_exists, create_new_table, load_overwrite_database, load_event_table, report_data_loss, load_data_statistics
+ __all__ = ["check_table_exists", "create_new_table", "load_overwrite_database", "load_event_table", "report_data_loss",
+            "load_data_statistics"]
{ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/load/load.py
@@ -460,6 +460,27 @@ def report_data_loss(config : ConfigManager, site_name : str = None):
      cursor.close()
      return True
 
+ def load_data_statistics(config : ConfigManager, daily_stats_df : pd.DataFrame, config_daily_indicator : str = "day"):
+     """
+     Logs data statistics for the site in a table named "{daily table name}_stats".
+
+     Parameters
+     ----------
+     config : ecopipeline.ConfigManager
+         The ConfigManager object that holds configuration data for the pipeline.
+     daily_stats_df : pd.DataFrame
+         Dataframe created by the create_data_statistics_df() function in ecopipeline.transform.
+     config_daily_indicator : str
+         The indicator of the daily table name in the config.ini file of the data pipeline.
+
+     Returns
+     -------
+     bool:
+         A boolean value indicating whether the data was successfully written to the database.
+     """
+     table_name = f"{config.get_table_name(config_daily_indicator)}_stats"
+     return load_overwrite_database(config, daily_stats_df, config.get_db_table_info([]), config_daily_indicator, table_name=table_name)
+
  def _generate_mysql_update_event_table(row, id):
      statement = f"UPDATE site_events SET "
      statment_elems = []
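The new loader pairs with create_data_statistics_df() from the transform changes below. A minimal usage sketch, hedged: the ConfigManager constructor argument, the config path, and the minute_df variable are assumptions for illustration, not confirmed API.

from ecopipeline import ConfigManager
from ecopipeline.transform import create_data_statistics_df
from ecopipeline.load import load_data_statistics

# Assumed setup: a ConfigManager pointed at the pipeline's config.ini (hypothetical path)
config = ConfigManager("path/to/config.ini")

# minute_df (assumed to exist): raw minute-indexed sensor data,
# after sensor renaming and before forward-filling
daily_stats_df = create_data_statistics_df(minute_df)

# Writes to "{daily table name}_stats"; the daily table name is looked up in the
# config.ini section named by config_daily_indicator (default "day")
success = load_data_statistics(config, daily_stats_df)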
{ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/transform/__init__.py
@@ -1,9 +1,16 @@
- from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days,convert_c_to_f,convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage,generate_event_log_df,convert_time_zone, shift_accumulative_columns,heat_output_calc, add_relative_humidity, apply_equipment_cop_derate, delete_erroneous_from_time_pt
- from .lbnl import nclarity_filter_new, site_specific, condensate_calculations, gas_valve_diff, gather_outdoor_conditions, aqsuite_prep_time, nclarity_csv_to_df, _add_date, add_local_time, aqsuite_filter_new, get_refrig_charge, elev_correction, change_ID_to_HVAC, get_hvac_state, get_cop_values, get_cfm_values, replace_humidity, create_fan_curves, lbnl_temperature_conversions, lbnl_pressure_conversions, lbnl_sat_calculations, get_site_cfm_info, get_site_info, merge_indexlike_rows
+ from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, \
+     aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days, \
+     convert_c_to_f, convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage, generate_event_log_df, convert_time_zone, shift_accumulative_columns, \
+     heat_output_calc, add_relative_humidity, apply_equipment_cop_derate, create_data_statistics_df, delete_erroneous_from_time_pt
+ from .lbnl import nclarity_filter_new, site_specific, condensate_calculations, gas_valve_diff, gather_outdoor_conditions, aqsuite_prep_time, \
+     nclarity_csv_to_df, _add_date, add_local_time, aqsuite_filter_new, get_refrig_charge, elev_correction, change_ID_to_HVAC, get_hvac_state, \
+     get_cop_values, get_cfm_values, replace_humidity, create_fan_curves, lbnl_temperature_conversions, lbnl_pressure_conversions, \
+     lbnl_sat_calculations, get_site_cfm_info, get_site_info, merge_indexlike_rows
  from .bayview import calculate_cop_values, aggregate_values, get_energy_by_min, verify_power_energy, get_temp_zones120, get_storage_gals120
  __all__ = ["rename_sensors", "avg_duplicate_times", "remove_outliers", "ffill_missing", "nullify_erroneous", "sensor_adjustment", "round_time", "aggregate_df", "join_to_hourly", "concat_last_row", "join_to_daily",
  "cop_method_1", "cop_method_2", "create_summary_tables", "remove_partial_days", "nclarity_filter_new", "site_specific", "condensate_calculations", "gas_valve_diff", "gather_outdoor_conditions", "aqsuite_prep_time",
  "nclarity_csv_to_df", "_add_date", "add_local_time", "aqsuite_filter_new", "get_refrig_charge", "elev_correction", "change_ID_to_HVAC", "get_hvac_state", "get_cop_values", "get_cfm_values", "replace_humidity",
  "create_fan_curves", "lbnl_temperature_conversions", "lbnl_pressure_conversions", "lbnl_sat_calculations", "get_site_cfm_info", "get_site_info", "merge_indexlike_rows", "calculate_cop_values", "aggregate_values",
  "get_energy_by_min", "verify_power_energy", "get_temp_zones120", "get_storage_gals120","convert_c_to_f","convert_l_to_g", "convert_on_off_col_to_bool", "flag_dhw_outage","generate_event_log_df","convert_time_zone",
- "shift_accumulative_columns","heat_output_calc", "add_relative_humidity","apply_equipment_cop_derate","delete_erroneous_from_time_pt"]
+ "shift_accumulative_columns","heat_output_calc", "add_relative_humidity","apply_equipment_cop_derate","create_data_statistics_df",
+ "delete_erroneous_from_time_pt"]
{ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/transform/transform.py
@@ -1027,7 +1027,7 @@ def join_to_daily(daily_data: pd.DataFrame, cop_data: pd.DataFrame) -> pd.DataFr
      out_df = daily_data.join(cop_data)
      return out_df
 
- def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int = 16):
+ def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int = 16) -> pd.DataFrame:
      """
      Function derates equipment COP based on R value
      R12 - R16 : 12 %
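The only change to apply_equipment_cop_derate() here is the added return annotation. For context, a call might look like the following sketch (the dataframe and column name are hypothetical):

# Derate a hypothetical equipment COP column for an R-16 tank,
# per the R-value bands listed in the docstring
daily_df = apply_equipment_cop_derate(daily_df, equip_cop_col="EquipmentCOP", r_val=16)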
@@ -1070,3 +1070,70 @@ def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int
 
      df[equip_cop_col] = df[equip_cop_col] * derate
      return df
+
+ def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
+     """
+     Function must be called on the raw minute data df after rename_varriables() has been called and before
+     ffill_missing() has been called. The function returns a dataframe indexed by day. Each column is expanded
+     into 3 columns, suffixed with '_missing_mins', '_avg_gap', and '_max_gap' respectively. The columns carry
+     the following statistics:
+         _missing_mins -> the number of minutes in the day that have no reported data value for the column
+         _avg_gap -> the average gap (in minutes) between collected data values that day
+         _max_gap -> the maximum gap (in minutes) between collected data values that day
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         Minute data df after rename_varriables() and before ffill_missing() has been called.
+
+     Returns
+     -------
+     daily_data_stats : pd.DataFrame
+         New dataframe with the columns described in the function's description.
+     """
+     min_time = df.index.min()
+     start_day = min_time.floor('D')
+
+     # If min_time is not exactly at the start of the day, move to the next day
+     if min_time != start_day:
+         start_day = start_day + pd.tseries.offsets.Day(1)
+
+     # Build a complete minutely timestamp index over the full date range
+     full_index = pd.date_range(start=start_day,
+                                end=df.index.max().floor('D') - pd.Timedelta(minutes=1),
+                                freq='T')
+
+     # Reindex to include any completely missing minutes
+     df_full = df.reindex(full_index)
+
+     # Resample daily to count missing values per column
+     total_missing = df_full.isna().resample('D').sum().astype(int)
+
+     # Function to calculate max consecutive missing values
+     def max_consecutive_nans(x):
+         is_na = x.isna()
+         groups = (is_na != is_na.shift()).cumsum()
+         return is_na.groupby(groups).sum().max() or 0
+
+     # Function to calculate average consecutive missing values
+     def avg_consecutive_nans(x):
+         is_na = x.isna()
+         groups = (is_na != is_na.shift()).cumsum()
+         gap_lengths = is_na.groupby(groups).sum()
+         gap_lengths = gap_lengths[gap_lengths > 0]
+         if len(gap_lengths) == 0:
+             return 0
+         return gap_lengths.mean()
+
+     # Apply daily, per column
+     max_consec_missing = df_full.resample('D').apply(lambda day: day.apply(max_consecutive_nans))
+     avg_consec_missing = df_full.resample('D').apply(lambda day: day.apply(avg_consecutive_nans))
+
+     # Rename columns to include a suffix
+     total_missing = total_missing.add_suffix('_missing_mins')
+     max_consec_missing = max_consec_missing.add_suffix('_max_gap')
+     avg_consec_missing = avg_consec_missing.add_suffix('_avg_gap')
+
+     # Concatenate along columns (axis=1)
+     combined_df = pd.concat([total_missing, max_consec_missing, avg_consec_missing], axis=1)
+
+     return combined_df
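To make the gap semantics concrete, here is a small self-contained check of create_data_statistics_df() against synthetic minute data (the column name and gap placement are invented for the example). Note that the function trims a leading partial day and drops the day containing the final timestamp, so three days of input yield two days of statistics:

import numpy as np
import pandas as pd
from ecopipeline.transform import create_data_statistics_df

# Three days of minute data; the trailing day is treated as incomplete and dropped
idx = pd.date_range("2024-01-01", periods=3 * 24 * 60, freq="T")
df = pd.DataFrame({"temp_F": np.random.default_rng(0).normal(120.0, 2.0, len(idx))},
                  index=idx)

# Punch two outages into Jan 1: a 10-minute gap and a 4-minute gap
df.loc["2024-01-01 03:00":"2024-01-01 03:09", "temp_F"] = np.nan
df.loc["2024-01-01 12:00":"2024-01-01 12:03", "temp_F"] = np.nan

stats = create_data_statistics_df(df)
print(stats.loc["2024-01-01", "temp_F_missing_mins"])  # 14 missing minutes
print(stats.loc["2024-01-01", "temp_F_max_gap"])       # longest gap: 10
print(stats.loc["2024-01-01", "temp_F_avg_gap"])       # mean gap: (10 + 4) / 2 = 7.0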
{ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/utils/ConfigManager.py
@@ -134,15 +134,22 @@ class ConfigManager:
          tables.
          """
 
-         configure = configparser.ConfigParser()
-         configure.read(self.config_directory)
-
-         db_table_info = {header: {"table_name": configure.get(header, 'table_name')} for header in table_headers}
+         db_table_info = {}
+         if len(table_headers) > 0:
+             configure = configparser.ConfigParser()
+             configure.read(self.config_directory)
+             db_table_info = {header: {"table_name": configure.get(header, 'table_name')} for header in table_headers}
          db_table_info["database"] = self.db_connection_info["database"]
 
          print(f"Successfully fetched configuration information from file path {self.config_directory}.")
          return db_table_info
 
+     def get_table_name(self, header):
+         configure = configparser.ConfigParser()
+         configure.read(self.config_directory)
+
+         return configure.get(header, 'table_name')
+
      def get_db_name(self):
          """
          returns name of database that data will be uploaded to
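get_table_name() reads a single table_name entry from config.ini, and get_db_table_info([]) now skips reading config.ini entirely when no table headers are requested. A sketch of the lookup the new helper performs, with a hypothetical section and table name:

import configparser

# Hypothetical config.ini excerpt: one section per pipeline table,
# each holding the table_name key that get_table_name() reads
configure = configparser.ConfigParser()
configure.read_string("""
[day]
table_name = site_daily
""")

print(configure.get("day", "table_name"))  # -> site_daily
# config.get_table_name("day") performs this same lookup against the pipeline's
# config.ini, so load_data_statistics() would target the table "site_daily_stats"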
{ecopipeline-0.6.10 → ecopipeline-0.7.0/src/ecopipeline.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ecopipeline
- Version: 0.6.10
+ Version: 0.7.0
  Summary: Contains functions for use in Ecotope Datapipelines
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
ecopipeline-0.6.10/src/ecopipeline/load/__init__.py (removed)
@@ -1,2 +0,0 @@
- from .load import check_table_exists, create_new_table, load_overwrite_database, load_event_table, report_data_loss
- __all__ = ["check_table_exists", "create_new_table", "load_overwrite_database", "load_event_table", "report_data_loss"]