ecopipeline 0.6.10__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ecopipeline-0.6.10/src/ecopipeline.egg-info → ecopipeline-0.7.0}/PKG-INFO +1 -1
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/setup.cfg +1 -1
- ecopipeline-0.7.0/src/ecopipeline/load/__init__.py +3 -0
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/load/load.py +21 -0
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/transform/__init__.py +10 -3
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/transform/transform.py +68 -1
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/utils/ConfigManager.py +11 -4
- {ecopipeline-0.6.10 → ecopipeline-0.7.0/src/ecopipeline.egg-info}/PKG-INFO +1 -1
- ecopipeline-0.6.10/src/ecopipeline/load/__init__.py +0 -2
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/LICENSE +0 -0
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/README.md +0 -0
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/pyproject.toml +0 -0
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/setup.py +0 -0
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/__init__.py +0 -0
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/extract/__init__.py +0 -0
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/extract/extract.py +0 -0
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/transform/bayview.py +0 -0
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/transform/lbnl.py +0 -0
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/utils/__init__.py +0 -0
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline/utils/unit_convert.py +0 -0
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline.egg-info/SOURCES.txt +0 -0
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline.egg-info/dependency_links.txt +0 -0
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline.egg-info/requires.txt +0 -0
- {ecopipeline-0.6.10 → ecopipeline-0.7.0}/src/ecopipeline.egg-info/top_level.txt +0 -0
--- ecopipeline-0.6.10/setup.cfg
+++ ecopipeline-0.7.0/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = ecopipeline
-version = 0.6.10
+version = 0.7.0
 authors = ["Carlos Bello, <bellocarlos@seattleu.edu>, Emil Fahrig <fahrigemil@seattleu.edu>, Casey Mang <cmang@seattleu.edu>, Julian Harris <harrisjulian@seattleu.edu>, Roger Tram <rtram@seattleu.edu>, Nolan Price <nolan@ecotope.com>"]
 description = Contains functions for use in Ecotope Datapipelines
 long_description = file: README.md
--- /dev/null
+++ ecopipeline-0.7.0/src/ecopipeline/load/__init__.py
@@ -0,0 +1,3 @@
+from .load import check_table_exists, create_new_table, load_overwrite_database, load_event_table, report_data_loss, load_data_statistics
+__all__ = ["check_table_exists", "create_new_table", "load_overwrite_database", "load_event_table", "report_data_loss",
+           "load_data_statistics"]
--- ecopipeline-0.6.10/src/ecopipeline/load/load.py
+++ ecopipeline-0.7.0/src/ecopipeline/load/load.py
@@ -460,6 +460,27 @@ def report_data_loss(config : ConfigManager, site_name : str = None):
     cursor.close()
     return True
 
+def load_data_statistics(config : ConfigManager, daily_stats_df : pd.DataFrame, config_daily_indicator : str = "day"):
+    """
+    Logs data statistics for the site in a table with name "{daily table name}_stats"
+
+    Parameters
+    ----------
+    config : ecopipeline.ConfigManager
+        The ConfigManager object that holds configuration data for the pipeline.
+    daily_stats_df : pd.DataFrame
+        dataframe created by the create_data_statistics_df() function in ecopipeline.transform
+    config_daily_indicator : str
+        the indicator of the daily_table name in the config.ini file of the data pipeline
+
+    Returns
+    -------
+    bool:
+        A boolean value indicating if the data was successfully written to the database.
+    """
+    table_name = f"{config.get_table_name(config_daily_indicator)}_stats"
+    return load_overwrite_database(config, daily_stats_df, config.get_db_table_info([]), config_daily_indicator, table_name=table_name)
+
 def _generate_mysql_update_event_table(row, id):
     statement = f"UPDATE site_events SET "
     statment_elems = []
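For orientation, this is the shape of the dataframe that load_data_statistics overwrites into the "{daily table name}_stats" table: one row per day, with each sensor column expanded into the three suffixed statistics columns. A minimal sketch with made-up values; the "OAT" column name and its numbers are illustrative assumptions, not part of this diff:

import pandas as pd

# Hypothetical two-day statistics frame for a single sensor column "OAT".
daily_stats_df = pd.DataFrame(
    {
        "OAT_missing_mins": [3, 0],   # minutes with no reported value
        "OAT_avg_gap": [1.5, 0.0],    # average gap length, in minutes
        "OAT_max_gap": [2, 0],        # longest gap, in minutes
    },
    index=pd.to_datetime(["2024-01-01", "2024-01-02"]),
)
print(daily_stats_df)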
--- ecopipeline-0.6.10/src/ecopipeline/transform/__init__.py
+++ ecopipeline-0.7.0/src/ecopipeline/transform/__init__.py
@@ -1,9 +1,16 @@
-from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time,
-
+from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, \
+    aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days, \
+    convert_c_to_f,convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage,generate_event_log_df,convert_time_zone, shift_accumulative_columns, \
+    heat_output_calc, add_relative_humidity, apply_equipment_cop_derate, create_data_statistics_df, delete_erroneous_from_time_pt
+from .lbnl import nclarity_filter_new, site_specific, condensate_calculations, gas_valve_diff, gather_outdoor_conditions, aqsuite_prep_time, \
+    nclarity_csv_to_df, _add_date, add_local_time, aqsuite_filter_new, get_refrig_charge, elev_correction, change_ID_to_HVAC, get_hvac_state, \
+    get_cop_values, get_cfm_values, replace_humidity, create_fan_curves, lbnl_temperature_conversions, lbnl_pressure_conversions, \
+    lbnl_sat_calculations, get_site_cfm_info, get_site_info, merge_indexlike_rows
 from .bayview import calculate_cop_values, aggregate_values, get_energy_by_min, verify_power_energy, get_temp_zones120, get_storage_gals120
 __all__ = ["rename_sensors", "avg_duplicate_times", "remove_outliers", "ffill_missing", "nullify_erroneous", "sensor_adjustment", "round_time", "aggregate_df", "join_to_hourly", "concat_last_row", "join_to_daily",
 "cop_method_1", "cop_method_2", "create_summary_tables", "remove_partial_days", "nclarity_filter_new", "site_specific", "condensate_calculations", "gas_valve_diff", "gather_outdoor_conditions", "aqsuite_prep_time",
 "nclarity_csv_to_df", "_add_date", "add_local_time", "aqsuite_filter_new", "get_refrig_charge", "elev_correction", "change_ID_to_HVAC", "get_hvac_state", "get_cop_values", "get_cfm_values", "replace_humidity",
 "create_fan_curves", "lbnl_temperature_conversions", "lbnl_pressure_conversions", "lbnl_sat_calculations", "get_site_cfm_info", "get_site_info", "merge_indexlike_rows", "calculate_cop_values", "aggregate_values",
 "get_energy_by_min", "verify_power_energy", "get_temp_zones120", "get_storage_gals120","convert_c_to_f","convert_l_to_g", "convert_on_off_col_to_bool", "flag_dhw_outage","generate_event_log_df","convert_time_zone",
-"shift_accumulative_columns","heat_output_calc", "add_relative_humidity","apply_equipment_cop_derate","
+"shift_accumulative_columns","heat_output_calc", "add_relative_humidity","apply_equipment_cop_derate","create_data_statistics_df",
+"delete_erroneous_from_time_pt"]
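Assuming the released 0.7.0 wheel is installed, the two new names are importable from the subpackages alongside the existing exports (a usage sketch, not part of the diff):

from ecopipeline.transform import create_data_statistics_df, delete_erroneous_from_time_pt
from ecopipeline.load import load_data_statistics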
--- ecopipeline-0.6.10/src/ecopipeline/transform/transform.py
+++ ecopipeline-0.7.0/src/ecopipeline/transform/transform.py
@@ -1027,7 +1027,7 @@ def join_to_daily(daily_data: pd.DataFrame, cop_data: pd.DataFrame) -> pd.DataFr
     out_df = daily_data.join(cop_data)
     return out_df
 
-def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int = 16):
+def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int = 16) -> pd.DataFrame:
     """
     Function derates equipment COP based on R value
     R12 - R16 : 12 %
@@ -1070,3 +1070,70 @@ def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int
 
     df[equip_cop_col] = df[equip_cop_col] * derate
     return df
+
+def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Function must be called on the raw minute data df after the rename_varriables() and before the ffill_missing() function has been called.
+    The function returns a dataframe indexed by day. Each column will expanded to 3 columns, appended with '_missing_mins', '_avg_gap', and
+    '_max_gap' respectively. the columns will carry the following statisctics:
+    _missing_mins -> the number of minutes in the day that have no reported data value for the column
+    _avg_gap -> the average gap (in minutes) between collected data values that day
+    _max_gap -> the maximum gap (in minutes) between collected data values that day
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        minute data df after the rename_varriables() and before the ffill_missing() function has been called
+
+    Returns
+    -------
+    daily_data_stats : pd.DataFrame
+        new dataframe with the columns descriped in the function's description
+    """
+    min_time = df.index.min()
+    start_day = min_time.floor('D')
+
+    # If min_time is not exactly at the start of the day, move to the next day
+    if min_time != start_day:
+        start_day = start_day + pd.tseries.offsets.Day(1)
+
+    # Build a complete minutely timestamp index over the full date range
+    full_index = pd.date_range(start=start_day,
+                               end=df.index.max().floor('D') - pd.Timedelta(minutes=1),
+                               freq='T')
+
+    # Reindex to include any completely missing minutes
+    df_full = df.reindex(full_index)
+
+    # Resample daily to count missing values per column
+    total_missing = df_full.isna().resample('D').sum().astype(int)
+
+    # Function to calculate max consecutive missing values
+    def max_consecutive_nans(x):
+        is_na = x.isna()
+        groups = (is_na != is_na.shift()).cumsum()
+        return is_na.groupby(groups).sum().max() or 0
+
+    # Function to calculate average consecutive missing values
+    def avg_consecutive_nans(x):
+        is_na = x.isna()
+        groups = (is_na != is_na.shift()).cumsum()
+        gap_lengths = is_na.groupby(groups).sum()
+        gap_lengths = gap_lengths[gap_lengths > 0]
+        if len(gap_lengths) == 0:
+            return 0
+        return gap_lengths.mean()
+
+    # Apply daily, per column
+    max_consec_missing = df_full.resample('D').apply(lambda day: day.apply(max_consecutive_nans))
+    avg_consec_missing = df_full.resample('D').apply(lambda day: day.apply(avg_consecutive_nans))
+
+    # Rename columns to include a suffix
+    total_missing = total_missing.add_suffix('_missing_mins')
+    max_consec_missing = max_consec_missing.add_suffix('_max_gap')
+    avg_consec_missing = avg_consec_missing.add_suffix('_avg_gap')
+
+    # Concatenate along columns (axis=1)
+    combined_df = pd.concat([total_missing, max_consec_missing, avg_consec_missing], axis=1)
+
+    return combined_df
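The two nested helpers share one run-length trick: comparing the NaN mask against its own shift and cumulatively summing the flips gives every consecutive run of NaNs (or values) its own group label, so a groupby-sum yields gap lengths directly. A self-contained sketch of that technique on a toy minute series; it reenacts the helpers with made-up data rather than calling the package:

import numpy as np
import pandas as pd

# Six minutes of data with a 2-minute gap and a 1-minute gap.
idx = pd.date_range("2024-01-01 00:00", periods=6, freq="min")
s = pd.Series([1.0, np.nan, np.nan, 3.0, np.nan, 5.0], index=idx)

is_na = s.isna()
# The mask flips whenever a run starts or ends; cumsum turns each flip
# into a new group label, so every run gets its own group.
groups = (is_na != is_na.shift()).cumsum()
run_lengths = is_na.groupby(groups).sum()  # NaN runs count their length; value runs sum to 0
gaps = run_lengths[run_lengths > 0]

print(int(is_na.sum()))    # 3   -> the "_missing_mins" statistic
print(float(gaps.mean()))  # 1.5 -> the "_avg_gap" statistic
print(int(gaps.max()))     # 2   -> the "_max_gap" statistic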
--- ecopipeline-0.6.10/src/ecopipeline/utils/ConfigManager.py
+++ ecopipeline-0.7.0/src/ecopipeline/utils/ConfigManager.py
@@ -134,15 +134,22 @@ class ConfigManager:
         tables.
         """
 
-
-
-
-
+        db_table_info = {}
+        if len(table_headers) > 0:
+            configure = configparser.ConfigParser()
+            configure.read(self.config_directory)
+            db_table_info = {header: {"table_name": configure.get(header, 'table_name')} for header in table_headers}
         db_table_info["database"] = self.db_connection_info["database"]
 
         print(f"Successfully fetched configuration information from file path {self.config_directory}.")
         return db_table_info
 
+    def get_table_name(self, header):
+        configure = configparser.ConfigParser()
+        configure.read(self.config_directory)
+
+        return configure.get(header, 'table_name')
+
     def get_db_name(self):
         """
         returns name of database that data will be uploaded to
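The new get_table_name is a thin configparser lookup keyed by a section header; combined with the f-string in load_data_statistics above, the stats table name is simply the configured table name plus "_stats". A self-contained reenactment follows; the [day] section and the site_daily value are illustrative assumptions (only the 'table_name' key appears in the diff):

import configparser
import io

# Stand-in for the pipeline's config.ini.
config_text = """\
[day]
table_name = site_daily
"""
configure = configparser.ConfigParser()
configure.read_file(io.StringIO(config_text))

# Mirrors ConfigManager.get_table_name("day") ...
daily_table = configure.get("day", "table_name")
# ... and the table-name derivation inside load_data_statistics.
stats_table = f"{daily_table}_stats"
print(stats_table)  # site_daily_stats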