ecopipeline 0.6.9-py3-none-any.whl → 0.7.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ecopipeline/load/__init__.py +3 -2
- ecopipeline/load/load.py +21 -0
- ecopipeline/transform/__init__.py +10 -3
- ecopipeline/transform/transform.py +97 -1
- ecopipeline/utils/ConfigManager.py +11 -4
- {ecopipeline-0.6.9.dist-info → ecopipeline-0.7.0.dist-info}/METADATA +1 -1
- ecopipeline-0.7.0.dist-info/RECORD +17 -0
- ecopipeline-0.6.9.dist-info/RECORD +0 -17
- {ecopipeline-0.6.9.dist-info → ecopipeline-0.7.0.dist-info}/WHEEL +0 -0
- {ecopipeline-0.6.9.dist-info → ecopipeline-0.7.0.dist-info}/licenses/LICENSE +0 -0
- {ecopipeline-0.6.9.dist-info → ecopipeline-0.7.0.dist-info}/top_level.txt +0 -0
ecopipeline/load/__init__.py
CHANGED
@@ -1,2 +1,3 @@
-from .load import check_table_exists, create_new_table, load_overwrite_database, load_event_table, report_data_loss
-__all__ = ["check_table_exists", "create_new_table", "load_overwrite_database", "load_event_table", "report_data_loss"]
+from .load import check_table_exists, create_new_table, load_overwrite_database, load_event_table, report_data_loss, load_data_statistics
+__all__ = ["check_table_exists", "create_new_table", "load_overwrite_database", "load_event_table", "report_data_loss",
+           "load_data_statistics"]
ecopipeline/load/load.py
CHANGED
@@ -460,6 +460,27 @@ def report_data_loss(config : ConfigManager, site_name : str = None):
     cursor.close()
     return True
 
+def load_data_statistics(config : ConfigManager, daily_stats_df : pd.DataFrame, config_daily_indicator : str = "day"):
+    """
+    Logs data statistics for the site in a table with name "{daily table name}_stats"
+
+    Parameters
+    ----------
+    config : ecopipeline.ConfigManager
+        The ConfigManager object that holds configuration data for the pipeline.
+    daily_stats_df : pd.DataFrame
+        dataframe created by the create_data_statistics_df() function in ecopipeline.transform
+    config_daily_indicator : str
+        the indicator of the daily_table name in the config.ini file of the data pipeline
+
+    Returns
+    -------
+    bool:
+        A boolean value indicating if the data was successfully written to the database.
+    """
+    table_name = f"{config.get_table_name(config_daily_indicator)}_stats"
+    return load_overwrite_database(config, daily_stats_df, config.get_db_table_info([]), config_daily_indicator, table_name=table_name)
+
 def _generate_mysql_update_event_table(row, id):
     statement = f"UPDATE site_events SET "
     statment_elems = []
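The new loader is designed to pair with create_data_statistics_df() from ecopipeline.transform (diffed below): the transform builds the per-day statistics frame and the loader writes it to "{daily table name}_stats". A minimal sketch of the intended call order, assuming a ConfigManager has already been constructed elsewhere; upload_daily_stats and minute_df are hypothetical names, not part of the package:

import pandas as pd
from ecopipeline import ConfigManager
from ecopipeline.transform import create_data_statistics_df
from ecopipeline.load import load_data_statistics

def upload_daily_stats(config: ConfigManager, minute_df: pd.DataFrame) -> bool:
    # Statistics must come from the raw minute data, before ffill_missing()
    # papers over the very gaps this table is meant to report.
    daily_stats_df = create_data_statistics_df(minute_df)
    # The default config_daily_indicator="day" resolves the daily table name
    # from config.ini and writes to "{daily table name}_stats".
    return load_data_statistics(config, daily_stats_df)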
ecopipeline/transform/__init__.py
CHANGED
@@ -1,9 +1,16 @@
-from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days, convert_c_to_f,convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage,generate_event_log_df,convert_time_zone, shift_accumulative_columns, heat_output_calc, add_relative_humidity, apply_equipment_cop_derate
-from .lbnl import nclarity_filter_new, site_specific, condensate_calculations, gas_valve_diff, gather_outdoor_conditions, aqsuite_prep_time, nclarity_csv_to_df, _add_date, add_local_time, aqsuite_filter_new, get_refrig_charge, elev_correction, change_ID_to_HVAC, get_hvac_state, get_cop_values, get_cfm_values, replace_humidity, create_fan_curves, lbnl_temperature_conversions, lbnl_pressure_conversions, lbnl_sat_calculations, get_site_cfm_info, get_site_info, merge_indexlike_rows
+from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, \
+    aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days, \
+    convert_c_to_f,convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage,generate_event_log_df,convert_time_zone, shift_accumulative_columns, \
+    heat_output_calc, add_relative_humidity, apply_equipment_cop_derate, create_data_statistics_df, delete_erroneous_from_time_pt
+from .lbnl import nclarity_filter_new, site_specific, condensate_calculations, gas_valve_diff, gather_outdoor_conditions, aqsuite_prep_time, \
+    nclarity_csv_to_df, _add_date, add_local_time, aqsuite_filter_new, get_refrig_charge, elev_correction, change_ID_to_HVAC, get_hvac_state, \
+    get_cop_values, get_cfm_values, replace_humidity, create_fan_curves, lbnl_temperature_conversions, lbnl_pressure_conversions, \
+    lbnl_sat_calculations, get_site_cfm_info, get_site_info, merge_indexlike_rows
 from .bayview import calculate_cop_values, aggregate_values, get_energy_by_min, verify_power_energy, get_temp_zones120, get_storage_gals120
 __all__ = ["rename_sensors", "avg_duplicate_times", "remove_outliers", "ffill_missing", "nullify_erroneous", "sensor_adjustment", "round_time", "aggregate_df", "join_to_hourly", "concat_last_row", "join_to_daily",
            "cop_method_1", "cop_method_2", "create_summary_tables", "remove_partial_days", "nclarity_filter_new", "site_specific", "condensate_calculations", "gas_valve_diff", "gather_outdoor_conditions", "aqsuite_prep_time",
            "nclarity_csv_to_df", "_add_date", "add_local_time", "aqsuite_filter_new", "get_refrig_charge", "elev_correction", "change_ID_to_HVAC", "get_hvac_state", "get_cop_values", "get_cfm_values", "replace_humidity",
            "create_fan_curves", "lbnl_temperature_conversions", "lbnl_pressure_conversions", "lbnl_sat_calculations", "get_site_cfm_info", "get_site_info", "merge_indexlike_rows", "calculate_cop_values", "aggregate_values",
            "get_energy_by_min", "verify_power_energy", "get_temp_zones120", "get_storage_gals120","convert_c_to_f","convert_l_to_g", "convert_on_off_col_to_bool", "flag_dhw_outage","generate_event_log_df","convert_time_zone",
-           "shift_accumulative_columns","heat_output_calc", "add_relative_humidity","apply_equipment_cop_derate"]
+           "shift_accumulative_columns","heat_output_calc", "add_relative_humidity","apply_equipment_cop_derate","create_data_statistics_df",
+           "delete_erroneous_from_time_pt"]
ecopipeline/transform/transform.py
CHANGED
@@ -306,6 +306,35 @@ def ffill_missing(original_df: pd.DataFrame, config : ConfigManager, previous_fi
     df.apply(_ffill, args=(ffill_df,previous_fill))
     return df
 
+def delete_erroneous_from_time_pt(df: pd.DataFrame, time_point : pd.Timestamp, column_names : list, new_value = None) -> pd.DataFrame:
+    """
+    Function will take a pandas dataframe and delete specified erroneous values at a specified time point.
+
+    Parameters
+    ----------
+    df: pd.DataFrame
+        Timestamp indexed Pandas dataframe that needs to have an erroneous value removed
+    time_point : pd.Timestamp
+        The timepoint index the erroneous value takes place in
+    column_names : list
+        list of column names as strings that contain erroneous values at this time stamp
+    new_value : any
+        new value to populate the erroneous columns at this timestamp with. If set to None, will replace value with NaN
+
+    Returns
+    -------
+    pd.DataFrame:
+        Pandas dataframe with error values replaced with new value
+    """
+    if new_value is None:
+        new_value = float('NaN') # Replace with NaN if new_value is not provided
+
+    if time_point in df.index:
+        for col in column_names:
+            df.loc[time_point, col] = new_value
+
+    return df
+
 # TODO test this
 def nullify_erroneous(original_df: pd.DataFrame, config : ConfigManager) -> pd.DataFrame:
     """
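A quick sketch of the new helper on toy data; the column names, timestamps, and the 999.0 sentinel value are made up for illustration:

import pandas as pd
from ecopipeline.transform import delete_erroneous_from_time_pt

# Minute-indexed frame with one known-bad reading in the OAT column.
idx = pd.date_range("2024-01-01 00:00", periods=3, freq="T")
df = pd.DataFrame({"OAT": [31.0, 999.0, 32.0], "Flow": [1.2, 1.3, 1.1]}, index=idx)

# Blank out the spurious reading at 00:01; new_value=None writes NaN.
df = delete_erroneous_from_time_pt(df, pd.Timestamp("2024-01-01 00:01"), ["OAT"])
print(df["OAT"].tolist())  # [31.0, nan, 32.0]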
@@ -998,7 +1027,7 @@ def join_to_daily(daily_data: pd.DataFrame, cop_data: pd.DataFrame) -> pd.DataFr
     out_df = daily_data.join(cop_data)
     return out_df
 
-def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int = 16):
+def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int = 16) -> pd.DataFrame:
     """
     Function derates equipment COP based on R value
     R12 - R16 : 12 %
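A hedged usage sketch of the newly annotated function: the visible docstring puts R12 - R16 at a 12 % derate and the body (below) multiplies the COP column by a derate factor, so the expected output here assumes derate = 0.88 in that band; the EquipCOP column is hypothetical:

import pandas as pd
from ecopipeline.transform import apply_equipment_cop_derate

df = pd.DataFrame({"EquipCOP": [3.0, 3.5]})
# r_val=16 falls in the R12 - R16 band described in the docstring.
df = apply_equipment_cop_derate(df, "EquipCOP", r_val=16)
print(df["EquipCOP"].tolist())  # [2.64, 3.08] if the 12 % derate means COP * 0.88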
@@ -1041,3 +1070,70 @@ def apply_equipment_cop_derate(df: pd.DataFrame, equip_cop_col: str, r_val : int
 
     df[equip_cop_col] = df[equip_cop_col] * derate
     return df
+
+def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Function must be called on the raw minute data df after the rename_varriables() and before the ffill_missing() function has been called.
+    The function returns a dataframe indexed by day. Each column will be expanded to 3 columns, appended with '_missing_mins', '_avg_gap', and
+    '_max_gap' respectively. The columns will carry the following statistics:
+        _missing_mins -> the number of minutes in the day that have no reported data value for the column
+        _avg_gap -> the average gap (in minutes) between collected data values that day
+        _max_gap -> the maximum gap (in minutes) between collected data values that day
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        minute data df after the rename_varriables() and before the ffill_missing() function has been called
+
+    Returns
+    -------
+    daily_data_stats : pd.DataFrame
+        new dataframe with the columns described in the function's description
+    """
+    min_time = df.index.min()
+    start_day = min_time.floor('D')
+
+    # If min_time is not exactly at the start of the day, move to the next day
+    if min_time != start_day:
+        start_day = start_day + pd.tseries.offsets.Day(1)
+
+    # Build a complete minutely timestamp index over the full date range
+    full_index = pd.date_range(start=start_day,
+                               end=df.index.max().floor('D') - pd.Timedelta(minutes=1),
+                               freq='T')
+
+    # Reindex to include any completely missing minutes
+    df_full = df.reindex(full_index)
+
+    # Resample daily to count missing values per column
+    total_missing = df_full.isna().resample('D').sum().astype(int)
+
+    # Function to calculate max consecutive missing values
+    def max_consecutive_nans(x):
+        is_na = x.isna()
+        groups = (is_na != is_na.shift()).cumsum()
+        return is_na.groupby(groups).sum().max() or 0
+
+    # Function to calculate average consecutive missing values
+    def avg_consecutive_nans(x):
+        is_na = x.isna()
+        groups = (is_na != is_na.shift()).cumsum()
+        gap_lengths = is_na.groupby(groups).sum()
+        gap_lengths = gap_lengths[gap_lengths > 0]
+        if len(gap_lengths) == 0:
+            return 0
+        return gap_lengths.mean()
+
+    # Apply daily, per column
+    max_consec_missing = df_full.resample('D').apply(lambda day: day.apply(max_consecutive_nans))
+    avg_consec_missing = df_full.resample('D').apply(lambda day: day.apply(avg_consecutive_nans))
+
+    # Rename columns to include a suffix
+    total_missing = total_missing.add_suffix('_missing_mins')
+    max_consec_missing = max_consec_missing.add_suffix('_max_gap')
+    avg_consec_missing = avg_consec_missing.add_suffix('_avg_gap')
+
+    # Concatenate along columns (axis=1)
+    combined_df = pd.concat([total_missing, max_consec_missing, avg_consec_missing], axis=1)
+
+    return combined_df
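All three gap statistics rest on one run-length idiom: compare the NaN mask against a shifted copy of itself and take a cumulative sum, so every contiguous run of missing minutes gets its own group id. A standalone illustration of that idiom on a toy series:

import pandas as pd

s = pd.Series([1.0, None, None, 3.0, None, 5.0])  # toy "minute" column

is_na = s.isna()
groups = (is_na != is_na.shift()).cumsum()  # new group id at every True/False flip
gap_lengths = is_na.groupby(groups).sum()   # run lengths; NaN runs sum to > 0
gap_lengths = gap_lengths[gap_lengths > 0]  # keep only the missing-data runs

print(int(is_na.sum()))           # 3   -> "_missing_mins"
print(int(gap_lengths.max()))     # 2   -> "_max_gap"
print(float(gap_lengths.mean()))  # 1.5 -> "_avg_gap"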
ecopipeline/utils/ConfigManager.py
CHANGED
@@ -134,15 +134,22 @@ class ConfigManager:
         tables.
         """
 
-        configure = configparser.ConfigParser()
-        configure.read(self.config_directory)
-        db_table_info = {header: {"table_name": configure.get(header, 'table_name')}
-                         for header in table_headers}
+        db_table_info = {}
+        if len(table_headers) > 0:
+            configure = configparser.ConfigParser()
+            configure.read(self.config_directory)
+            db_table_info = {header: {"table_name": configure.get(header, 'table_name')} for header in table_headers}
         db_table_info["database"] = self.db_connection_info["database"]
 
         print(f"Successfully fetched configuration information from file path {self.config_directory}.")
         return db_table_info
 
+    def get_table_name(self, header):
+        configure = configparser.ConfigParser()
+        configure.read(self.config_directory)
+
+        return configure.get(header, 'table_name')
+
     def get_db_name(self):
         """
         returns name of database that data will be uploaded to
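get_table_name() is what lets load_data_statistics() turn an indicator like "day" into a concrete table name. A sketch of the equivalent configparser lookup against a hypothetical config.ini section ([day] and site_day are illustrative, not from the package):

import configparser

# Equivalent of a config.ini on disk containing:
#   [day]
#   table_name = site_day
configure = configparser.ConfigParser()
configure.read_string("[day]\ntable_name = site_day\n")

table_name = configure.get("day", "table_name")  # what get_table_name("day") returns
print(f"{table_name}_stats")                     # site_day_stats, the stats table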
ecopipeline-0.7.0.dist-info/RECORD
ADDED
@@ -0,0 +1,17 @@
+ecopipeline/__init__.py,sha256=vCRzwd781ciCSXMP1ycM_BXAqxj3KVaNKIjsLOPcbwc,171
+ecopipeline/extract/__init__.py,sha256=3u_CUMdCguVewU3kN8x6xhVNyo1-p-gwTrhjOh7Psqg,645
+ecopipeline/extract/extract.py,sha256=heWcWTeRVTRITh_1sHVnkaKOOi5PwUOEVIi4k5tw2Z8,43384
+ecopipeline/load/__init__.py,sha256=NLa_efQJZ8aP-J0Y5xx9DP7mtfRH9jY6Jz1ZMZN_BAA,292
+ecopipeline/load/load.py,sha256=3Zk5AXBt-2goqUc5FoOghmHaxLUODGuU9iLPqQnLMqM,22470
+ecopipeline/transform/__init__.py,sha256=hYb4F64fXdXtjBSYCqv6gLFBwKZjjnl0z7s291pFE98,2505
+ecopipeline/transform/bayview.py,sha256=TP24dnTsUD95X-f6732egPZKjepFLJgDm9ImGr-fppY,17899
+ecopipeline/transform/lbnl.py,sha256=EQ54G4rJXaZ7pwVusKcdK2KBehSdCsNo2ybphtMGs7o,33400
+ecopipeline/transform/transform.py,sha256=klqGHAX34MmM6govdP-mj61wyrDF7RN4GW9q1WabbRI,48952
+ecopipeline/utils/ConfigManager.py,sha256=E7e2RC2FveigjREMfeaiFu9fNQ8_b0xlWDfLCywS92k,10088
+ecopipeline/utils/__init__.py,sha256=ccWUR0m7gD9DfcgsxBCLOfi4lho6RdYuB2Ugy_g6ZdQ,28
+ecopipeline/utils/unit_convert.py,sha256=VFh1we2Y8KV3u21BeWb-U3TlZJXo83q5vdxxkpgcuME,3064
+ecopipeline-0.7.0.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ecopipeline-0.7.0.dist-info/METADATA,sha256=cb2ntfxBrE5PAQJHnOs2ei2B8mbJlfJsnLKC-_S1F04,2329
+ecopipeline-0.7.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+ecopipeline-0.7.0.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
+ecopipeline-0.7.0.dist-info/RECORD,,
ecopipeline-0.6.9.dist-info/RECORD
DELETED
@@ -1,17 +0,0 @@
-ecopipeline/__init__.py,sha256=vCRzwd781ciCSXMP1ycM_BXAqxj3KVaNKIjsLOPcbwc,171
-ecopipeline/extract/__init__.py,sha256=3u_CUMdCguVewU3kN8x6xhVNyo1-p-gwTrhjOh7Psqg,645
-ecopipeline/extract/extract.py,sha256=heWcWTeRVTRITh_1sHVnkaKOOi5PwUOEVIi4k5tw2Z8,43384
-ecopipeline/load/__init__.py,sha256=oDAVF8AhK_qugqegjW7jK16p-nb9QzKhiNQOkEBniKM,235
-ecopipeline/load/load.py,sha256=X7JIakIxyjzZbLuUjJ991kcQpyK4cFEZ0Lk36eXBEfI,21506
-ecopipeline/transform/__init__.py,sha256=YIE20XukPx-WiJ575PRgSPaCTtgTCiMqmFXPoE_yR1M,2337
-ecopipeline/transform/bayview.py,sha256=TP24dnTsUD95X-f6732egPZKjepFLJgDm9ImGr-fppY,17899
-ecopipeline/transform/lbnl.py,sha256=EQ54G4rJXaZ7pwVusKcdK2KBehSdCsNo2ybphtMGs7o,33400
-ecopipeline/transform/transform.py,sha256=E-rvf1MOnlHSRoSpcdffjVbs1vzk_XMIz_xttxAKOxw,44898
-ecopipeline/utils/ConfigManager.py,sha256=t4sfTjGO0g5P50XBQqGVFWaXfAlW1GMDh1DLoBuFGks,9826
-ecopipeline/utils/__init__.py,sha256=ccWUR0m7gD9DfcgsxBCLOfi4lho6RdYuB2Ugy_g6ZdQ,28
-ecopipeline/utils/unit_convert.py,sha256=VFh1we2Y8KV3u21BeWb-U3TlZJXo83q5vdxxkpgcuME,3064
-ecopipeline-0.6.9.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ecopipeline-0.6.9.dist-info/METADATA,sha256=awBO9IOMk8QznQCN-hIcWgzWnFVGxJcF-6ABTF6wMG8,2329
-ecopipeline-0.6.9.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-ecopipeline-0.6.9.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
-ecopipeline-0.6.9.dist-info/RECORD,,
{ecopipeline-0.6.9.dist-info → ecopipeline-0.7.0.dist-info}/WHEEL
File without changes
{ecopipeline-0.6.9.dist-info → ecopipeline-0.7.0.dist-info}/licenses/LICENSE
File without changes
{ecopipeline-0.6.9.dist-info → ecopipeline-0.7.0.dist-info}/top_level.txt
File without changes