ecopipeline 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ecopipeline/event_tracking/Alarm.py +317 -0
- ecopipeline/event_tracking/__init__.py +20 -2
- ecopipeline/event_tracking/alarms/AbnormalCOP.py +76 -0
- ecopipeline/event_tracking/alarms/BackupUse.py +94 -0
- ecopipeline/event_tracking/alarms/BalancingValve.py +78 -0
- ecopipeline/event_tracking/alarms/BlownFuse.py +72 -0
- ecopipeline/event_tracking/alarms/Boundary.py +90 -0
- ecopipeline/event_tracking/alarms/HPWHInlet.py +73 -0
- ecopipeline/event_tracking/alarms/HPWHOutage.py +96 -0
- ecopipeline/event_tracking/alarms/HPWHOutlet.py +85 -0
- ecopipeline/event_tracking/alarms/LSInconsist.py +114 -0
- ecopipeline/event_tracking/alarms/PowerRatio.py +111 -0
- ecopipeline/event_tracking/alarms/SOOChange.py +127 -0
- ecopipeline/event_tracking/alarms/ShortCycle.py +59 -0
- ecopipeline/event_tracking/alarms/TMSetpoint.py +127 -0
- ecopipeline/event_tracking/alarms/TempRange.py +84 -0
- ecopipeline/event_tracking/alarms/__init__.py +0 -0
- ecopipeline/event_tracking/event_tracking.py +517 -704
- ecopipeline/extract/__init__.py +2 -2
- ecopipeline/extract/extract.py +84 -0
- ecopipeline/load/__init__.py +2 -2
- ecopipeline/load/load.py +304 -3
- ecopipeline/transform/transform.py +1 -1
- ecopipeline/utils/ConfigManager.py +15 -2
- {ecopipeline-1.0.4.dist-info → ecopipeline-1.1.0.dist-info}/METADATA +1 -1
- ecopipeline-1.1.0.dist-info/RECORD +41 -0
- {ecopipeline-1.0.4.dist-info → ecopipeline-1.1.0.dist-info}/WHEEL +1 -1
- ecopipeline-1.0.4.dist-info/RECORD +0 -25
- {ecopipeline-1.0.4.dist-info → ecopipeline-1.1.0.dist-info}/licenses/LICENSE +0 -0
- {ecopipeline-1.0.4.dist-info → ecopipeline-1.1.0.dist-info}/top_level.txt +0 -0
|
@@ -5,33 +5,93 @@ from ecopipeline import ConfigManager
|
|
|
5
5
|
import re
|
|
6
6
|
import mysql.connector.errors as mysqlerrors
|
|
7
7
|
from datetime import timedelta
|
|
8
|
+
from .alarms.ShortCycle import ShortCycle
|
|
9
|
+
from .alarms.TempRange import TempRange
|
|
10
|
+
from .alarms.LSInconsist import LSInconsist
|
|
11
|
+
from .alarms.SOOChange import SOOChange
|
|
12
|
+
from .alarms.BlownFuse import BlownFuse
|
|
13
|
+
from .alarms.HPWHOutage import HPWHOutage
|
|
14
|
+
from .alarms.BackupUse import BackupUse
|
|
15
|
+
from .alarms.HPWHOutlet import HPWHOutlet
|
|
16
|
+
from .alarms.HPWHInlet import HPWHInlet
|
|
17
|
+
from .alarms.BalancingValve import BalancingValve
|
|
18
|
+
from .alarms.TMSetpoint import TMSetpoint
|
|
19
|
+
from .alarms.AbnormalCOP import AbnormalCOP
|
|
20
|
+
from .alarms.PowerRatio import PowerRatio
|
|
21
|
+
from .alarms.Boundary import Boundary
|
|
8
22
|
|
|
9
23
|
def central_alarm_df_creator(df: pd.DataFrame, daily_data : pd.DataFrame, config : ConfigManager, system: str = "",
|
|
10
24
|
default_cop_high_bound : float = 4.5, default_cop_low_bound : float = 0,
|
|
11
25
|
default_boundary_fault_time : int = 15, site_name : str = None, day_table_name_header : str = "day",
|
|
12
26
|
power_ratio_period_days : int = 7) -> pd.DataFrame:
|
|
27
|
+
if df.empty:
|
|
28
|
+
print("cannot flag missing balancing valve alarms. Dataframe is empty")
|
|
29
|
+
return pd.DataFrame()
|
|
30
|
+
variable_names_path = config.get_var_names_path()
|
|
31
|
+
try:
|
|
32
|
+
bounds_df = pd.read_csv(variable_names_path)
|
|
33
|
+
except FileNotFoundError:
|
|
34
|
+
print("File Not Found: ", variable_names_path)
|
|
35
|
+
return pd.DataFrame()
|
|
36
|
+
if (system != ""):
|
|
37
|
+
if not 'system' in bounds_df.columns:
|
|
38
|
+
raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
|
|
39
|
+
bounds_df = bounds_df.loc[bounds_df['system'] == system]
|
|
40
|
+
|
|
13
41
|
day_list = daily_data.index.to_list()
|
|
14
42
|
print('Checking for alarms...')
|
|
15
43
|
alarm_df = _convert_silent_alarm_dict_to_df({})
|
|
16
44
|
dict_of_alarms = {}
|
|
17
|
-
dict_of_alarms['boundary'] =
|
|
18
|
-
|
|
19
|
-
dict_of_alarms['
|
|
20
|
-
|
|
21
|
-
dict_of_alarms['
|
|
22
|
-
|
|
45
|
+
dict_of_alarms['boundary'] = Boundary(bounds_df, default_fault_time= default_boundary_fault_time)
|
|
46
|
+
# flag_boundary_alarms(df, config, full_days=day_list, system=system, default_fault_time= default_boundary_fault_time)
|
|
47
|
+
dict_of_alarms['power ratio'] = PowerRatio(bounds_df, day_table_name = config.get_table_name(day_table_name_header), ratio_period_days=power_ratio_period_days)
|
|
48
|
+
# power_ratio_alarm(daily_data, config, day_table_name = config.get_table_name(day_table_name_header), system=system, ratio_period_days=power_ratio_period_days)
|
|
49
|
+
dict_of_alarms['abnormal COP'] = AbnormalCOP(bounds_df, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)
|
|
50
|
+
# flag_abnormal_COP(daily_data, config, system = system, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)
|
|
51
|
+
dict_of_alarms['temperature maintenance setpoint'] = TMSetpoint(bounds_df)
|
|
52
|
+
# flag_high_tm_setpoint(df, daily_data, config, system=system)
|
|
53
|
+
dict_of_alarms['recirculation loop balancing valve'] = BalancingValve(bounds_df)
|
|
54
|
+
# flag_recirc_balance_valve(daily_data, config, system=system)
|
|
55
|
+
dict_of_alarms['HPWH inlet temperature'] = HPWHInlet(bounds_df)
|
|
56
|
+
# flag_hp_inlet_temp(df, daily_data, config, system)
|
|
57
|
+
dict_of_alarms['HPWH outlet temperature'] = HPWHOutlet(bounds_df)
|
|
58
|
+
# flag_hp_outlet_temp(df, daily_data, config, system)
|
|
59
|
+
dict_of_alarms['improper backup heating use'] = BackupUse(bounds_df)
|
|
60
|
+
# flag_backup_use(df, daily_data, config, system)
|
|
61
|
+
dict_of_alarms['HPWH outage'] = HPWHOutage(bounds_df, day_table_name = config.get_table_name(day_table_name_header))
|
|
62
|
+
# flag_HP_outage(df, daily_data, config, day_table_name = config.get_table_name(day_table_name_header), system=system)
|
|
63
|
+
dict_of_alarms['blown equipment fuse'] = BlownFuse(bounds_df)
|
|
64
|
+
# flag_blown_fuse(df, daily_data, config, system)
|
|
65
|
+
dict_of_alarms['unexpected SOO change'] = SOOChange(bounds_df)
|
|
66
|
+
# flag_unexpected_soo_change(df, daily_data, config, system)
|
|
67
|
+
dict_of_alarms['short cycle'] = ShortCycle(bounds_df)
|
|
68
|
+
# flag_shortcycle(df, daily_data, config, system)
|
|
69
|
+
dict_of_alarms['unexpected temperature'] = TempRange(bounds_df)
|
|
70
|
+
# flag_unexpected_temp(df, daily_data, config, system)
|
|
71
|
+
dict_of_alarms['demand response inconsistency'] = LSInconsist(bounds_df)
|
|
72
|
+
# flag_ls_mode_inconsistancy(df, daily_data, config, system)
|
|
73
|
+
# return alarm.find_alarms(df, daily_df, config)
|
|
23
74
|
|
|
24
75
|
ongoing_COP_exception = ['abnormal COP']
|
|
25
|
-
|
|
26
76
|
for key, value in dict_of_alarms.items():
|
|
27
|
-
if key in ongoing_COP_exception and _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
|
|
28
|
-
|
|
29
|
-
|
|
77
|
+
# if key in ongoing_COP_exception and _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
|
|
78
|
+
# print("Ongoing DATA_LOSS_COP detected. ABNORMAL_COP events will be uploaded")
|
|
79
|
+
specific_alarm_df = value.find_alarms(df, daily_data, config)
|
|
80
|
+
if len(specific_alarm_df) > 0:
|
|
30
81
|
print(f"Detected {key} alarm(s). Adding to event df...")
|
|
31
|
-
alarm_df = pd.concat([alarm_df,
|
|
82
|
+
alarm_df = pd.concat([alarm_df, specific_alarm_df])
|
|
32
83
|
else:
|
|
33
84
|
print(f"No {key} alarm(s) detected.")
|
|
34
85
|
|
|
86
|
+
# for key, value in dict_of_alarms.items():
|
|
87
|
+
# if key in ongoing_COP_exception and _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
|
|
88
|
+
# print("Ongoing DATA_LOSS_COP detected. No further DATA_LOSS_COP events will be uploaded")
|
|
89
|
+
# elif len(value) > 0:
|
|
90
|
+
# print(f"Detected {key} alarm(s). Adding to event df...")
|
|
91
|
+
# alarm_df = pd.concat([alarm_df, value])
|
|
92
|
+
# else:
|
|
93
|
+
# print(f"No {key} alarm(s) detected.")
|
|
94
|
+
|
|
35
95
|
return alarm_df
|
|
36
96
|
|
|
37
97
|
def flag_abnormal_COP(daily_data: pd.DataFrame, config : ConfigManager, system: str = "", default_high_bound : float = 4.5, default_low_bound : float = 0) -> pd.DataFrame:
|
|
@@ -41,65 +101,9 @@ def flag_abnormal_COP(daily_data: pd.DataFrame, config : ConfigManager, system:
|
|
|
41
101
|
except FileNotFoundError:
|
|
42
102
|
print("File Not Found: ", variable_names_path)
|
|
43
103
|
return pd.DataFrame()
|
|
104
|
+
alarm = AbnormalCOP(bounds_df, default_high_bound, default_low_bound)
|
|
105
|
+
return alarm.find_alarms(None, daily_data, config)
|
|
44
106
|
|
|
45
|
-
if (system != ""):
|
|
46
|
-
if not 'system' in bounds_df.columns:
|
|
47
|
-
raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
|
|
48
|
-
bounds_df = bounds_df.loc[bounds_df['system'] == system]
|
|
49
|
-
if not "variable_name" in bounds_df.columns:
|
|
50
|
-
raise Exception(f"variable_name is not present in Variable_Names.csv")
|
|
51
|
-
if not 'pretty_name' in bounds_df.columns:
|
|
52
|
-
bounds_df['pretty_name'] = bounds_df['variable_name']
|
|
53
|
-
else:
|
|
54
|
-
bounds_df['pretty_name'] = bounds_df['pretty_name'].fillna(bounds_df['variable_name'])
|
|
55
|
-
if not 'high_alarm' in bounds_df.columns:
|
|
56
|
-
bounds_df['high_alarm'] = default_high_bound
|
|
57
|
-
else:
|
|
58
|
-
bounds_df['high_alarm'] = bounds_df['high_alarm'].fillna(default_high_bound)
|
|
59
|
-
if not 'low_alarm' in bounds_df.columns:
|
|
60
|
-
bounds_df['low_alarm'] = default_low_bound
|
|
61
|
-
else:
|
|
62
|
-
bounds_df['low_alarm'] = bounds_df['low_alarm'].fillna(default_low_bound)
|
|
63
|
-
|
|
64
|
-
bounds_df = bounds_df.loc[:, ["variable_name", "high_alarm", "low_alarm", "pretty_name"]]
|
|
65
|
-
bounds_df.dropna(axis=0, thresh=2, inplace=True)
|
|
66
|
-
bounds_df.set_index(['variable_name'], inplace=True)
|
|
67
|
-
|
|
68
|
-
cop_pattern = re.compile(r'^(COP\w*|SystemCOP\w*)$')
|
|
69
|
-
cop_columns = [col for col in daily_data.columns if re.match(cop_pattern, col)]
|
|
70
|
-
|
|
71
|
-
alarms_dict = {}
|
|
72
|
-
if not daily_data.empty and len(cop_columns) > 0:
|
|
73
|
-
for bound_var, bounds in bounds_df.iterrows():
|
|
74
|
-
if bound_var in cop_columns:
|
|
75
|
-
for day, day_values in daily_data.iterrows():
|
|
76
|
-
if not day_values[bound_var] is None and (day_values[bound_var] > bounds['high_alarm'] or day_values[bound_var] < bounds['low_alarm']):
|
|
77
|
-
alarm_str = f"Unexpected COP Value detected: {bounds['pretty_name']} = {round(day_values[bound_var],2)}"
|
|
78
|
-
if day in alarms_dict:
|
|
79
|
-
alarms_dict[day].append([bound_var, alarm_str])
|
|
80
|
-
else:
|
|
81
|
-
alarms_dict[day] = [[bound_var, alarm_str]]
|
|
82
|
-
return _convert_event_type_dict_to_df(alarms_dict, event_type="SILENT_ALARM")
|
|
83
|
-
|
|
84
|
-
def _check_if_during_ongoing_cop_alarm(daily_df : pd.DataFrame, config : ConfigManager, site_name : str = None) -> bool:
|
|
85
|
-
if site_name is None:
|
|
86
|
-
site_name = config.get_site_name()
|
|
87
|
-
connection, cursor = config.connect_db()
|
|
88
|
-
on_going_cop = False
|
|
89
|
-
try:
|
|
90
|
-
# find existing times in database for upsert statement
|
|
91
|
-
cursor.execute(
|
|
92
|
-
f"SELECT id FROM site_events WHERE start_time_pt <= '{daily_df.index.min()}' AND (end_time_pt IS NULL OR end_time_pt >= '{daily_df.index.max()}') AND site_name = '{site_name}' AND event_type = 'DATA_LOSS_COP'")
|
|
93
|
-
# Fetch the results into a DataFrame
|
|
94
|
-
existing_rows = pd.DataFrame(cursor.fetchall(), columns=['id'])
|
|
95
|
-
if not existing_rows.empty:
|
|
96
|
-
on_going_cop = True
|
|
97
|
-
|
|
98
|
-
except mysqlerrors.Error as e:
|
|
99
|
-
print(f"Retrieving data from site_events caused exception: {e}")
|
|
100
|
-
connection.close()
|
|
101
|
-
cursor.close()
|
|
102
|
-
return on_going_cop
|
|
103
107
|
|
|
104
108
|
def flag_boundary_alarms(df: pd.DataFrame, config : ConfigManager, default_fault_time : int = 15, system: str = "", full_days : list = None) -> pd.DataFrame:
|
|
105
109
|
"""
|
|
@@ -113,13 +117,13 @@ def flag_boundary_alarms(df: pd.DataFrame, config : ConfigManager, default_fault
|
|
|
113
117
|
are out of order or have gaps, the function may return erroneous alarms.
|
|
114
118
|
config : ecopipeline.ConfigManager
|
|
115
119
|
The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
|
|
116
|
-
called
|
|
120
|
+
called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
|
|
117
121
|
The file must have at least three columns which must be titled "variable_name", "low_alarm", and "high_alarm" which should contain the
|
|
118
122
|
name of each variable in the dataframe that requires the alarming, the lower bound for acceptable data, and the upper bound for
|
|
119
123
|
acceptable data respectively
|
|
120
124
|
default_fault_time : int
|
|
121
125
|
Number of consecutive minutes that a sensor must be out of bounds for to trigger an alarm. Can be customized for each variable with
|
|
122
|
-
the fault_time column in
|
|
126
|
+
the fault_time column in Variable_Names.csv
|
|
123
127
|
system: str
|
|
124
128
|
string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
|
|
125
129
|
full_days : list
|
|
@@ -139,51 +143,10 @@ def flag_boundary_alarms(df: pd.DataFrame, config : ConfigManager, default_fault
|
|
|
139
143
|
except FileNotFoundError:
|
|
140
144
|
print("File Not Found: ", variable_names_path)
|
|
141
145
|
return pd.DataFrame()
|
|
146
|
+
alarm = Boundary(bounds_df, default_fault_time)
|
|
147
|
+
return alarm.find_alarms(df, None, config)
|
|
142
148
|
|
|
143
|
-
|
|
144
|
-
if not 'system' in bounds_df.columns:
|
|
145
|
-
raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
|
|
146
|
-
bounds_df = bounds_df.loc[bounds_df['system'] == system]
|
|
147
|
-
|
|
148
|
-
required_columns = ["variable_name", "high_alarm", "low_alarm"]
|
|
149
|
-
for required_column in required_columns:
|
|
150
|
-
if not required_column in bounds_df.columns:
|
|
151
|
-
raise Exception(f"{required_column} is not present in Variable_Names.csv")
|
|
152
|
-
if not 'pretty_name' in bounds_df.columns:
|
|
153
|
-
bounds_df['pretty_name'] = bounds_df['variable_name']
|
|
154
|
-
else:
|
|
155
|
-
bounds_df['pretty_name'] = bounds_df['pretty_name'].fillna(bounds_df['variable_name'])
|
|
156
|
-
if not 'fault_time' in bounds_df.columns:
|
|
157
|
-
bounds_df['fault_time'] = default_fault_time
|
|
158
|
-
|
|
159
|
-
idx = df.index
|
|
160
|
-
if full_days is None:
|
|
161
|
-
full_days = pd.to_datetime(pd.Series(idx).dt.normalize().unique())
|
|
162
|
-
|
|
163
|
-
bounds_df = bounds_df.loc[:, ["variable_name", "high_alarm", "low_alarm", "fault_time", "pretty_name"]]
|
|
164
|
-
bounds_df.dropna(axis=0, thresh=2, inplace=True)
|
|
165
|
-
bounds_df.set_index(['variable_name'], inplace=True)
|
|
166
|
-
# ensure that lower and upper bounds are numbers
|
|
167
|
-
bounds_df['high_alarm'] = pd.to_numeric(bounds_df['high_alarm'], errors='coerce').astype(float)
|
|
168
|
-
bounds_df['low_alarm'] = pd.to_numeric(bounds_df['low_alarm'], errors='coerce').astype(float)
|
|
169
|
-
bounds_df['fault_time'] = pd.to_numeric(bounds_df['fault_time'], errors='coerce').astype('Int64')
|
|
170
|
-
bounds_df = bounds_df[bounds_df.index.notnull()]
|
|
171
|
-
alarms = {}
|
|
172
|
-
for bound_var, bounds in bounds_df.iterrows():
|
|
173
|
-
if bound_var in df.columns:
|
|
174
|
-
lower_mask = df[bound_var] < bounds["low_alarm"]
|
|
175
|
-
upper_mask = df[bound_var] > bounds["high_alarm"]
|
|
176
|
-
if pd.isna(bounds['fault_time']):
|
|
177
|
-
bounds['fault_time'] = default_fault_time
|
|
178
|
-
for day in full_days:
|
|
179
|
-
if bounds['fault_time'] < 1 :
|
|
180
|
-
print(f"Could not process alarm for {bound_var}. Fault time must be greater than or equal to 1 minute.")
|
|
181
|
-
_check_and_add_alarm(df, lower_mask, alarms, day, bounds["fault_time"], bound_var, bounds['pretty_name'], 'Lower')
|
|
182
|
-
_check_and_add_alarm(df, upper_mask, alarms, day, bounds["fault_time"], bound_var, bounds['pretty_name'], 'Upper')
|
|
183
|
-
|
|
184
|
-
return _convert_silent_alarm_dict_to_df(alarms)
|
|
185
|
-
|
|
186
|
-
def flag_high_swing_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, default_fault_time : int = 3,
|
|
149
|
+
def flag_high_tm_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, default_fault_time : int = 3,
|
|
187
150
|
system: str = "", default_setpoint : float = 130.0, default_power_indication : float = 1.0,
|
|
188
151
|
default_power_ratio : float = 0.4) -> pd.DataFrame:
|
|
189
152
|
"""
|
|
@@ -191,10 +154,10 @@ def flag_high_swing_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config :
|
|
|
191
154
|
and create a dataframe with applicable alarm events
|
|
192
155
|
|
|
193
156
|
VarNames syntax:
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
157
|
+
TMSTPT_T_ID:### - Swing Tank Outlet Temperature. Alarm triggered if over number ### (or 130) for 3 minutes with power on
|
|
158
|
+
TMSTPT_SP_ID:### - Swing Tank Power. ### is lowest recorded power for Swing Tank to be considered 'on'. Defaults to 1.0
|
|
159
|
+
TMSTPT_TP_ID:### - Total System Power for ratio alarming. Alarm triggers if swing tank power is more than ### (40% default) of usage
|
|
160
|
+
TMSTPT_ST_ID:### - Swing Tank Setpoint that should not change at all from ### (default 130)
|
|
198
161
|
|
|
199
162
|
Parameters
|
|
200
163
|
----------
|
|
@@ -205,9 +168,9 @@ def flag_high_swing_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config :
|
|
|
205
168
|
post-transformed dataframe for daily data. Used for checking power ratios and determining which days to process.
|
|
206
169
|
config : ecopipeline.ConfigManager
|
|
207
170
|
The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
|
|
208
|
-
called
|
|
171
|
+
called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
|
|
209
172
|
The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
|
|
210
|
-
name of each variable in the dataframe that requires alarming and the
|
|
173
|
+
name of each variable in the dataframe that requires alarming and the TMSTPT alarm codes (e.g., TMSTPT_T_1:140, TMSTPT_SP_1:2.0)
|
|
211
174
|
default_fault_time : int
|
|
212
175
|
Number of consecutive minutes for T+SP alarms (default 3). T+SP alarms trigger when tank is powered and temperature exceeds
|
|
213
176
|
setpoint for this many consecutive minutes.
|
|
@@ -234,117 +197,148 @@ def flag_high_swing_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config :
|
|
|
234
197
|
except FileNotFoundError:
|
|
235
198
|
print("File Not Found: ", variable_names_path)
|
|
236
199
|
return pd.DataFrame()
|
|
200
|
+
alarm = TMSetpoint(bounds_df, default_fault_time, default_setpoint, default_power_indication, default_power_ratio)
|
|
201
|
+
return alarm.find_alarms(df, daily_df, config)
|
|
237
202
|
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
'SP': default_power_indication,
|
|
241
|
-
'TP': default_power_ratio,
|
|
242
|
-
'ST': default_setpoint},
|
|
243
|
-
system)
|
|
244
|
-
if bounds_df.empty:
|
|
245
|
-
return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
|
|
246
|
-
|
|
247
|
-
# Process each unique alarm_code_id
|
|
248
|
-
alarms = {}
|
|
249
|
-
for day in daily_df.index:
|
|
250
|
-
next_day = day + pd.Timedelta(days=1)
|
|
251
|
-
filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
|
|
252
|
-
alarmed_for_day = False
|
|
253
|
-
for alarm_id in bounds_df['alarm_code_id'].unique():
|
|
254
|
-
id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
|
|
255
|
-
|
|
256
|
-
# Get T and SP alarm codes for this ID
|
|
257
|
-
t_codes = id_group[id_group['alarm_code_type'] == 'T']
|
|
258
|
-
sp_codes = id_group[id_group['alarm_code_type'] == 'SP']
|
|
259
|
-
tp_codes = id_group[id_group['alarm_code_type'] == 'TP']
|
|
260
|
-
st_codes = id_group[id_group['alarm_code_type'] == 'ST']
|
|
261
|
-
|
|
262
|
-
# Check for multiple T or SP codes with same ID
|
|
263
|
-
if len(t_codes) > 1 or len(sp_codes) > 1 or len(tp_codes) > 1 or len(st_codes) > 1:
|
|
264
|
-
raise Exception(f"Improper alarm codes for swing tank setpoint with id {alarm_id}")
|
|
265
|
-
|
|
266
|
-
# Check if we have both T and SP
|
|
267
|
-
if len(t_codes) == 1 and len(sp_codes) == 1:
|
|
268
|
-
t_var_name = t_codes.iloc[0]['variable_name']
|
|
269
|
-
sp_var_name = sp_codes.iloc[0]['variable_name']
|
|
270
|
-
sp_power_indication = sp_codes.iloc[0]['bound']
|
|
271
|
-
t_setpoint = t_codes.iloc[0]['bound']
|
|
272
|
-
# Check if both variables exist in df
|
|
273
|
-
if t_var_name in filtered_df.columns and sp_var_name in filtered_df.columns:
|
|
274
|
-
# Check for consecutive minutes where SP > default_power_indication
|
|
275
|
-
# AND T >= default_setpoint
|
|
276
|
-
power_mask = filtered_df[sp_var_name] >= sp_power_indication
|
|
277
|
-
temp_mask = filtered_df[t_var_name] >= t_setpoint
|
|
278
|
-
combined_mask = power_mask & temp_mask
|
|
279
|
-
|
|
280
|
-
# Check for 3 consecutive minutes
|
|
281
|
-
consecutive_condition = combined_mask.rolling(window=default_fault_time).min() == 1
|
|
282
|
-
if consecutive_condition.any():
|
|
283
|
-
# Get the first index where condition was met
|
|
284
|
-
first_true_index = consecutive_condition.idxmax()
|
|
285
|
-
# Adjust for the rolling window (first fault_time-1 minutes don't count)
|
|
286
|
-
adjusted_time = first_true_index - pd.Timedelta(minutes=default_fault_time-1)
|
|
287
|
-
_add_an_alarm(alarms, adjusted_time, sp_var_name, f"High swing tank setpoint: Swing tank was powered at {adjusted_time} although temperature was above {t_setpoint}.")
|
|
288
|
-
alarmed_for_day = True
|
|
289
|
-
if not alarmed_for_day and len(st_codes) == 1:
|
|
290
|
-
st_var_name = st_codes.iloc[0]['variable_name']
|
|
291
|
-
st_setpoint = st_codes.iloc[0]['bound']
|
|
292
|
-
# Check if st_var_name exists in filtered_df
|
|
293
|
-
if st_var_name in filtered_df.columns:
|
|
294
|
-
# Check if setpoint was altered for over 10 minutes
|
|
295
|
-
altered_mask = filtered_df[st_var_name] != st_setpoint
|
|
296
|
-
consecutive_condition = altered_mask.rolling(window=10).min() == 1
|
|
297
|
-
if consecutive_condition.any():
|
|
298
|
-
# Get the first index where condition was met
|
|
299
|
-
first_true_index = consecutive_condition.idxmax()
|
|
300
|
-
# Adjust for the rolling window
|
|
301
|
-
adjusted_time = first_true_index - pd.Timedelta(minutes=9)
|
|
302
|
-
_add_an_alarm(alarms, day, st_var_name, f"Swing tank setpoint was altered at {adjusted_time}")
|
|
303
|
-
alarmed_for_day = True
|
|
304
|
-
if not alarmed_for_day and len(tp_codes) == 1 and len(sp_codes) == 1:
|
|
305
|
-
tp_var_name = tp_codes.iloc[0]['variable_name']
|
|
306
|
-
sp_var_name = sp_codes.iloc[0]['variable_name']
|
|
307
|
-
tp_ratio = tp_codes.iloc[0]['bound']
|
|
308
|
-
# Check if both variables exist in df
|
|
309
|
-
if tp_var_name in daily_df.columns and sp_var_name in daily_df.columns:
|
|
310
|
-
# Check if swing tank power ratio exceeds threshold
|
|
311
|
-
if day in daily_df.index and daily_df.loc[day, tp_var_name] != 0:
|
|
312
|
-
power_ratio = daily_df.loc[day, sp_var_name] / daily_df.loc[day, tp_var_name]
|
|
313
|
-
if power_ratio > tp_ratio:
|
|
314
|
-
_add_an_alarm(alarms, day, sp_var_name, f"High swing tank power ratio: Swing tank accounted for more than {tp_ratio * 100}% of daily power.")
|
|
315
|
-
return _convert_silent_alarm_dict_to_df(alarms)
|
|
316
|
-
|
|
317
|
-
def flag_recirc_balance_valve(daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_ratio : float = 0.4) -> pd.DataFrame:
|
|
203
|
+
def flag_backup_use(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager,
|
|
204
|
+
system: str = "", default_setpoint : float = 130.0, default_power_ratio : float = 0.1) -> pd.DataFrame:
|
|
318
205
|
"""
|
|
319
206
|
Function will take a pandas dataframe and location of alarm information in a csv,
|
|
320
207
|
and create a dataframe with applicable alarm events
|
|
321
208
|
|
|
322
209
|
VarNames syntax:
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
210
|
+
BU_P_ID - Back Up Tank Power Variable. Must be in same power units as total system power
|
|
211
|
+
BU_TP_ID:### - Total System Power for ratio alarming. Alarm triggers if back up power is more than ### (10% default) of usage
|
|
212
|
+
BU_ST_ID:### - Back Up Setpoint that should not change at all from ### (default 130)
|
|
326
213
|
|
|
327
214
|
Parameters
|
|
328
215
|
----------
|
|
216
|
+
df: pd.DataFrame
|
|
217
|
+
post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in order minutes. If minutes
|
|
218
|
+
are out of order or have gaps, the function may return erroneous alarms.
|
|
329
219
|
daily_df: pd.DataFrame
|
|
330
|
-
post-transformed dataframe for daily data. Used for checking
|
|
331
|
-
power to heating output power.
|
|
220
|
+
post-transformed dataframe for daily data. Used for checking power ratios and determining which days to process.
|
|
332
221
|
config : ecopipeline.ConfigManager
|
|
333
222
|
The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
|
|
334
|
-
called
|
|
223
|
+
called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
|
|
335
224
|
The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
|
|
336
|
-
name of each variable in the dataframe that requires alarming and the
|
|
225
|
+
name of each variable in the dataframe that requires alarming and the BU alarm codes (e.g., BU_P_1, BU_TP_1:0.1, BU_ST_1:130)
|
|
337
226
|
system: str
|
|
338
227
|
string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
|
|
228
|
+
default_setpoint : float
|
|
229
|
+
Default temperature setpoint in degrees for ST alarm codes when no custom bound is specified (default 130.0)
|
|
230
|
+
default_power_indication : float
|
|
231
|
+
Default power threshold in kW for SP alarm codes when no custom bound is specified (default 1.0)
|
|
339
232
|
default_power_ratio : float
|
|
340
|
-
Default power ratio threshold (as decimal, e.g., 0.4 for 40%) for
|
|
341
|
-
Alarm triggers when sum of ER equipment >= (OUT value / default_power_ratio)
|
|
233
|
+
Default power ratio threshold (as decimal, e.g., 0.1 for 10%) for TP alarm codes when no custom bound is specified (default 0.1)
|
|
342
234
|
|
|
343
235
|
Returns
|
|
344
236
|
-------
|
|
345
237
|
pd.DataFrame:
|
|
346
238
|
Pandas dataframe with alarm events
|
|
347
239
|
"""
|
|
240
|
+
if df.empty:
|
|
241
|
+
print("cannot flag swing tank setpoint alarms. Dataframe is empty")
|
|
242
|
+
return pd.DataFrame()
|
|
243
|
+
variable_names_path = config.get_var_names_path()
|
|
244
|
+
try:
|
|
245
|
+
bounds_df = pd.read_csv(variable_names_path)
|
|
246
|
+
except FileNotFoundError:
|
|
247
|
+
print("File Not Found: ", variable_names_path)
|
|
248
|
+
return pd.DataFrame()
|
|
249
|
+
alarm = BackupUse(bounds_df, default_setpoint, default_power_ratio)
|
|
250
|
+
return alarm.find_alarms(df, daily_df, config)
|
|
251
|
+
|
|
252
|
+
def flag_HP_outage(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, day_table_name : str, system: str = "", default_power_ratio : float = 0.3,
|
|
253
|
+
ratio_period_days : int = 7) -> pd.DataFrame:
|
|
254
|
+
"""
|
|
255
|
+
Detects possible heat pump failures or outages by checking if heat pump power consumption falls below
|
|
256
|
+
an expected ratio of total system power over a rolling period, or by checking for non-zero values in
|
|
257
|
+
a direct alarm variable from the heat pump controller.
|
|
258
|
+
|
|
259
|
+
VarNames syntax:
|
|
260
|
+
HPOUT_POW_[OPTIONAL ID]:### - Heat pump power variable. ### is the minimum expected ratio of HP power to total power
|
|
261
|
+
(default 0.3 for 30%). Must be in same power units as total system power.
|
|
262
|
+
HPOUT_TP_[OPTIONAL ID] - Total system power variable for ratio comparison. Required when using POW codes.
|
|
263
|
+
HPOUT_ALRM_[OPTIONAL ID] - Direct alarm variable from HP controller. Alarm triggers if any non-zero value is detected.
|
|
264
|
+
|
|
265
|
+
Parameters
|
|
266
|
+
----------
|
|
267
|
+
df: pd.DataFrame
|
|
268
|
+
Post-transformed dataframe for minute data. Used for checking ALRM codes for non-zero values.
|
|
269
|
+
daily_df: pd.DataFrame
|
|
270
|
+
Post-transformed dataframe for daily data. Used for checking power ratios over the rolling period.
|
|
271
|
+
config : ecopipeline.ConfigManager
|
|
272
|
+
The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
|
|
273
|
+
called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
|
|
274
|
+
The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
|
|
275
|
+
name of each variable in the dataframe that requires alarming and the HPOUT alarm codes (e.g., HPOUT_POW_1:0.3, HPOUT_TP_1, HPOUT_ALRM_1).
|
|
276
|
+
day_table_name : str
|
|
277
|
+
Name of the daily database table to fetch previous days' data for the rolling period calculation.
|
|
278
|
+
system: str
|
|
279
|
+
String of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
|
|
280
|
+
default_power_ratio : float
|
|
281
|
+
Default minimum power ratio threshold (as decimal, e.g., 0.3 for 30%) for POW alarm codes when no custom bound is specified (default 0.3).
|
|
282
|
+
An alarm triggers if HP power falls below this ratio of total power over the rolling period.
|
|
283
|
+
ratio_period_days : int
|
|
284
|
+
Number of days to use for the rolling power ratio calculation (default 7). Must be greater than 1.
|
|
285
|
+
|
|
286
|
+
Returns
|
|
287
|
+
-------
|
|
288
|
+
pd.DataFrame:
|
|
289
|
+
Pandas dataframe with alarm events
|
|
290
|
+
"""
|
|
291
|
+
if df.empty:
|
|
292
|
+
print("cannot flag swing tank setpoint alarms. Dataframe is empty")
|
|
293
|
+
return pd.DataFrame()
|
|
294
|
+
variable_names_path = config.get_var_names_path()
|
|
295
|
+
try:
|
|
296
|
+
bounds_df = pd.read_csv(variable_names_path)
|
|
297
|
+
except FileNotFoundError:
|
|
298
|
+
print("File Not Found: ", variable_names_path)
|
|
299
|
+
return pd.DataFrame()
|
|
300
|
+
|
|
301
|
+
alarm = HPWHOutage(bounds_df, day_table_name, default_power_ratio, ratio_period_days)
|
|
302
|
+
return alarm.find_alarms(df, daily_df, config)
|
|
303
|
+
|
|
304
|
+
def flag_recirc_balance_valve(daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_ratio : float = 0.4) -> pd.DataFrame:
|
|
305
|
+
"""
|
|
306
|
+
Detects recirculation balance issues by comparing sum of ER (equipment recirculation) heater
|
|
307
|
+
power to either total power or heating output.
|
|
308
|
+
|
|
309
|
+
VarNames syntax:
|
|
310
|
+
BV_ER_[OPTIONAL ID] - Indicates a power variable for an ER heater (equipment recirculation).
|
|
311
|
+
Multiple ER variables with the same ID will be summed together.
|
|
312
|
+
BV_TP_[OPTIONAL ID]:### - Indicates the Total Power of the system. Optional ### for the percentage
|
|
313
|
+
threshold that should not be crossed by the ER elements (default 0.4 for 40%).
|
|
314
|
+
Alarm triggers when sum of ER >= total_power * threshold.
|
|
315
|
+
BV_OUT_[OPTIONAL ID] - Indicates the heating output variable the ER heating contributes to.
|
|
316
|
+
Alarm triggers when sum of ER > sum of OUT * 0.95 (i.e., ER exceeds 95% of heating output).
|
|
317
|
+
Multiple OUT variables with the same ID will be summed together.
|
|
318
|
+
|
|
319
|
+
Note: Each alarm ID requires at least one ER code AND either one TP code OR at least one OUT code.
|
|
320
|
+
If a TP code exists for an ID, it takes precedence over OUT codes.
|
|
321
|
+
|
|
322
|
+
Parameters
|
|
323
|
+
----------
|
|
324
|
+
daily_df: pd.DataFrame
|
|
325
|
+
Post-transformed dataframe for daily data. Used for checking recirculation balance by comparing sum of ER equipment
|
|
326
|
+
power to total power or heating output power.
|
|
327
|
+
config : ecopipeline.ConfigManager
|
|
328
|
+
The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
|
|
329
|
+
called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
|
|
330
|
+
The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
|
|
331
|
+
name of each variable in the dataframe that requires alarming and the BV alarm codes (e.g., BV_ER_1, BV_TP_1:0.3)
|
|
332
|
+
system: str
|
|
333
|
+
String of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
|
|
334
|
+
default_power_ratio : float
|
|
335
|
+
Default power ratio threshold (as decimal, e.g., 0.4 for 40%) for TP alarm codes when no custom bound is specified (default 0.4).
|
|
336
|
+
|
|
337
|
+
Returns
|
|
338
|
+
-------
|
|
339
|
+
pd.DataFrame:
|
|
340
|
+
Pandas dataframe with alarm events
|
|
341
|
+
"""
|
|
348
342
|
if daily_df.empty:
|
|
349
343
|
print("cannot flag missing balancing valve alarms. Dataframe is empty")
|
|
350
344
|
return pd.DataFrame()
|
|
@@ -354,36 +348,8 @@ def flag_recirc_balance_valve(daily_df: pd.DataFrame, config : ConfigManager, sy
|
|
|
354
348
|
except FileNotFoundError:
|
|
355
349
|
print("File Not Found: ", variable_names_path)
|
|
356
350
|
return pd.DataFrame()
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
system)
|
|
360
|
-
if bounds_df.empty:
|
|
361
|
-
return _convert_silent_alarm_dict_to_df({}) # no BV alarms to look into
|
|
362
|
-
# Process each unique alarm_code_id
|
|
363
|
-
alarms = {}
|
|
364
|
-
for alarm_id in bounds_df['alarm_code_id'].unique():
|
|
365
|
-
id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
|
|
366
|
-
out_codes = id_group[id_group['alarm_code_type'] == 'OUT']
|
|
367
|
-
out_var_name = out_codes.iloc[0]['variable_name']
|
|
368
|
-
out_bound = out_codes.iloc[0]['bound']
|
|
369
|
-
er_codes = id_group[id_group['alarm_code_type'] == 'ER']
|
|
370
|
-
if len(out_codes) > 1 or len(er_codes) < 1:
|
|
371
|
-
raise Exception(f"Improper alarm codes for balancing valve with id {alarm_id}")
|
|
372
|
-
for day in daily_df.index:
|
|
373
|
-
if out_var_name in daily_df.columns:
|
|
374
|
-
# Get list of ER variable names
|
|
375
|
-
er_var_names = er_codes['variable_name'].tolist()
|
|
376
|
-
|
|
377
|
-
# Check if all ER variables exist in daily_df
|
|
378
|
-
if all(var in daily_df.columns for var in er_var_names):
|
|
379
|
-
# Sum all ER variables for this day
|
|
380
|
-
er_sum = daily_df.loc[day, er_var_names].sum()
|
|
381
|
-
out_value = daily_df.loc[day, out_var_name]
|
|
382
|
-
|
|
383
|
-
# Check if sum of ER >= OUT value
|
|
384
|
-
if er_sum >= out_value*out_bound:
|
|
385
|
-
_add_an_alarm(alarms, day, out_var_name, f"Recirculation imbalance: Sum of recirculation equipment ({er_sum:.2f}) exceeds or equals {(out_bound * 100):.2f}% of heating output.")
|
|
386
|
-
return _convert_silent_alarm_dict_to_df(alarms)
|
|
351
|
+
alarm = BalancingValve(bounds_df, default_power_ratio)
|
|
352
|
+
return alarm.find_alarms(None, daily_df, config)
|
|
387
353
|
|
|
388
354
|
def flag_hp_inlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
|
|
389
355
|
default_temp_threshold : float = 115.0, fault_time : int = 5) -> pd.DataFrame:
|
|
@@ -406,7 +372,7 @@ def flag_hp_inlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : Config
|
|
|
406
372
|
post-transformed dataframe for daily data.
|
|
407
373
|
config : ecopipeline.ConfigManager
|
|
408
374
|
The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
|
|
409
|
-
called
|
|
375
|
+
called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
|
|
410
376
|
The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
|
|
411
377
|
name of each variable in the dataframe that requires alarming and the HPI alarm codes (e.g., HPI_POW_1:0.5, HPI_T_1:125.0)
|
|
412
378
|
system: str
|
|
@@ -434,193 +400,335 @@ def flag_hp_inlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : Config
|
|
|
434
400
|
except FileNotFoundError:
|
|
435
401
|
print("File Not Found: ", variable_names_path)
|
|
436
402
|
return pd.DataFrame()
|
|
403
|
+
alarm = HPWHInlet(bounds_df, default_power_threshold, default_temp_threshold, fault_time)
|
|
404
|
+
return alarm.find_alarms(df, daily_df, config)
|
|
437
405
|
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
# Process each unique alarm_code_id
|
|
446
|
-
alarms = {}
|
|
447
|
-
for alarm_id in bounds_df['alarm_code_id'].unique():
|
|
448
|
-
for day in daily_df.index:
|
|
449
|
-
next_day = day + pd.Timedelta(days=1)
|
|
450
|
-
filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
|
|
451
|
-
id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
|
|
452
|
-
pow_codes = id_group[id_group['alarm_code_type'] == 'POW']
|
|
453
|
-
pow_var_name = pow_codes.iloc[0]['variable_name']
|
|
454
|
-
pow_thresh = pow_codes.iloc[0]['bound']
|
|
455
|
-
t_codes = id_group[id_group['alarm_code_type'] == 'T']
|
|
456
|
-
t_var_name = t_codes.iloc[0]['variable_name']
|
|
457
|
-
t_pretty_name = t_codes.iloc[0]['pretty_name']
|
|
458
|
-
t_thresh = t_codes.iloc[0]['bound']
|
|
459
|
-
if len(t_codes) != 1 or len(pow_codes) != 1:
|
|
460
|
-
raise Exception(f"Improper alarm codes for balancing valve with id {alarm_id}")
|
|
461
|
-
if pow_var_name in filtered_df.columns and t_var_name in filtered_df.columns:
|
|
462
|
-
# Check for consecutive minutes where both power and temp exceed thresholds
|
|
463
|
-
power_mask = filtered_df[pow_var_name] > pow_thresh
|
|
464
|
-
temp_mask = filtered_df[t_var_name] > t_thresh
|
|
465
|
-
combined_mask = power_mask & temp_mask
|
|
466
|
-
|
|
467
|
-
# Check for fault_time consecutive minutes
|
|
468
|
-
consecutive_condition = combined_mask.rolling(window=fault_time).min() == 1
|
|
469
|
-
if consecutive_condition.any():
|
|
470
|
-
first_true_index = consecutive_condition.idxmax()
|
|
471
|
-
adjusted_time = first_true_index - pd.Timedelta(minutes=fault_time-1)
|
|
472
|
-
_add_an_alarm(alarms, day, t_var_name, f"High heat pump inlet temperature: {t_pretty_name} was above {t_thresh:.1f} while HP was ON starting at {adjusted_time}.")
|
|
473
|
-
|
|
474
|
-
return _convert_silent_alarm_dict_to_df(alarms)
|
|
475
|
-
|
|
476
|
-
def _process_bounds_df_alarm_codes(bounds_df : pd.DataFrame, alarm_tag : str, type_default_dict : dict = {}, system : str = "") -> pd.DataFrame:
|
|
477
|
-
# Should only do for alarm codes of format: [TAG]_[TYPE]_[OPTIONAL_ID]:[BOUND]
|
|
478
|
-
if (system != ""):
|
|
479
|
-
if not 'system' in bounds_df.columns:
|
|
480
|
-
raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
|
|
481
|
-
bounds_df = bounds_df.loc[bounds_df['system'] == system]
|
|
406
|
+
def flag_hp_outlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
|
|
407
|
+
default_temp_threshold : float = 140.0, fault_time : int = 5) -> pd.DataFrame:
|
|
408
|
+
"""
|
|
409
|
+
Detects low heat pump outlet temperature by checking if the outlet temperature falls below a threshold
|
|
410
|
+
while the heat pump is running. The first 10 minutes after each HP turn-on are excluded as a warmup
|
|
411
|
+
period. An alarm triggers if the temperature stays below the threshold for `fault_time` consecutive
|
|
412
|
+
minutes after the warmup period.
|
|
482
413
|
|
|
483
|
-
|
|
484
|
-
for
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
bounds_df['pretty_name'] = bounds_df['variable_name']
|
|
489
|
-
else:
|
|
490
|
-
bounds_df['pretty_name'] = bounds_df['pretty_name'].fillna(bounds_df['variable_name'])
|
|
414
|
+
VarNames syntax:
|
|
415
|
+
HPO_POW_[OPTIONAL ID]:### - Indicates a power variable for the heat pump. ### is the power threshold (default 1.0) above which
|
|
416
|
+
the heat pump is considered 'on'.
|
|
417
|
+
HPO_T_[OPTIONAL ID]:### - Indicates heat pump outlet temperature variable. ### is the temperature threshold (default 140.0)
|
|
418
|
+
that should always be exceeded while the heat pump is on after the 10-minute warmup period.
|
|
491
419
|
|
|
492
|
-
|
|
493
|
-
|
|
420
|
+
Parameters
|
|
421
|
+
----------
|
|
422
|
+
df: pd.DataFrame
|
|
423
|
+
Post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in order minutes. If minutes
|
|
424
|
+
are out of order or have gaps, the function may return erroneous alarms.
|
|
425
|
+
daily_df: pd.DataFrame
|
|
426
|
+
Post-transformed dataframe for daily data.
|
|
427
|
+
config : ecopipeline.ConfigManager
|
|
428
|
+
The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
|
|
429
|
+
called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
|
|
430
|
+
The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
|
|
431
|
+
name of each variable in the dataframe that requires alarming and the HPO alarm codes (e.g., HPO_POW_1:1.0, HPO_T_1:140.0).
|
|
432
|
+
system: str
|
|
433
|
+
String of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
|
|
434
|
+
default_power_threshold : float
|
|
435
|
+
Default power threshold for POW alarm codes when no custom bound is specified (default 1.0). Heat pump is considered 'on'
|
|
436
|
+
when power exceeds this value.
|
|
437
|
+
default_temp_threshold : float
|
|
438
|
+
Default temperature threshold for T alarm codes when no custom bound is specified (default 140.0). Alarm triggers when
|
|
439
|
+
temperature falls BELOW this value while heat pump is on (after warmup period).
|
|
440
|
+
fault_time : int
|
|
441
|
+
Number of consecutive minutes that temperature must be below threshold (after warmup) before triggering an alarm (default 5).
|
|
494
442
|
|
|
495
|
-
|
|
496
|
-
|
|
443
|
+
Returns
|
|
444
|
+
-------
|
|
445
|
+
pd.DataFrame:
|
|
446
|
+
Pandas dataframe with alarm events
|
|
447
|
+
"""
|
|
448
|
+
if df.empty:
|
|
449
|
+
print("cannot flag missing balancing valve alarms. Dataframe is empty")
|
|
450
|
+
return pd.DataFrame()
|
|
451
|
+
variable_names_path = config.get_var_names_path()
|
|
452
|
+
try:
|
|
453
|
+
bounds_df = pd.read_csv(variable_names_path)
|
|
454
|
+
except FileNotFoundError:
|
|
455
|
+
print("File Not Found: ", variable_names_path)
|
|
497
456
|
return pd.DataFrame()
|
|
498
457
|
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
458
|
+
alarm = HPWHOutlet(bounds_df, default_power_threshold, default_temp_threshold, fault_time)
|
|
459
|
+
return alarm.find_alarms(df, daily_df, config)
|
|
460
|
+
|
|
461
|
+
def flag_blown_fuse(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
|
|
462
|
+
default_power_range : float = 2.0, default_power_draw : float = 30, fault_time : int = 3) -> pd.DataFrame:
|
|
463
|
+
"""
|
|
464
|
+
Detects blown fuse alarms for heating elements by identifying when an element is drawing power
|
|
465
|
+
but significantly less than expected, which may indicate a blown fuse.
|
|
466
|
+
|
|
467
|
+
VarNames syntax:
|
|
468
|
+
BF_[OPTIONAL ID]:### - Indicates a blown fuse alarm for an element. ### is the expected kW input when the element is on.
|
|
469
|
+
|
|
470
|
+
Parameters
|
|
471
|
+
----------
|
|
472
|
+
df: pd.DataFrame
|
|
473
|
+
Post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in order minutes. If minutes
|
|
474
|
+
are out of order or have gaps, the function may return erroneous alarms.
|
|
475
|
+
daily_df: pd.DataFrame
|
|
476
|
+
Post-transformed dataframe for daily data.
|
|
477
|
+
config : ecopipeline.ConfigManager
|
|
478
|
+
The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
|
|
479
|
+
called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
|
|
480
|
+
The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
|
|
481
|
+
name of each variable in the dataframe that requires alarming and the BF alarm codes (e.g., BF:30, BF_1:25).
|
|
482
|
+
system: str
|
|
483
|
+
String of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
|
|
484
|
+
default_power_threshold : float
|
|
485
|
+
Power threshold to determine if the element is "on" (default 1.0). Element is considered on when power exceeds this value.
|
|
486
|
+
default_power_range : float
|
|
487
|
+
Allowable variance below the expected power draw (default 2.0). An alarm triggers when the actual power draw is less than
|
|
488
|
+
(expected_power_draw - default_power_range) while the element is on.
|
|
489
|
+
default_power_draw : float
|
|
490
|
+
Default expected power draw in kW when no custom bound is specified in the alarm code (default 30).
|
|
491
|
+
fault_time : int
|
|
492
|
+
Number of consecutive minutes that the fault condition must persist before triggering an alarm (default 3).
|
|
493
|
+
|
|
494
|
+
Returns
|
|
495
|
+
-------
|
|
496
|
+
pd.DataFrame:
|
|
497
|
+
Pandas dataframe with alarm events
|
|
498
|
+
"""
|
|
499
|
+
if df.empty:
|
|
500
|
+
print("cannot flag missing balancing valve alarms. Dataframe is empty")
|
|
501
|
+
return pd.DataFrame()
|
|
502
|
+
variable_names_path = config.get_var_names_path()
|
|
503
|
+
try:
|
|
504
|
+
bounds_df = pd.read_csv(variable_names_path)
|
|
505
|
+
except FileNotFoundError:
|
|
506
|
+
print("File Not Found: ", variable_names_path)
|
|
507
|
+
return pd.DataFrame()
|
|
508
|
+
|
|
509
|
+
alarm = BlownFuse(bounds_df, default_power_threshold, default_power_range, default_power_draw,fault_time)
|
|
510
|
+
return alarm.find_alarms(df, daily_df, config)
|
|
511
|
+
|
|
512
|
+
def flag_unexpected_soo_change(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
|
|
513
|
+
default_on_temp : float = 115.0, default_off_temp : float = 140.0) -> pd.DataFrame:
|
|
514
|
+
"""
|
|
515
|
+
Detects unexpected state of operation (SOO) changes by checking if the heat pump turns on or off
|
|
516
|
+
when the temperature is not near the expected aquastat setpoint thresholds. An alarm is triggered
|
|
517
|
+
if the HP turns on/off and the corresponding temperature is more than 5.0 degrees away from the
|
|
518
|
+
expected threshold.
|
|
519
|
+
|
|
520
|
+
VarNames syntax:
|
|
521
|
+
SOOCHNG_POW:### - Indicates a power variable for the heat pump system (should be total power across all primary heat pumps). ### is the power threshold (default 1.0) above which
|
|
522
|
+
the heat pump system is considered 'on'.
|
|
523
|
+
SOOCHNG_ON_[Mode ID]:### - Indicates the temperature variable at the ON aquastat fraction. ### is the temperature (default 115.0)
|
|
524
|
+
that should trigger the heat pump to turn ON. Mode ID should be the load shifting mode from ['loadUp','shed','criticalPeak','gridEmergency','advLoadUp','normal'] or left blank for normal mode
|
|
525
|
+
SOOCHNG_OFF_[Mode ID]:### - Indicates the temperature variable at the OFF aquastat fraction (can be same as ON aquastat). ### is the temperature (default 140.0)
|
|
526
|
+
that should trigger the heat pump to turn OFF. Mode ID should be the load shifting mode from ['loadUp','shed','criticalPeak','gridEmergency','advLoadUp','normal'] or left blank for normal mode
|
|
527
|
+
|
|
528
|
+
Parameters
|
|
529
|
+
----------
|
|
530
|
+
df: pd.DataFrame
|
|
531
|
+
Post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in order minutes. If minutes
|
|
532
|
+
are out of order or have gaps, the function may return erroneous alarms.
|
|
533
|
+
daily_df: pd.DataFrame
|
|
534
|
+
Post-transformed dataframe for daily data.
|
|
535
|
+
config : ecopipeline.ConfigManager
|
|
536
|
+
The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
|
|
537
|
+
called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
|
|
538
|
+
The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
|
|
539
|
+
name of each variable in the dataframe that requires alarming and the SOOCHNG alarm codes (e.g., SOOCHNG_POW_normal:1.0, SOOCHNG_ON_normal:115.0, SOOCHNG_OFF_normal:140.0).
|
|
540
|
+
system: str
|
|
541
|
+
String of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
|
|
542
|
+
default_power_threshold : float
|
|
543
|
+
Default power threshold for POW alarm codes when no custom bound is specified (default 1.0). Heat pump is considered 'on'
|
|
544
|
+
when power exceeds this value.
|
|
545
|
+
default_on_temp : float
|
|
546
|
+
Default ON temperature threshold (default 115.0). When the HP turns on, an alarm triggers if the temperature
|
|
547
|
+
is more than 5.0 degrees away from this value.
|
|
548
|
+
default_off_temp : float
|
|
549
|
+
Default OFF temperature threshold (default 140.0). When the HP turns off, an alarm triggers if the temperature
|
|
550
|
+
is more than 5.0 degrees away from this value.
|
|
551
|
+
|
|
552
|
+
Returns
|
|
553
|
+
-------
|
|
554
|
+
pd.DataFrame:
|
|
555
|
+
Pandas dataframe with alarm events
|
|
556
|
+
"""
|
|
557
|
+
if df.empty:
|
|
558
|
+
print("cannot flag missing balancing valve alarms. Dataframe is empty")
|
|
559
|
+
return pd.DataFrame()
|
|
560
|
+
variable_names_path = config.get_var_names_path()
|
|
561
|
+
try:
|
|
562
|
+
bounds_df = pd.read_csv(variable_names_path)
|
|
563
|
+
except FileNotFoundError:
|
|
564
|
+
print("File Not Found: ", variable_names_path)
|
|
565
|
+
return pd.DataFrame()
|
|
566
|
+
|
|
567
|
+
alarm = SOOChange(bounds_df, default_power_threshold, default_on_temp, default_off_temp)
|
|
568
|
+
return alarm.find_alarms(df, daily_df, config)
|
|
569
|
+
|
|
570
|
+
def flag_ls_mode_inconsistancy(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "") -> pd.DataFrame:
|
|
571
|
+
"""
|
|
572
|
+
Detects when reported loadshift mode does not match its expected value during a load shifting event.
|
|
573
|
+
An alarm is triggered if the variable value does not equal the expected value during the
|
|
574
|
+
time periods defined in the load shifting schedule for that mode.
|
|
575
|
+
|
|
576
|
+
VarNames syntax:
|
|
577
|
+
SOO_[mode]:### - Indicates a variable that should equal ### during [mode] load shifting events.
|
|
578
|
+
[mode] can be: normal, loadUp, shed, criticalPeak, gridEmergency, advLoadUp
|
|
579
|
+
### is the expected value (e.g., SOO_loadUp:1 means the variable should be 1 during loadUp events)
|
|
580
|
+
|
|
581
|
+
Parameters
|
|
582
|
+
----------
|
|
583
|
+
df: pd.DataFrame
|
|
584
|
+
Post-transformed dataframe for minute data. It should be noted that this function expects consecutive,
|
|
585
|
+
in order minutes. If minutes are out of order or have gaps, the function may return erroneous alarms.
|
|
586
|
+
daily_df: pd.DataFrame
|
|
587
|
+
Pandas dataframe with daily data. This dataframe should have a datetime index.
|
|
588
|
+
config : ecopipeline.ConfigManager
|
|
589
|
+
The ConfigManager object that holds configuration data for the pipeline.
|
|
590
|
+
system: str
|
|
591
|
+
String of system name if processing a particular system in a Variable_Names.csv file with multiple systems.
|
|
592
|
+
|
|
593
|
+
Returns
|
|
594
|
+
-------
|
|
595
|
+
pd.DataFrame:
|
|
596
|
+
Pandas dataframe with alarm events
|
|
597
|
+
"""
|
|
598
|
+
if df.empty:
|
|
599
|
+
print("cannot flag load shift mode inconsistency alarms. Dataframe is empty")
|
|
600
|
+
return pd.DataFrame()
|
|
601
|
+
variable_names_path = config.get_var_names_path()
|
|
602
|
+
try:
|
|
603
|
+
bounds_df = pd.read_csv(variable_names_path)
|
|
604
|
+
except FileNotFoundError:
|
|
605
|
+
print("File Not Found: ", variable_names_path)
|
|
606
|
+
return pd.DataFrame()
|
|
526
607
|
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
608
|
+
alarm = LSInconsist(bounds_df)
|
|
609
|
+
return alarm.find_alarms(df, daily_df, config)
|
|
610
|
+
|
|
611
|
+
def flag_unexpected_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_high_temp : float = 130,
|
|
612
|
+
default_low_temp : float = 115, fault_time : int = 10) -> pd.DataFrame:
|
|
613
|
+
"""
|
|
614
|
+
Detects when a temperature value falls outside an acceptable range for
|
|
615
|
+
too long. An alarm is triggered if the temperature is above the high bound or below the low bound
|
|
616
|
+
for `fault_time` consecutive minutes.
|
|
617
|
+
|
|
618
|
+
VarNames syntax:
|
|
619
|
+
TMPRNG_[OPTIONAL ID]:###-### - Indicates a temperature variable. ###-### is the acceptable temperature range
|
|
620
|
+
(e.g., TMPRNG:110-130 means temperature should stay between 110 and 130 degrees).
|
|
621
|
+
|
|
622
|
+
Parameters
|
|
623
|
+
----------
|
|
624
|
+
df: pd.DataFrame
|
|
625
|
+
Post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in order minutes. If minutes
|
|
626
|
+
are out of order or have gaps, the function may return erroneous alarms.
|
|
627
|
+
daily_df: pd.DataFrame
|
|
628
|
+
Post-transformed dataframe for daily data. Used for determining which days to process.
|
|
629
|
+
config : ecopipeline.ConfigManager
|
|
630
|
+
The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
|
|
631
|
+
called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
|
|
632
|
+
The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
|
|
633
|
+
name of each variable in the dataframe that requires alarming and the TMPRNG alarm codes (e.g., TMPRNG:110-130, TMPRNG_1:115-125).
|
|
634
|
+
system: str
|
|
635
|
+
String of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
|
|
636
|
+
default_high_temp : float
|
|
637
|
+
Default high temperature bound when no custom range is specified in the alarm code (default 130). Temperature above this triggers alarm.
|
|
638
|
+
default_low_temp : float
|
|
639
|
+
Default low temperature bound when no custom range is specified in the alarm code (default 115). Temperature below this triggers alarm.
|
|
640
|
+
fault_time : int
|
|
641
|
+
Number of consecutive minutes that temperature must be outside the acceptable range before triggering an alarm (default 10).
|
|
642
|
+
|
|
643
|
+
Returns
|
|
644
|
+
-------
|
|
645
|
+
pd.DataFrame:
|
|
646
|
+
Pandas dataframe with alarm events
|
|
647
|
+
"""
|
|
648
|
+
if df.empty:
|
|
649
|
+
print("cannot flag missing balancing valve alarms. Dataframe is empty")
|
|
650
|
+
return pd.DataFrame()
|
|
651
|
+
variable_names_path = config.get_var_names_path()
|
|
652
|
+
try:
|
|
653
|
+
bounds_df = pd.read_csv(variable_names_path)
|
|
654
|
+
except FileNotFoundError:
|
|
655
|
+
print("File Not Found: ", variable_names_path)
|
|
656
|
+
return pd.DataFrame()
|
|
657
|
+
temp_alarm = TempRange(bounds_df, default_high_temp, default_low_temp, fault_time)
|
|
658
|
+
return temp_alarm.find_alarms(df, daily_df, config)
|
|
659
|
+
|
|
660
|
+
def flag_shortcycle(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
|
|
661
|
+
short_cycle_time : int = 15) -> pd.DataFrame:
|
|
662
|
+
"""
|
|
663
|
+
Detects short cycling by identifying when the heat pump turns on for less than `short_cycle_time`
|
|
664
|
+
consecutive minutes before turning off again. Short cycling can indicate equipment issues or
|
|
665
|
+
improper system sizing.
|
|
666
|
+
|
|
667
|
+
VarNames syntax:
|
|
668
|
+
SHRTCYC_[OPTIONAL ID]:### - Indicates a power variable for the heat pump. ### is the power threshold (default 1.0) above which
|
|
669
|
+
the heat pump is considered 'on'.
|
|
670
|
+
|
|
671
|
+
Parameters
|
|
672
|
+
----------
|
|
673
|
+
df: pd.DataFrame
|
|
674
|
+
Post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in order minutes. If minutes
|
|
675
|
+
are out of order or have gaps, the function may return erroneous alarms.
|
|
676
|
+
daily_df: pd.DataFrame
|
|
677
|
+
Post-transformed dataframe for daily data.
|
|
678
|
+
config : ecopipeline.ConfigManager
|
|
679
|
+
The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
|
|
680
|
+
called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
|
|
681
|
+
The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
|
|
682
|
+
name of each variable in the dataframe that requires alarming and the SHRTCYC alarm codes (e.g., SHRTCYC:1.0, SHRTCYC_1:0.5).
|
|
683
|
+
system: str
|
|
684
|
+
String of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
|
|
685
|
+
default_power_threshold : float
|
|
686
|
+
Default power threshold when no custom bound is specified in the alarm code (default 1.0). Heat pump is considered 'on'
|
|
687
|
+
when power exceeds this value.
|
|
688
|
+
short_cycle_time : int
|
|
689
|
+
Minimum expected run time in minutes (default 15). An alarm triggers if the heat pump runs for fewer than this many
|
|
690
|
+
consecutive minutes before turning off.
|
|
691
|
+
|
|
692
|
+
Returns
|
|
693
|
+
-------
|
|
694
|
+
pd.DataFrame:
|
|
695
|
+
Pandas dataframe with alarm events
|
|
696
|
+
"""
|
|
697
|
+
if df.empty:
|
|
698
|
+
print("cannot flag missing balancing valve alarms. Dataframe is empty")
|
|
699
|
+
return pd.DataFrame()
|
|
700
|
+
variable_names_path = config.get_var_names_path()
|
|
701
|
+
try:
|
|
702
|
+
bounds_df = pd.read_csv(variable_names_path)
|
|
703
|
+
except FileNotFoundError:
|
|
704
|
+
print("File Not Found: ", variable_names_path)
|
|
705
|
+
return pd.DataFrame()
|
|
706
|
+
|
|
707
|
+
short_alarm = ShortCycle(bounds_df, default_power_threshold, short_cycle_time)
|
|
708
|
+
return short_alarm.find_alarms(df, daily_df, config)
|
|
709
|
+
|
|
556
710
|
|
|
557
711
|
def _convert_silent_alarm_dict_to_df(alarm_dict : dict) -> pd.DataFrame:
|
|
558
712
|
events = {
|
|
559
713
|
'start_time_pt' : [],
|
|
560
714
|
'end_time_pt' : [],
|
|
561
|
-
'
|
|
562
|
-
'
|
|
715
|
+
'alarm_type' : [],
|
|
716
|
+
'alarm_detail' : [],
|
|
563
717
|
'variable_name' : []
|
|
564
718
|
}
|
|
565
719
|
for key, value_list in alarm_dict.items():
|
|
566
720
|
for value in value_list:
|
|
567
721
|
events['start_time_pt'].append(key)
|
|
568
|
-
|
|
569
|
-
events['
|
|
570
|
-
events['
|
|
722
|
+
# Use end_time from value[2] if provided, otherwise use key
|
|
723
|
+
events['end_time_pt'].append(value[2] if len(value) > 2 else key)
|
|
724
|
+
events['alarm_type'].append(value[3] if len(value) > 3 else 'SILENT_ALARM')
|
|
725
|
+
events['alarm_detail'].append(value[1])
|
|
571
726
|
events['variable_name'].append(value[0])
|
|
572
727
|
|
|
573
728
|
event_df = pd.DataFrame(events)
|
|
574
729
|
event_df.set_index('start_time_pt', inplace=True)
|
|
575
730
|
return event_df
|
|
576
731
|
|
|
577
|
-
def _convert_event_type_dict_to_df(alarm_dict : dict, event_type = 'DATA_LOSS_COP') -> pd.DataFrame:
|
|
578
|
-
events = {
|
|
579
|
-
'start_time_pt' : [],
|
|
580
|
-
'end_time_pt' : [],
|
|
581
|
-
'event_type' : [],
|
|
582
|
-
'event_detail' : [],
|
|
583
|
-
'variable_name' : []
|
|
584
|
-
}
|
|
585
|
-
for key, value in alarm_dict.items():
|
|
586
|
-
for i in range(len(value)):
|
|
587
|
-
events['start_time_pt'].append(key)
|
|
588
|
-
events['end_time_pt'].append(key)
|
|
589
|
-
events['event_type'].append(event_type)
|
|
590
|
-
events['event_detail'].append(value[i][1])
|
|
591
|
-
events['variable_name'].append(value[i][0])
|
|
592
|
-
|
|
593
|
-
event_df = pd.DataFrame(events)
|
|
594
|
-
event_df.set_index('start_time_pt', inplace=True)
|
|
595
|
-
return event_df
|
|
596
|
-
|
|
597
|
-
def _check_and_add_alarm(df : pd.DataFrame, mask : pd.Series, alarms_dict, day, fault_time : int, var_name : str, pretty_name : str, alarm_type : str = 'Lower'):
|
|
598
|
-
# KNOWN BUG : Avg value during fault time excludes the first (fault_time-1) minutes of each fault window
|
|
599
|
-
next_day = day + pd.Timedelta(days=1)
|
|
600
|
-
filtered_df = mask.loc[(mask.index >= day) & (mask.index < next_day)]
|
|
601
|
-
consecutive_condition = filtered_df.rolling(window=fault_time).min() == 1
|
|
602
|
-
if consecutive_condition.any():
|
|
603
|
-
group = (consecutive_condition != consecutive_condition.shift()).cumsum()
|
|
604
|
-
streaks = consecutive_condition.groupby(group).agg(['sum', 'size', 'idxmin'])
|
|
605
|
-
true_streaks = streaks[consecutive_condition.groupby(group).first()]
|
|
606
|
-
longest_streak_length = true_streaks['size'].max()
|
|
607
|
-
avg_streak_length = true_streaks['size'].mean() + fault_time-1
|
|
608
|
-
longest_group = true_streaks['size'].idxmax()
|
|
609
|
-
streak_indices = consecutive_condition[group == longest_group].index
|
|
610
|
-
starting_index = streak_indices[0]
|
|
611
|
-
|
|
612
|
-
day_df = df.loc[(df.index >= day) & (df.index < next_day)]
|
|
613
|
-
average_value = day_df.loc[consecutive_condition, var_name].mean()
|
|
614
|
-
|
|
615
|
-
# first_true_index = consecutive_condition.idxmax()
|
|
616
|
-
# because first (fault_time-1) minutes don't count in window
|
|
617
|
-
adjusted_time = starting_index - pd.Timedelta(minutes=fault_time-1)
|
|
618
|
-
adjusted_longest_streak_length = longest_streak_length + fault_time-1
|
|
619
|
-
alarm_string = f"{alarm_type} bound alarm for {pretty_name} (longest at {adjusted_time.strftime('%H:%M')} for {adjusted_longest_streak_length} minutes). Avg fault time : {round(avg_streak_length,1)} minutes, Avg value during fault: {round(average_value,2)}"
|
|
620
|
-
if day in alarms_dict:
|
|
621
|
-
alarms_dict[day].append([var_name, alarm_string])
|
|
622
|
-
else:
|
|
623
|
-
alarms_dict[day] = [[var_name, alarm_string]]
|
|
624
732
|
|
|
625
733
|
def power_ratio_alarm(daily_df: pd.DataFrame, config : ConfigManager, day_table_name : str, system: str = "", verbose : bool = False, ratio_period_days : int = 7) -> pd.DataFrame:
|
|
626
734
|
"""
|
|
@@ -634,7 +742,7 @@ def power_ratio_alarm(daily_df: pd.DataFrame, config : ConfigManager, day_table_
|
|
|
634
742
|
are out of order or have gaps, the function may return erroneous alarms.
|
|
635
743
|
config : ecopipeline.ConfigManager
|
|
636
744
|
The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
|
|
637
|
-
called
|
|
745
|
+
called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
|
|
638
746
|
The file must have at least two columns which must be titled "variable_name", "alarm_codes" which should contain the
|
|
639
747
|
name of each variable in the dataframe that requires the alarming and the ratio alarm code in the form "PR_{Power Ratio Name}:{low percentage}-{high percentage}
|
|
640
748
|
system: str
|
|
@@ -647,306 +755,11 @@ def power_ratio_alarm(daily_df: pd.DataFrame, config : ConfigManager, day_table_
|
|
|
647
755
|
pd.DataFrame:
|
|
648
756
|
Pandas dataframe with alarm events, empty if no alarms triggered
|
|
649
757
|
"""
|
|
650
|
-
daily_df_copy = daily_df.copy()
|
|
651
758
|
variable_names_path = config.get_var_names_path()
|
|
652
759
|
try:
|
|
653
|
-
|
|
760
|
+
bounds_df = pd.read_csv(variable_names_path)
|
|
654
761
|
except FileNotFoundError:
|
|
655
762
|
print("File Not Found: ", variable_names_path)
|
|
656
763
|
return pd.DataFrame()
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
|
|
660
|
-
ratios_df = ratios_df.loc[ratios_df['system'] == system]
|
|
661
|
-
required_columns = ["variable_name", "alarm_codes"]
|
|
662
|
-
for required_column in required_columns:
|
|
663
|
-
if not required_column in ratios_df.columns:
|
|
664
|
-
raise Exception(f"{required_column} is not present in Variable_Names.csv")
|
|
665
|
-
if ratios_df['alarm_codes'].isna().all() or ratios_df['alarm_codes'].isnull().all():
|
|
666
|
-
print("No alarm codes in ", variable_names_path)
|
|
667
|
-
return pd.DataFrame()
|
|
668
|
-
if not 'pretty_name' in ratios_df.columns:
|
|
669
|
-
ratios_df['pretty_name'] = ratios_df['variable_name']
|
|
670
|
-
else:
|
|
671
|
-
ratios_df['pretty_name'] = ratios_df['pretty_name'].fillna(ratios_df['variable_name'])
|
|
672
|
-
ratios_df = ratios_df.loc[:, ["variable_name", "alarm_codes", "pretty_name"]]
|
|
673
|
-
ratios_df = ratios_df[ratios_df['alarm_codes'].str.contains('PR', na=False)]
|
|
674
|
-
ratios_df.dropna(axis=0, thresh=2, inplace=True)
|
|
675
|
-
if ratio_period_days > 1:
|
|
676
|
-
if verbose:
|
|
677
|
-
print(f"adding last {ratio_period_days} to daily_df")
|
|
678
|
-
daily_df_copy = _append_previous_days_to_df(daily_df_copy, config, ratio_period_days, day_table_name)
|
|
679
|
-
elif ratio_period_days < 1:
|
|
680
|
-
print("power ratio alarm period, ratio_period_days, must be more than 1")
|
|
681
|
-
return pd.DataFrame()
|
|
682
|
-
|
|
683
|
-
ratios_df.set_index(['variable_name'], inplace=True)
|
|
684
|
-
ratio_dict = {}
|
|
685
|
-
for ratios_var, ratios in ratios_df.iterrows():
|
|
686
|
-
if not ratios_var in daily_df_copy.columns:
|
|
687
|
-
daily_df_copy[ratios_var] = 0
|
|
688
|
-
alarm_codes = str(ratios['alarm_codes']).split(";")
|
|
689
|
-
for alarm_code in alarm_codes:
|
|
690
|
-
if alarm_code[:2] == "PR":
|
|
691
|
-
split_out_alarm = alarm_code.split(":")
|
|
692
|
-
low_high = split_out_alarm[1].split("-")
|
|
693
|
-
pr_id = split_out_alarm[0].split("_")[1]
|
|
694
|
-
if len(low_high) != 2:
|
|
695
|
-
raise Exception(f"Error processing alarm code {alarm_code}")
|
|
696
|
-
if pr_id in ratio_dict:
|
|
697
|
-
ratio_dict[pr_id][0].append(ratios_var)
|
|
698
|
-
ratio_dict[pr_id][1].append(float(low_high[0]))
|
|
699
|
-
ratio_dict[pr_id][2].append(float(low_high[1]))
|
|
700
|
-
ratio_dict[pr_id][3].append(ratios['pretty_name'])
|
|
701
|
-
else:
|
|
702
|
-
ratio_dict[pr_id] = [[ratios_var],[float(low_high[0])],[float(low_high[1])],[ratios['pretty_name']]]
|
|
703
|
-
if verbose:
|
|
704
|
-
print("ratio_dict keys:", ratio_dict.keys())
|
|
705
|
-
# Create blocks of ratio_period_days
|
|
706
|
-
blocks_df = _create_period_blocks(daily_df_copy, ratio_period_days, verbose)
|
|
707
|
-
|
|
708
|
-
if blocks_df.empty:
|
|
709
|
-
print("No complete blocks available for analysis")
|
|
710
|
-
return pd.DataFrame()
|
|
711
|
-
|
|
712
|
-
alarms = {}
|
|
713
|
-
for key, value_list in ratio_dict.items():
|
|
714
|
-
# Calculate total for each block
|
|
715
|
-
blocks_df[key] = blocks_df[value_list[0]].sum(axis=1)
|
|
716
|
-
for i in range(len(value_list[0])):
|
|
717
|
-
column_name = value_list[0][i]
|
|
718
|
-
# Calculate ratio for each block
|
|
719
|
-
blocks_df[f'{column_name}_{key}'] = (blocks_df[column_name]/blocks_df[key]) * 100
|
|
720
|
-
if verbose:
|
|
721
|
-
print(f"Block ratios for {column_name}_{key}:", blocks_df[f'{column_name}_{key}'])
|
|
722
|
-
_check_and_add_ratio_alarm_blocks(blocks_df, key, column_name, value_list[3][i], alarms, value_list[2][i], value_list[1][i], ratio_period_days)
|
|
723
|
-
return _convert_silent_alarm_dict_to_df(alarms)
|
|
724
|
-
# alarms = {}
|
|
725
|
-
# for key, value_list in ratio_dict.items():
|
|
726
|
-
# daily_df_copy[key] = daily_df_copy[value_list[0]].sum(axis=1)
|
|
727
|
-
# for i in range(len(value_list[0])):
|
|
728
|
-
# column_name = value_list[0][i]
|
|
729
|
-
# daily_df_copy[f'{column_name}_{key}'] = (daily_df_copy[column_name]/daily_df_copy[key]) * 100
|
|
730
|
-
# if verbose:
|
|
731
|
-
# print(f"Ratios for {column_name}_{key}",daily_df_copy[f'{column_name}_{key}'])
|
|
732
|
-
# _check_and_add_ratio_alarm(daily_df_copy, key, column_name, value_list[3][i], alarms, value_list[2][i], value_list[1][i])
|
|
733
|
-
# return _convert_silent_alarm_dict_to_df(alarms)
|
|
734
|
-
|
|
735
|
-
# def _check_and_add_ratio_alarm(daily_df: pd.DataFrame, alarm_key : str, column_name : str, pretty_name : str, alarms_dict : dict, high_bound : float, low_bound : float):
|
|
736
|
-
# alarm_daily_df = daily_df.loc[(daily_df[f"{column_name}_{alarm_key}"] < low_bound) | (daily_df[f"{column_name}_{alarm_key}"] > high_bound)]
|
|
737
|
-
# if not alarm_daily_df.empty:
|
|
738
|
-
# for day, values in alarm_daily_df.iterrows():
|
|
739
|
-
# alarm_str = f"Power ratio alarm: {pretty_name} accounted for {round(values[f'{column_name}_{alarm_key}'], 2)}% of {alarm_key} energy use. {round(low_bound, 2)}-{round(high_bound, 2)}% of {alarm_key} energy use expected."
|
|
740
|
-
# if day in alarms_dict:
|
|
741
|
-
# alarms_dict[day].append([column_name, alarm_str])
|
|
742
|
-
# else:
|
|
743
|
-
# alarms_dict[day] = [[column_name, alarm_str]]
|
|
744
|
-
def _check_and_add_ratio_alarm_blocks(blocks_df: pd.DataFrame, alarm_key: str, column_name: str, pretty_name: str, alarms_dict: dict, high_bound: float, low_bound: float, ratio_period_days: int):
|
|
745
|
-
"""
|
|
746
|
-
Check for alarms in block-based ratios and add to alarms dictionary.
|
|
747
|
-
"""
|
|
748
|
-
alarm_blocks_df = blocks_df.loc[(blocks_df[f"{column_name}_{alarm_key}"] < low_bound) | (blocks_df[f"{column_name}_{alarm_key}"] > high_bound)]
|
|
749
|
-
if not alarm_blocks_df.empty:
|
|
750
|
-
for block_end_date, values in alarm_blocks_df.iterrows():
|
|
751
|
-
alarm_str = f"Power ratio alarm ({ratio_period_days}-day block ending {block_end_date.strftime('%Y-%m-%d')}): {pretty_name} accounted for {round(values[f'{column_name}_{alarm_key}'], 2)}% of {alarm_key} energy use. {round(low_bound, 2)}-{round(high_bound, 2)}% of {alarm_key} energy use expected."
|
|
752
|
-
if block_end_date in alarms_dict:
|
|
753
|
-
alarms_dict[block_end_date].append([column_name, alarm_str])
|
|
754
|
-
else:
|
|
755
|
-
alarms_dict[block_end_date] = [[column_name, alarm_str]]
|
|
756
|
-
|
|
757
|
-
def _create_period_blocks(daily_df: pd.DataFrame, ratio_period_days: int, verbose: bool = False) -> pd.DataFrame:
|
|
758
|
-
"""
|
|
759
|
-
Create blocks of ratio_period_days by summing values within each block.
|
|
760
|
-
Each block will be represented by its end date.
|
|
761
|
-
"""
|
|
762
|
-
if len(daily_df) < ratio_period_days:
|
|
763
|
-
if verbose:
|
|
764
|
-
print(f"Not enough data for {ratio_period_days}-day blocks. Need at least {ratio_period_days} days, have {len(daily_df)}")
|
|
765
|
-
return pd.DataFrame()
|
|
766
|
-
|
|
767
|
-
blocks = []
|
|
768
|
-
block_dates = []
|
|
769
|
-
|
|
770
|
-
# Create blocks by summing consecutive groups of ratio_period_days
|
|
771
|
-
for i in range(ratio_period_days - 1, len(daily_df)):
|
|
772
|
-
start_idx = i - ratio_period_days + 1
|
|
773
|
-
end_idx = i + 1
|
|
774
|
-
|
|
775
|
-
block_data = daily_df.iloc[start_idx:end_idx].sum()
|
|
776
|
-
blocks.append(block_data)
|
|
777
|
-
# Use the end date of the block as the identifier
|
|
778
|
-
block_dates.append(daily_df.index[i])
|
|
779
|
-
|
|
780
|
-
if not blocks:
|
|
781
|
-
return pd.DataFrame()
|
|
782
|
-
|
|
783
|
-
blocks_df = pd.DataFrame(blocks, index=block_dates)
|
|
784
|
-
|
|
785
|
-
if verbose:
|
|
786
|
-
print(f"Created {len(blocks_df)} blocks of {ratio_period_days} days each")
|
|
787
|
-
print(f"Block date range: {blocks_df.index.min()} to {blocks_df.index.max()}")
|
|
788
|
-
|
|
789
|
-
return blocks_df
|
|
790
|
-
|
|
791
|
-
def _append_previous_days_to_df(daily_df: pd.DataFrame, config : ConfigManager, ratio_period_days : int, day_table_name : str, primary_key : str = "time_pt") -> pd.DataFrame:
    """
    Prepend the `ratio_period_days` days stored before `daily_df` in the database to `daily_df`.

    Parameters
    ----------
    daily_df : pd.DataFrame
        Daily dataframe whose earliest index value marks the end (exclusive) of the period
        fetched from the database.
    config : ecopipeline.ConfigManager
        Provides the database connection through `connect_db()`.
    ratio_period_days : int
        Number of days before the earliest index value of `daily_df` to fetch.
    day_table_name : str
        Name of the table queried for the earlier days.
    primary_key : str
        Name of the time column used to filter rows and used as the index of the fetched rows.

    Returns
    -------
    pd.DataFrame:
        `daily_df` with the fetched rows added, sorted by index in ascending order. If the
        query raises a MySQL error, a message is printed and `daily_df` is returned unchanged.
    """
    db_connection, cursor = config.connect_db()
    try:
        period_end = daily_df.index.min()
        period_start = period_end - timedelta(ratio_period_days)
        # Fetch the rows in [period_start, period_end) that precede the dataframe.
        cursor.execute(
            f"SELECT * FROM {day_table_name} WHERE {primary_key} < '{period_end}' AND {primary_key} >= '{period_start}'")
        result = cursor.fetchall()
        column_names = [desc[0] for desc in cursor.description]
        old_days_df = pd.DataFrame(result, columns=column_names)
        old_days_df = old_days_df.set_index(primary_key)
        daily_df = pd.concat([daily_df, old_days_df])
        daily_df = daily_df.sort_index(ascending=True)
    except mysqlerrors.Error:
        print(f"Table {day_table_name} has no data.")
    finally:
        # Always release the cursor before its connection, even if an unexpected
        # (non-MySQL) exception is propagating.
        cursor.close()
        db_connection.close()
    return daily_df
|
|
810
|
-
|
|
811
|
-
# def flag_dhw_outage(df: pd.DataFrame, daily_df : pd.DataFrame, dhw_outlet_column : str, supply_temp : int = 110, consecutive_minutes : int = 15) -> pd.DataFrame:
|
|
812
|
-
# """
|
|
813
|
-
# Parameters
|
|
814
|
-
# ----------
|
|
815
|
-
# df : pd.DataFrame
|
|
816
|
-
# Single pandas dataframe of sensor data on minute intervals.
|
|
817
|
-
# daily_df : pd.DataFrame
|
|
818
|
-
# Single pandas dataframe of sensor data on daily intervals.
|
|
819
|
-
# dhw_outlet_column : str
|
|
820
|
-
# Name of the column in df and daily_df that contains temperature of DHW supplied to building occupants
|
|
821
|
-
# supply_temp : int
|
|
822
|
-
# the minimum DHW temperature acceptable to supply to building occupants
|
|
823
|
-
# consecutive_minutes : int
|
|
824
|
-
# the number of minutes in a row that DHW is not delivered to tenants to qualify as a DHW Outage
|
|
825
|
-
|
|
826
|
-
# Returns
|
|
827
|
-
# -------
|
|
828
|
-
# event_df : pd.DataFrame
|
|
829
|
-
# Dataframe with 'ALARM' events on the days in which there was a DHW Outage.
|
|
830
|
-
# """
|
|
831
|
-
# # TODO edge case for outage that spans over a day
|
|
832
|
-
# events = {
|
|
833
|
-
# 'start_time_pt' : [],
|
|
834
|
-
# 'end_time_pt' : [],
|
|
835
|
-
# 'event_type' : [],
|
|
836
|
-
# 'event_detail' : [],
|
|
837
|
-
# }
|
|
838
|
-
# mask = df[dhw_outlet_column] < supply_temp
|
|
839
|
-
# for day in daily_df.index:
|
|
840
|
-
# next_day = day + pd.Timedelta(days=1)
|
|
841
|
-
# filtered_df = mask.loc[(mask.index >= day) & (mask.index < next_day)]
|
|
842
|
-
|
|
843
|
-
# consecutive_condition = filtered_df.rolling(window=consecutive_minutes).min() == 1
|
|
844
|
-
# if consecutive_condition.any():
|
|
845
|
-
# # first_true_index = consecutive_condition['supply_temp'].idxmax()
|
|
846
|
-
# first_true_index = consecutive_condition.idxmax()
|
|
847
|
-
# adjusted_time = first_true_index - pd.Timedelta(minutes=consecutive_minutes-1)
|
|
848
|
-
# events['start_time_pt'].append(day)
|
|
849
|
-
# events['end_time_pt'].append(next_day - pd.Timedelta(minutes=1))
|
|
850
|
-
# events['event_type'].append("ALARM")
|
|
851
|
-
# events['event_detail'].append(f"Hot Water Outage Occured (first one starting at {adjusted_time.strftime('%H:%M')})")
|
|
852
|
-
# event_df = pd.DataFrame(events)
|
|
853
|
-
# event_df.set_index('start_time_pt', inplace=True)
|
|
854
|
-
# return event_df
|
|
855
|
-
|
|
856
|
-
# def generate_event_log_df(config : ConfigManager):
|
|
857
|
-
# """
|
|
858
|
-
# Creates an event log df based on user submitted events in an event log csv
|
|
859
|
-
# Parameters
|
|
860
|
-
# ----------
|
|
861
|
-
# config : ecopipeline.ConfigManager
|
|
862
|
-
# The ConfigManager object that holds configuration data for the pipeline.
|
|
863
|
-
|
|
864
|
-
# Returns
|
|
865
|
-
# -------
|
|
866
|
-
# event_df : pd.DataFrame
|
|
867
|
-
# Dataframe formatted from events in Event_log.csv for pipeline.
|
|
868
|
-
# """
|
|
869
|
-
# event_filename = config.get_event_log_path()
|
|
870
|
-
# try:
|
|
871
|
-
# event_df = pd.read_csv(event_filename)
|
|
872
|
-
# event_df['start_time_pt'] = pd.to_datetime(event_df['start_time_pt'])
|
|
873
|
-
# event_df['end_time_pt'] = pd.to_datetime(event_df['end_time_pt'])
|
|
874
|
-
# event_df.set_index('start_time_pt', inplace=True)
|
|
875
|
-
# return event_df
|
|
876
|
-
# except Exception as e:
|
|
877
|
-
# print(f"Error processing file {event_filename}: {e}")
|
|
878
|
-
# return pd.DataFrame({
|
|
879
|
-
# 'start_time_pt' : [],
|
|
880
|
-
# 'end_time_pt' : [],
|
|
881
|
-
# 'event_type' : [],
|
|
882
|
-
# 'event_detail' : [],
|
|
883
|
-
# })
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
# def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
|
|
888
|
-
# """
|
|
889
|
-
# Function must be called on the raw minute data df after the rename_varriables() and before the ffill_missing() function has been called.
|
|
890
|
-
# The function returns a dataframe indexed by day. Each column will expanded to 3 columns, appended with '_missing_mins', '_avg_gap', and
|
|
891
|
-
# '_max_gap' respectively. the columns will carry the following statisctics:
|
|
892
|
-
# _missing_mins -> the number of minutes in the day that have no reported data value for the column
|
|
893
|
-
# _avg_gap -> the average gap (in minutes) between collected data values that day
|
|
894
|
-
# _max_gap -> the maximum gap (in minutes) between collected data values that day
|
|
895
|
-
|
|
896
|
-
# Parameters
|
|
897
|
-
# ----------
|
|
898
|
-
# df : pd.DataFrame
|
|
899
|
-
# minute data df after the rename_varriables() and before the ffill_missing() function has been called
|
|
900
|
-
|
|
901
|
-
# Returns
|
|
902
|
-
# -------
|
|
903
|
-
# daily_data_stats : pd.DataFrame
|
|
904
|
-
# new dataframe with the columns descriped in the function's description
|
|
905
|
-
# """
|
|
906
|
-
# min_time = df.index.min()
|
|
907
|
-
# start_day = min_time.floor('D')
|
|
908
|
-
|
|
909
|
-
# # If min_time is not exactly at the start of the day, move to the next day
|
|
910
|
-
# if min_time != start_day:
|
|
911
|
-
# start_day = start_day + pd.tseries.offsets.Day(1)
|
|
912
|
-
|
|
913
|
-
# # Build a complete minutely timestamp index over the full date range
|
|
914
|
-
# full_index = pd.date_range(start=start_day,
|
|
915
|
-
# end=df.index.max().floor('D') - pd.Timedelta(minutes=1),
|
|
916
|
-
# freq='T')
|
|
917
|
-
|
|
918
|
-
# # Reindex to include any completely missing minutes
|
|
919
|
-
# df_full = df.reindex(full_index)
|
|
920
|
-
|
|
921
|
-
# # Resample daily to count missing values per column
|
|
922
|
-
# total_missing = df_full.isna().resample('D').sum().astype(int)
|
|
923
|
-
|
|
924
|
-
# # Function to calculate max consecutive missing values
|
|
925
|
-
# def max_consecutive_nans(x):
|
|
926
|
-
# is_na = x.isna()
|
|
927
|
-
# groups = (is_na != is_na.shift()).cumsum()
|
|
928
|
-
# return is_na.groupby(groups).sum().max() or 0
|
|
929
|
-
|
|
930
|
-
# # Function to calculate average consecutive missing values
|
|
931
|
-
# def avg_consecutive_nans(x):
|
|
932
|
-
# is_na = x.isna()
|
|
933
|
-
# groups = (is_na != is_na.shift()).cumsum()
|
|
934
|
-
# gap_lengths = is_na.groupby(groups).sum()
|
|
935
|
-
# gap_lengths = gap_lengths[gap_lengths > 0]
|
|
936
|
-
# if len(gap_lengths) == 0:
|
|
937
|
-
# return 0
|
|
938
|
-
# return gap_lengths.mean()
|
|
939
|
-
|
|
940
|
-
# # Apply daily, per column
|
|
941
|
-
# max_consec_missing = df_full.resample('D').apply(lambda day: day.apply(max_consecutive_nans))
|
|
942
|
-
# avg_consec_missing = df_full.resample('D').apply(lambda day: day.apply(avg_consecutive_nans))
|
|
943
|
-
|
|
944
|
-
# # Rename columns to include a suffix
|
|
945
|
-
# total_missing = total_missing.add_suffix('_missing_mins')
|
|
946
|
-
# max_consec_missing = max_consec_missing.add_suffix('_max_gap')
|
|
947
|
-
# avg_consec_missing = avg_consec_missing.add_suffix('_avg_gap')
|
|
948
|
-
|
|
949
|
-
# # Concatenate along columns (axis=1)
|
|
950
|
-
# combined_df = pd.concat([total_missing, max_consec_missing, avg_consec_missing], axis=1)
|
|
951
|
-
|
|
952
|
-
# return combined_df
|
|
764
|
+
alarm = PowerRatio(bounds_df, day_table_name, ratio_period_days)
|
|
765
|
+
return alarm.find_alarms(None, daily_df, config)
|