ecopipeline 1.0.5__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. ecopipeline/event_tracking/Alarm.py +317 -0
  2. ecopipeline/event_tracking/__init__.py +18 -1
  3. ecopipeline/event_tracking/alarms/AbnormalCOP.py +76 -0
  4. ecopipeline/event_tracking/alarms/BackupUse.py +94 -0
  5. ecopipeline/event_tracking/alarms/BalancingValve.py +78 -0
  6. ecopipeline/event_tracking/alarms/BlownFuse.py +72 -0
  7. ecopipeline/event_tracking/alarms/Boundary.py +90 -0
  8. ecopipeline/event_tracking/alarms/HPWHInlet.py +73 -0
  9. ecopipeline/event_tracking/alarms/HPWHOutage.py +96 -0
  10. ecopipeline/event_tracking/alarms/HPWHOutlet.py +85 -0
  11. ecopipeline/event_tracking/alarms/LSInconsist.py +114 -0
  12. ecopipeline/event_tracking/alarms/PowerRatio.py +111 -0
  13. ecopipeline/event_tracking/alarms/SOOChange.py +127 -0
  14. ecopipeline/event_tracking/alarms/ShortCycle.py +59 -0
  15. ecopipeline/event_tracking/alarms/TMSetpoint.py +127 -0
  16. ecopipeline/event_tracking/alarms/TempRange.py +84 -0
  17. ecopipeline/event_tracking/alarms/__init__.py +0 -0
  18. ecopipeline/event_tracking/event_tracking.py +119 -1177
  19. ecopipeline/extract/extract.py +51 -0
  20. ecopipeline/extract/zip_to_lat_long.csv +41490 -0
  21. ecopipeline/load/__init__.py +2 -2
  22. ecopipeline/load/load.py +304 -3
  23. ecopipeline/utils/ConfigManager.py +30 -0
  24. {ecopipeline-1.0.5.dist-info → ecopipeline-1.1.1.dist-info}/METADATA +1 -1
  25. ecopipeline-1.1.1.dist-info/RECORD +42 -0
  26. {ecopipeline-1.0.5.dist-info → ecopipeline-1.1.1.dist-info}/WHEEL +1 -1
  27. ecopipeline-1.0.5.dist-info/RECORD +0 -25
  28. {ecopipeline-1.0.5.dist-info → ecopipeline-1.1.1.dist-info}/licenses/LICENSE +0 -0
  29. {ecopipeline-1.0.5.dist-info → ecopipeline-1.1.1.dist-info}/top_level.txt +0 -0
@@ -5,42 +5,93 @@ from ecopipeline import ConfigManager
5
5
  import re
6
6
  import mysql.connector.errors as mysqlerrors
7
7
  from datetime import timedelta
8
+ from .alarms.ShortCycle import ShortCycle
9
+ from .alarms.TempRange import TempRange
10
+ from .alarms.LSInconsist import LSInconsist
11
+ from .alarms.SOOChange import SOOChange
12
+ from .alarms.BlownFuse import BlownFuse
13
+ from .alarms.HPWHOutage import HPWHOutage
14
+ from .alarms.BackupUse import BackupUse
15
+ from .alarms.HPWHOutlet import HPWHOutlet
16
+ from .alarms.HPWHInlet import HPWHInlet
17
+ from .alarms.BalancingValve import BalancingValve
18
+ from .alarms.TMSetpoint import TMSetpoint
19
+ from .alarms.AbnormalCOP import AbnormalCOP
20
+ from .alarms.PowerRatio import PowerRatio
21
+ from .alarms.Boundary import Boundary
8
22
 
9
23
  def central_alarm_df_creator(df: pd.DataFrame, daily_data : pd.DataFrame, config : ConfigManager, system: str = "",
10
24
  default_cop_high_bound : float = 4.5, default_cop_low_bound : float = 0,
11
25
  default_boundary_fault_time : int = 15, site_name : str = None, day_table_name_header : str = "day",
12
26
  power_ratio_period_days : int = 7) -> pd.DataFrame:
27
+ if df.empty:
28
+ print("cannot flag missing balancing valve alarms. Dataframe is empty")
29
+ return pd.DataFrame()
30
+ variable_names_path = config.get_var_names_path()
31
+ try:
32
+ bounds_df = pd.read_csv(variable_names_path)
33
+ except FileNotFoundError:
34
+ print("File Not Found: ", variable_names_path)
35
+ return pd.DataFrame()
36
+ if (system != ""):
37
+ if not 'system' in bounds_df.columns:
38
+ raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
39
+ bounds_df = bounds_df.loc[bounds_df['system'] == system]
40
+
13
41
  day_list = daily_data.index.to_list()
14
42
  print('Checking for alarms...')
15
43
  alarm_df = _convert_silent_alarm_dict_to_df({})
16
44
  dict_of_alarms = {}
17
- dict_of_alarms['boundary'] = flag_boundary_alarms(df, config, full_days=day_list, system=system, default_fault_time= default_boundary_fault_time)
18
- dict_of_alarms['power ratio'] = power_ratio_alarm(daily_data, config, day_table_name = config.get_table_name(day_table_name_header), system=system, ratio_period_days=power_ratio_period_days)
19
- dict_of_alarms['abnormal COP'] = flag_abnormal_COP(daily_data, config, system = system, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)
20
- dict_of_alarms['temperature maintenance setpoint'] = flag_high_tm_setpoint(df, daily_data, config, system=system)
21
- dict_of_alarms['recirculation loop balancing valve'] = flag_recirc_balance_valve(daily_data, config, system=system)
22
- dict_of_alarms['HPWH inlet temperature'] = flag_hp_inlet_temp(df, daily_data, config, system)
23
- dict_of_alarms['HPWH outlet temperature'] = flag_hp_outlet_temp(df, daily_data, config, system)
24
- dict_of_alarms['improper backup heating use'] = flag_backup_use(df, daily_data, config, system)
25
- dict_of_alarms['blown equipment fuse'] = flag_blown_fuse(df, daily_data, config, system)
26
- dict_of_alarms['unexpected SOO change'] = flag_unexpected_soo_change(df, daily_data, config, system)
27
- dict_of_alarms['short cycle'] = flag_shortcycle(df, daily_data, config, system)
28
- dict_of_alarms['HPWH outage'] = flag_HP_outage(df, daily_data, config, day_table_name = config.get_table_name(day_table_name_header), system=system)
29
- dict_of_alarms['unexpected temperature'] = flag_unexpected_temp(df, daily_data, config, system)
30
- dict_of_alarms['demand response inconsistency'] = flag_ls_mode_inconsistancy(df, daily_data, config, system)
31
-
45
+ dict_of_alarms['boundary'] = Boundary(bounds_df, default_fault_time= default_boundary_fault_time)
46
+ # flag_boundary_alarms(df, config, full_days=day_list, system=system, default_fault_time= default_boundary_fault_time)
47
+ dict_of_alarms['power ratio'] = PowerRatio(bounds_df, day_table_name = config.get_table_name(day_table_name_header), ratio_period_days=power_ratio_period_days)
48
+ # power_ratio_alarm(daily_data, config, day_table_name = config.get_table_name(day_table_name_header), system=system, ratio_period_days=power_ratio_period_days)
49
+ dict_of_alarms['abnormal COP'] = AbnormalCOP(bounds_df, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)
50
+ # flag_abnormal_COP(daily_data, config, system = system, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)
51
+ dict_of_alarms['temperature maintenance setpoint'] = TMSetpoint(bounds_df)
52
+ # flag_high_tm_setpoint(df, daily_data, config, system=system)
53
+ dict_of_alarms['recirculation loop balancing valve'] = BalancingValve(bounds_df)
54
+ # flag_recirc_balance_valve(daily_data, config, system=system)
55
+ dict_of_alarms['HPWH inlet temperature'] = HPWHInlet(bounds_df)
56
+ # flag_hp_inlet_temp(df, daily_data, config, system)
57
+ dict_of_alarms['HPWH outlet temperature'] = HPWHOutlet(bounds_df)
58
+ # flag_hp_outlet_temp(df, daily_data, config, system)
59
+ dict_of_alarms['improper backup heating use'] = BackupUse(bounds_df)
60
+ # flag_backup_use(df, daily_data, config, system)
61
+ dict_of_alarms['HPWH outage'] = HPWHOutage(bounds_df, day_table_name = config.get_table_name(day_table_name_header))
62
+ # flag_HP_outage(df, daily_data, config, day_table_name = config.get_table_name(day_table_name_header), system=system)
63
+ dict_of_alarms['blown equipment fuse'] = BlownFuse(bounds_df)
64
+ # flag_blown_fuse(df, daily_data, config, system)
65
+ dict_of_alarms['unexpected SOO change'] = SOOChange(bounds_df)
66
+ # flag_unexpected_soo_change(df, daily_data, config, system)
67
+ dict_of_alarms['short cycle'] = ShortCycle(bounds_df)
68
+ # flag_shortcycle(df, daily_data, config, system)
69
+ dict_of_alarms['unexpected temperature'] = TempRange(bounds_df)
70
+ # flag_unexpected_temp(df, daily_data, config, system)
71
+ dict_of_alarms['demand response inconsistency'] = LSInconsist(bounds_df)
72
+ # flag_ls_mode_inconsistancy(df, daily_data, config, system)
73
+ # return alarm.find_alarms(df, daily_df, config)
32
74
 
33
75
  ongoing_COP_exception = ['abnormal COP']
34
-
35
76
  for key, value in dict_of_alarms.items():
36
- if key in ongoing_COP_exception and _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
37
- print("Ongoing DATA_LOSS_COP detected. No further DATA_LOSS_COP events will be uploaded")
38
- elif len(value) > 0:
77
+ # if key in ongoing_COP_exception and _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
78
+ # print("Ongoing DATA_LOSS_COP detected. ABNORMAL_COP events will be uploaded")
79
+ specific_alarm_df = value.find_alarms(df, daily_data, config)
80
+ if len(specific_alarm_df) > 0:
39
81
  print(f"Detected {key} alarm(s). Adding to event df...")
40
- alarm_df = pd.concat([alarm_df, value])
82
+ alarm_df = pd.concat([alarm_df, specific_alarm_df])
41
83
  else:
42
84
  print(f"No {key} alarm(s) detected.")
43
85
 
86
+ # for key, value in dict_of_alarms.items():
87
+ # if key in ongoing_COP_exception and _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
88
+ # print("Ongoing DATA_LOSS_COP detected. No further DATA_LOSS_COP events will be uploaded")
89
+ # elif len(value) > 0:
90
+ # print(f"Detected {key} alarm(s). Adding to event df...")
91
+ # alarm_df = pd.concat([alarm_df, value])
92
+ # else:
93
+ # print(f"No {key} alarm(s) detected.")
94
+
44
95
  return alarm_df
45
96
 
46
97
  def flag_abnormal_COP(daily_data: pd.DataFrame, config : ConfigManager, system: str = "", default_high_bound : float = 4.5, default_low_bound : float = 0) -> pd.DataFrame:
@@ -50,65 +101,9 @@ def flag_abnormal_COP(daily_data: pd.DataFrame, config : ConfigManager, system:
50
101
  except FileNotFoundError:
51
102
  print("File Not Found: ", variable_names_path)
52
103
  return pd.DataFrame()
104
+ alarm = AbnormalCOP(bounds_df, default_high_bound, default_low_bound)
105
+ return alarm.find_alarms(None, daily_data, config)
53
106
 
54
- if (system != ""):
55
- if not 'system' in bounds_df.columns:
56
- raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
57
- bounds_df = bounds_df.loc[bounds_df['system'] == system]
58
- if not "variable_name" in bounds_df.columns:
59
- raise Exception(f"variable_name is not present in Variable_Names.csv")
60
- if not 'pretty_name' in bounds_df.columns:
61
- bounds_df['pretty_name'] = bounds_df['variable_name']
62
- else:
63
- bounds_df['pretty_name'] = bounds_df['pretty_name'].fillna(bounds_df['variable_name'])
64
- if not 'high_alarm' in bounds_df.columns:
65
- bounds_df['high_alarm'] = default_high_bound
66
- else:
67
- bounds_df['high_alarm'] = bounds_df['high_alarm'].fillna(default_high_bound)
68
- if not 'low_alarm' in bounds_df.columns:
69
- bounds_df['low_alarm'] = default_low_bound
70
- else:
71
- bounds_df['low_alarm'] = bounds_df['low_alarm'].fillna(default_low_bound)
72
-
73
- bounds_df = bounds_df.loc[:, ["variable_name", "high_alarm", "low_alarm", "pretty_name"]]
74
- bounds_df.dropna(axis=0, thresh=2, inplace=True)
75
- bounds_df.set_index(['variable_name'], inplace=True)
76
-
77
- cop_pattern = re.compile(r'^(COP\w*|SystemCOP\w*)$')
78
- cop_columns = [col for col in daily_data.columns if re.match(cop_pattern, col)]
79
-
80
- alarms_dict = {}
81
- if not daily_data.empty and len(cop_columns) > 0:
82
- for bound_var, bounds in bounds_df.iterrows():
83
- if bound_var in cop_columns:
84
- for day, day_values in daily_data.iterrows():
85
- if not day_values[bound_var] is None and (day_values[bound_var] > bounds['high_alarm'] or day_values[bound_var] < bounds['low_alarm']):
86
- alarm_str = f"Unexpected COP Value detected: {bounds['pretty_name']} = {round(day_values[bound_var],2)}"
87
- if day in alarms_dict:
88
- alarms_dict[day].append([bound_var, alarm_str])
89
- else:
90
- alarms_dict[day] = [[bound_var, alarm_str]]
91
- return _convert_event_type_dict_to_df(alarms_dict, event_type="SILENT_ALARM")
92
-
93
- def _check_if_during_ongoing_cop_alarm(daily_df : pd.DataFrame, config : ConfigManager, site_name : str = None) -> bool:
94
- if site_name is None:
95
- site_name = config.get_site_name()
96
- connection, cursor = config.connect_db()
97
- on_going_cop = False
98
- try:
99
- # find existing times in database for upsert statement
100
- cursor.execute(
101
- f"SELECT id FROM site_events WHERE start_time_pt <= '{daily_df.index.min()}' AND (end_time_pt IS NULL OR end_time_pt >= '{daily_df.index.max()}') AND site_name = '{site_name}' AND event_type = 'DATA_LOSS_COP'")
102
- # Fetch the results into a DataFrame
103
- existing_rows = pd.DataFrame(cursor.fetchall(), columns=['id'])
104
- if not existing_rows.empty:
105
- on_going_cop = True
106
-
107
- except mysqlerrors.Error as e:
108
- print(f"Retrieving data from site_events caused exception: {e}")
109
- connection.close()
110
- cursor.close()
111
- return on_going_cop
112
107
 
113
108
  def flag_boundary_alarms(df: pd.DataFrame, config : ConfigManager, default_fault_time : int = 15, system: str = "", full_days : list = None) -> pd.DataFrame:
114
109
  """
@@ -122,13 +117,13 @@ def flag_boundary_alarms(df: pd.DataFrame, config : ConfigManager, default_fault
122
117
  are out of order or have gaps, the function may return erroneous alarms.
123
118
  config : ecopipeline.ConfigManager
124
119
  The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
125
- called Varriable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
120
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
126
121
  The file must have at least three columns which must be titled "variable_name", "low_alarm", and "high_alarm" which should contain the
127
122
  name of each variable in the dataframe that requires the alarming, the lower bound for acceptable data, and the upper bound for
128
123
  acceptable data respectively
129
124
  default_fault_time : int
130
125
  Number of consecutive minutes that a sensor must be out of bounds for to trigger an alarm. Can be customized for each variable with
131
- the fault_time column in Varriable_Names.csv
126
+ the fault_time column in Variable_Names.csv
132
127
  system: str
133
128
  string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not aplicable.
134
129
  full_days : list
@@ -148,49 +143,8 @@ def flag_boundary_alarms(df: pd.DataFrame, config : ConfigManager, default_fault
148
143
  except FileNotFoundError:
149
144
  print("File Not Found: ", variable_names_path)
150
145
  return pd.DataFrame()
151
-
152
- if (system != ""):
153
- if not 'system' in bounds_df.columns:
154
- raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
155
- bounds_df = bounds_df.loc[bounds_df['system'] == system]
156
-
157
- required_columns = ["variable_name", "high_alarm", "low_alarm"]
158
- for required_column in required_columns:
159
- if not required_column in bounds_df.columns:
160
- raise Exception(f"{required_column} is not present in Variable_Names.csv")
161
- if not 'pretty_name' in bounds_df.columns:
162
- bounds_df['pretty_name'] = bounds_df['variable_name']
163
- else:
164
- bounds_df['pretty_name'] = bounds_df['pretty_name'].fillna(bounds_df['variable_name'])
165
- if not 'fault_time' in bounds_df.columns:
166
- bounds_df['fault_time'] = default_fault_time
167
-
168
- idx = df.index
169
- if full_days is None:
170
- full_days = pd.to_datetime(pd.Series(idx).dt.normalize().unique())
171
-
172
- bounds_df = bounds_df.loc[:, ["variable_name", "high_alarm", "low_alarm", "fault_time", "pretty_name"]]
173
- bounds_df.dropna(axis=0, thresh=2, inplace=True)
174
- bounds_df.set_index(['variable_name'], inplace=True)
175
- # ensure that lower and upper bounds are numbers
176
- bounds_df['high_alarm'] = pd.to_numeric(bounds_df['high_alarm'], errors='coerce').astype(float)
177
- bounds_df['low_alarm'] = pd.to_numeric(bounds_df['low_alarm'], errors='coerce').astype(float)
178
- bounds_df['fault_time'] = pd.to_numeric(bounds_df['fault_time'], errors='coerce').astype('Int64')
179
- bounds_df = bounds_df[bounds_df.index.notnull()]
180
- alarms = {}
181
- for bound_var, bounds in bounds_df.iterrows():
182
- if bound_var in df.columns:
183
- lower_mask = df[bound_var] < bounds["low_alarm"]
184
- upper_mask = df[bound_var] > bounds["high_alarm"]
185
- if pd.isna(bounds['fault_time']):
186
- bounds['fault_time'] = default_fault_time
187
- for day in full_days:
188
- if bounds['fault_time'] < 1 :
189
- print(f"Could not process alarm for {bound_var}. Fault time must be greater than or equal to 1 minute.")
190
- _check_and_add_alarm(df, lower_mask, alarms, day, bounds["fault_time"], bound_var, bounds['pretty_name'], 'Lower')
191
- _check_and_add_alarm(df, upper_mask, alarms, day, bounds["fault_time"], bound_var, bounds['pretty_name'], 'Upper')
192
-
193
- return _convert_silent_alarm_dict_to_df(alarms)
146
+ alarm = Boundary(bounds_df, default_fault_time)
147
+ return alarm.find_alarms(df, None, config)
194
148
 
195
149
  def flag_high_tm_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, default_fault_time : int = 3,
196
150
  system: str = "", default_setpoint : float = 130.0, default_power_indication : float = 1.0,
@@ -214,7 +168,7 @@ def flag_high_tm_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config : Con
214
168
  post-transformed dataframe for daily data. Used for checking power ratios and determining which days to process.
215
169
  config : ecopipeline.ConfigManager
216
170
  The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
217
- called Varriable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
171
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
218
172
  The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
219
173
  name of each variable in the dataframe that requires alarming and the TMSTPT alarm codes (e.g., TMSTPT_T_1:140, TMSTPT_SP_1:2.0)
220
174
  default_fault_time : int
@@ -243,89 +197,8 @@ def flag_high_tm_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config : Con
243
197
  except FileNotFoundError:
244
198
  print("File Not Found: ", variable_names_path)
245
199
  return pd.DataFrame()
246
-
247
- bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'TMSTPT',
248
- {'T' : default_setpoint,
249
- 'SP': default_power_indication,
250
- 'TP': default_power_ratio,
251
- 'ST': default_setpoint},
252
- system)
253
- if bounds_df.empty:
254
- return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
255
-
256
- # Process each unique alarm_code_id
257
- alarms = {}
258
- for day in daily_df.index:
259
- next_day = day + pd.Timedelta(days=1)
260
- filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
261
- alarmed_for_day = False
262
- for alarm_id in bounds_df['alarm_code_id'].unique():
263
- id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
264
-
265
- # Get T and SP alarm codes for this ID
266
- t_codes = id_group[id_group['alarm_code_type'] == 'T']
267
- sp_codes = id_group[id_group['alarm_code_type'] == 'SP']
268
- tp_codes = id_group[id_group['alarm_code_type'] == 'TP']
269
- st_codes = id_group[id_group['alarm_code_type'] == 'ST']
270
-
271
- # Check for multiple T or SP codes with same ID
272
- if len(t_codes) > 1 or len(sp_codes) > 1 or len(tp_codes) > 1 or len(st_codes) > 1:
273
- raise Exception(f"Improper alarm codes for swing tank setpoint with id {alarm_id}")
274
-
275
- # Check if we have both T and SP
276
- if len(t_codes) == 1 and len(sp_codes) == 1:
277
- t_var_name = t_codes.iloc[0]['variable_name']
278
- t_pretty_name = t_codes.iloc[0]['pretty_name']
279
- sp_var_name = sp_codes.iloc[0]['variable_name']
280
- sp_pretty_name = sp_codes.iloc[0]['pretty_name']
281
- sp_power_indication = sp_codes.iloc[0]['bound']
282
- t_setpoint = t_codes.iloc[0]['bound']
283
- # Check if both variables exist in df
284
- if t_var_name in filtered_df.columns and sp_var_name in filtered_df.columns:
285
- # Check for consecutive minutes where SP > default_power_indication
286
- # AND T >= default_setpoint
287
- power_mask = filtered_df[sp_var_name] >= sp_power_indication
288
- temp_mask = filtered_df[t_var_name] >= t_setpoint
289
- combined_mask = power_mask & temp_mask
290
-
291
- # Check for 3 consecutive minutes
292
- consecutive_condition = combined_mask.rolling(window=default_fault_time).min() == 1
293
- if consecutive_condition.any():
294
- # Get the first index where condition was met
295
- first_true_index = consecutive_condition.idxmax()
296
- # Adjust for the rolling window (first fault_time-1 minutes don't count)
297
- adjusted_time = first_true_index - pd.Timedelta(minutes=default_fault_time-1)
298
- _add_an_alarm(alarms, adjusted_time, sp_var_name, f"High TM Setpoint: {sp_pretty_name} showed draw at {adjusted_time} although {t_pretty_name} was above {t_setpoint} F.")
299
- alarmed_for_day = True
300
- if not alarmed_for_day and len(st_codes) == 1:
301
- st_var_name = st_codes.iloc[0]['variable_name']
302
- st_setpoint = st_codes.iloc[0]['bound']
303
- st_pretty_name = st_codes.iloc[0]['pretty_name']
304
- # Check if st_var_name exists in filtered_df
305
- if st_var_name in filtered_df.columns:
306
- # Check if setpoint was altered for over 10 minutes
307
- altered_mask = filtered_df[st_var_name] != st_setpoint
308
- consecutive_condition = altered_mask.rolling(window=10).min() == 1
309
- if consecutive_condition.any():
310
- # Get the first index where condition was met
311
- first_true_index = consecutive_condition.idxmax()
312
- # Adjust for the rolling window
313
- adjusted_time = first_true_index - pd.Timedelta(minutes=9)
314
- _add_an_alarm(alarms, day, st_var_name, f"{st_pretty_name} was altered at {adjusted_time}")
315
- alarmed_for_day = True
316
- if not alarmed_for_day and len(tp_codes) == 1 and len(sp_codes) == 1:
317
- tp_var_name = tp_codes.iloc[0]['variable_name']
318
- sp_var_name = sp_codes.iloc[0]['variable_name']
319
- sp_pretty_name = sp_codes.iloc[0]['pretty_name']
320
- tp_ratio = tp_codes.iloc[0]['bound']
321
- # Check if both variables exist in df
322
- if tp_var_name in daily_df.columns and sp_var_name in daily_df.columns:
323
- # Check if swing tank power ratio exceeds threshold
324
- if day in daily_df.index and daily_df.loc[day, tp_var_name] != 0:
325
- power_ratio = daily_df.loc[day, sp_var_name] / daily_df.loc[day, tp_var_name]
326
- if power_ratio > tp_ratio:
327
- _add_an_alarm(alarms, day, sp_var_name, f"High temperature maintenace power ratio: {sp_pretty_name} accounted for more than {tp_ratio * 100}% of daily power.")
328
- return _convert_silent_alarm_dict_to_df(alarms)
200
+ alarm = TMSetpoint(bounds_df, default_fault_time, default_setpoint, default_power_indication, default_power_ratio)
201
+ return alarm.find_alarms(df, daily_df, config)
329
202
 
330
203
  def flag_backup_use(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager,
331
204
  system: str = "", default_setpoint : float = 130.0, default_power_ratio : float = 0.1) -> pd.DataFrame:
@@ -334,7 +207,7 @@ def flag_backup_use(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigMan
334
207
  and create an dataframe with applicable alarm events
335
208
 
336
209
  VarNames syntax:
337
- BU_P_ID - Back Up Tank Power Varriable. Must be in same power units as total system power
210
+ BU_P_ID - Back Up Tank Power Variable. Must be in same power units as total system power
338
211
  BU_TP_ID:### - Total System Power for ratio alarming for alarming if back up power is more than ### (40% default) of usage
339
212
  BU_ST_ID:### - Back Up Setpoint that should not change at all from ### (default 130)
340
213
 
@@ -347,7 +220,7 @@ def flag_backup_use(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigMan
347
220
  post-transformed dataframe for daily data. Used for checking power ratios and determining which days to process.
348
221
  config : ecopipeline.ConfigManager
349
222
  The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
350
- called Varriable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
223
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
351
224
  The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
352
225
  name of each variable in the dataframe that requires alarming and the STS alarm codes (e.g., STS_T_1:140, STS_SP_1:2.0)
353
226
  system: str
@@ -373,69 +246,8 @@ def flag_backup_use(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigMan
373
246
  except FileNotFoundError:
374
247
  print("File Not Found: ", variable_names_path)
375
248
  return pd.DataFrame()
376
-
377
- bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'BU',
378
- {'POW': None,
379
- 'TP': default_power_ratio,
380
- 'ST': default_setpoint},
381
- system)
382
- if bounds_df.empty:
383
- return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
384
-
385
- # Process each unique alarm_code_id
386
- alarms = {}
387
- for day in daily_df.index:
388
- next_day = day + pd.Timedelta(days=1)
389
- filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
390
- alarmed_for_day = False
391
- for alarm_id in bounds_df['alarm_code_id'].unique():
392
- id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
393
-
394
- # Get T and SP alarm codes for this ID
395
- pow_codes = id_group[id_group['alarm_code_type'] == 'POW']
396
- tp_codes = id_group[id_group['alarm_code_type'] == 'TP']
397
- st_codes = id_group[id_group['alarm_code_type'] == 'ST']
398
-
399
- # Check for multiple T or SP codes with same ID
400
- if len(tp_codes) > 1:
401
- raise Exception(f"Improper alarm codes for swing tank setpoint with id {alarm_id}")
402
-
403
- if not alarmed_for_day and len(st_codes) >= 1:
404
- # Check each ST code against its individual bound
405
- for idx, st_row in st_codes.iterrows():
406
- st_var_name = st_row['variable_name']
407
- st_setpoint = st_row['bound']
408
- # Check if st_var_name exists in filtered_df
409
- if st_var_name in filtered_df.columns:
410
- # Check if setpoint was altered for over 10 minutes
411
- altered_mask = filtered_df[st_var_name] != st_setpoint
412
- consecutive_condition = altered_mask.rolling(window=10).min() == 1
413
- if consecutive_condition.any():
414
- # Get the first index where condition was met
415
- first_true_index = consecutive_condition.idxmax()
416
- # Adjust for the rolling window
417
- adjusted_time = first_true_index - pd.Timedelta(minutes=9)
418
- _add_an_alarm(alarms, day, st_var_name, f"Swing tank setpoint was altered at {adjusted_time}")
419
- alarmed_for_day = True
420
- break # Exit loop once we've found an alarm for this day
421
- if not alarmed_for_day and len(tp_codes) == 1 and len(pow_codes) >= 1:
422
- tp_var_name = tp_codes.iloc[0]['variable_name']
423
- tp_bound = tp_codes.iloc[0]['bound']
424
- if tp_var_name in daily_df.columns:
425
- # Get list of ER variable names
426
- bu_pow_names = pow_codes['variable_name'].tolist()
427
-
428
- # Check if all ER variables exist in daily_df
429
- if all(var in daily_df.columns for var in bu_pow_names):
430
- # Sum all ER variables for this day
431
- bu_pow_sum = daily_df.loc[day, bu_pow_names].sum()
432
- tp_value = daily_df.loc[day, tp_var_name]
433
-
434
- # Check if sum of ER >= OUT value
435
- if bu_pow_sum >= tp_value*tp_bound:
436
- _add_an_alarm(alarms, day, tp_var_name, f"Improper Back Up Use: Sum of back up equipment ({bu_pow_sum:.2f}) exceeds {(tp_bound * 100):.2f}% of total power.")
437
-
438
- return _convert_silent_alarm_dict_to_df(alarms)
249
+ alarm = BackupUse(bounds_df, default_setpoint, default_power_ratio)
250
+ return alarm.find_alarms(df, daily_df, config)
439
251
 
440
252
  def flag_HP_outage(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, day_table_name : str, system: str = "", default_power_ratio : float = 0.3,
441
253
  ratio_period_days : int = 7) -> pd.DataFrame:
@@ -485,55 +297,9 @@ def flag_HP_outage(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigMana
485
297
  except FileNotFoundError:
486
298
  print("File Not Found: ", variable_names_path)
487
299
  return pd.DataFrame()
488
-
489
- bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'HPOUT',
490
- {'POW': default_power_ratio,
491
- 'TP': None,
492
- 'ALRM': None},
493
- system)
494
- if bounds_df.empty:
495
- return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
496
-
497
- # Process each unique alarm_code_id
498
- alarms = {}
499
- for alarm_id in bounds_df['alarm_code_id'].unique():
500
- id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
501
-
502
- # Get T and SP alarm codes for this ID
503
- pow_codes = id_group[id_group['alarm_code_type'] == 'POW']
504
- tp_codes = id_group[id_group['alarm_code_type'] == 'TP']
505
- alrm_codes = id_group[id_group['alarm_code_type'] == 'ALRM']
506
- if len(pow_codes) > 0 and len(tp_codes) != 1:
507
- raise Exception(f"Improper alarm codes for heat pump outage with id {alarm_id}. Requires 1 total power (TP) variable.")
508
- elif len(pow_codes) > 0 and len(tp_codes) == 1:
509
- if ratio_period_days <= 1:
510
- print("HP Outage alarm period, ratio_period_days, must be more than 1")
511
- else:
512
- tp_var_name = tp_codes.iloc[0]['variable_name']
513
- daily_df_copy = daily_df.copy()
514
- daily_df_copy = _append_previous_days_to_df(daily_df_copy, config, ratio_period_days, day_table_name)
515
- for i in range(ratio_period_days - 1, len(daily_df_copy)):
516
- start_idx = i - ratio_period_days + 1
517
- end_idx = i + 1
518
- day = daily_df_copy.index[i]
519
- block_data = daily_df_copy.iloc[start_idx:end_idx].sum()
520
- for j in range(len(pow_codes)):
521
- pow_var_name = pow_codes.iloc[j]['variable_name']
522
- pow_var_bound = pow_codes.iloc[j]['bound']
523
- if block_data[pow_var_name] < block_data[tp_var_name] * pow_var_bound:
524
- _add_an_alarm(alarms, day, pow_var_name, f"Possible Heat Pump failure or outage.")
525
- elif len(alrm_codes) > 0:
526
- for i in range(len(alrm_codes)):
527
- alrm_var_name = alrm_codes.iloc[i]['variable_name']
528
- if alrm_var_name in df.columns:
529
- for day in daily_df.index:
530
- next_day = day + pd.Timedelta(days=1)
531
- filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
532
- if not filtered_df.empty and (filtered_df[alrm_var_name] != 0).any():
533
- _add_an_alarm(alarms, day, alrm_var_name, f"Heat pump alarm triggered.")
534
- break
535
-
536
- return _convert_silent_alarm_dict_to_df(alarms)
300
+
301
+ alarm = HPWHOutage(bounds_df, day_table_name, default_power_ratio, ratio_period_days)
302
+ return alarm.find_alarms(df, daily_df, config)
537
303
 
538
304
  def flag_recirc_balance_valve(daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_ratio : float = 0.4) -> pd.DataFrame:
539
305
  """
@@ -582,49 +348,8 @@ def flag_recirc_balance_valve(daily_df: pd.DataFrame, config : ConfigManager, sy
582
348
  except FileNotFoundError:
583
349
  print("File Not Found: ", variable_names_path)
584
350
  return pd.DataFrame()
585
- bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'BV',
586
- {'TP' : default_power_ratio},
587
- system)
588
- if bounds_df.empty:
589
- return _convert_silent_alarm_dict_to_df({}) # no BV alarms to look into
590
- # Process each unique alarm_code_id
591
- alarms = {}
592
- for alarm_id in bounds_df['alarm_code_id'].unique():
593
- id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
594
- out_codes = id_group[id_group['alarm_code_type'] == 'OUT']
595
- tp_codes = id_group[id_group['alarm_code_type'] == 'TP']
596
- er_codes = id_group[id_group['alarm_code_type'] == 'ER']
597
- if len(er_codes) < 1 or (len(out_codes) < 1 and len(tp_codes) != 1):
598
- raise Exception(f"Improper alarm codes for balancing valve with id {alarm_id}")
599
- er_var_names = er_codes['variable_name'].tolist()
600
- if len(tp_codes) == 1 and tp_codes.iloc[0]['variable_name']in daily_df.columns:
601
- tp_var_name = tp_codes.iloc[0]['variable_name']
602
- tp_bound = tp_codes.iloc[0]['bound']
603
- for day in daily_df.index:
604
-
605
- # Check if all ER variables exist in daily_df
606
- if all(var in daily_df.columns for var in er_var_names):
607
- # Sum all ER variables for this day
608
- er_sum = daily_df.loc[day, er_var_names].sum()
609
- tp_value = daily_df.loc[day, tp_var_name]
610
-
611
- # Check if sum of ER >= OUT value
612
- if er_sum >= tp_value*tp_bound:
613
- _add_an_alarm(alarms, day, tp_var_name, f"Recirculation imbalance: Sum of recirculation equipment ({er_sum:.2f}) exceeds or equals {(tp_bound * 100):.2f}% of total power.")
614
- elif len(out_codes) >= 1:
615
- out_var_names = out_codes['variable_name'].tolist()
616
- for day in daily_df.index:
617
-
618
- # Check if all ER variables exist in daily_df
619
- if all(var in daily_df.columns for var in er_var_names) and all(var in daily_df.columns for var in out_var_names):
620
- # Sum all ER variables for this day
621
- er_sum = daily_df.loc[day, er_var_names].sum()
622
- out_sum = daily_df.loc[day, out_var_names].sum()
623
-
624
- # Check if sum of ER >= OUT value
625
- if er_sum > out_sum:
626
- _add_an_alarm(alarms, day, out_codes.iloc[0]['variable_name'], f"Recirculation imbalance: Sum of recirculation equipment power ({er_sum:.2f} kW) exceeds TM heating output ({out_sum:.2f} kW).")
627
- return _convert_silent_alarm_dict_to_df(alarms)
351
+ alarm = BalancingValve(bounds_df, default_power_ratio)
352
+ return alarm.find_alarms(None, daily_df, config)
628
353
 
629
354
  def flag_hp_inlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
630
355
  default_temp_threshold : float = 115.0, fault_time : int = 5) -> pd.DataFrame:
@@ -647,7 +372,7 @@ def flag_hp_inlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : Config
647
372
  post-transformed dataframe for daily data.
648
373
  config : ecopipeline.ConfigManager
649
374
  The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
650
- called Varriable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
375
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
651
376
  The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
652
377
  name of each variable in the dataframe that requires alarming and the HPI alarm codes (e.g., HPI_POW_1:0.5, HPI_T_1:125.0)
653
378
  system: str
@@ -675,44 +400,8 @@ def flag_hp_inlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : Config
675
400
  except FileNotFoundError:
676
401
  print("File Not Found: ", variable_names_path)
677
402
  return pd.DataFrame()
678
-
679
- bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'HPI',
680
- {'POW' : default_power_threshold,
681
- 'T' : default_temp_threshold},
682
- system)
683
- if bounds_df.empty:
684
- return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
685
-
686
- # Process each unique alarm_code_id
687
- alarms = {}
688
- for alarm_id in bounds_df['alarm_code_id'].unique():
689
- for day in daily_df.index:
690
- next_day = day + pd.Timedelta(days=1)
691
- filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
692
- id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
693
- pow_codes = id_group[id_group['alarm_code_type'] == 'POW']
694
- pow_var_name = pow_codes.iloc[0]['variable_name']
695
- pow_thresh = pow_codes.iloc[0]['bound']
696
- t_codes = id_group[id_group['alarm_code_type'] == 'T']
697
- t_var_name = t_codes.iloc[0]['variable_name']
698
- t_pretty_name = t_codes.iloc[0]['pretty_name']
699
- t_thresh = t_codes.iloc[0]['bound']
700
- if len(t_codes) != 1 or len(pow_codes) != 1:
701
- raise Exception(f"Improper alarm codes for balancing valve with id {alarm_id}")
702
- if pow_var_name in filtered_df.columns and t_var_name in filtered_df.columns:
703
- # Check for consecutive minutes where both power and temp exceed thresholds
704
- power_mask = filtered_df[pow_var_name] > pow_thresh
705
- temp_mask = filtered_df[t_var_name] > t_thresh
706
- combined_mask = power_mask & temp_mask
707
-
708
- # Check for fault_time consecutive minutes
709
- consecutive_condition = combined_mask.rolling(window=fault_time).min() == 1
710
- if consecutive_condition.any():
711
- first_true_index = consecutive_condition.idxmax()
712
- adjusted_time = first_true_index - pd.Timedelta(minutes=fault_time-1)
713
- _add_an_alarm(alarms, day, t_var_name, f"High heat pump inlet temperature: {t_pretty_name} was above {t_thresh:.1f} while HP was ON starting at {adjusted_time}.")
714
-
715
- return _convert_silent_alarm_dict_to_df(alarms)
403
+ alarm = HPWHInlet(bounds_df, default_power_threshold, default_temp_threshold, fault_time)
404
+ return alarm.find_alarms(df, daily_df, config)
716
405
 
717
406
  def flag_hp_outlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
718
407
  default_temp_threshold : float = 140.0, fault_time : int = 5) -> pd.DataFrame:
@@ -765,52 +454,9 @@ def flag_hp_outlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : Confi
765
454
  except FileNotFoundError:
766
455
  print("File Not Found: ", variable_names_path)
767
456
  return pd.DataFrame()
768
-
769
- bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'HPO',
770
- {'POW' : default_power_threshold,
771
- 'T' : default_temp_threshold},
772
- system)
773
- if bounds_df.empty:
774
- return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
775
-
776
- # Process each unique alarm_code_id
777
- alarms = {}
778
- for alarm_id in bounds_df['alarm_code_id'].unique():
779
- for day in daily_df.index:
780
- next_day = day + pd.Timedelta(days=1)
781
- filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
782
- id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
783
- pow_codes = id_group[id_group['alarm_code_type'] == 'POW']
784
- pow_var_name = pow_codes.iloc[0]['variable_name']
785
- pow_thresh = pow_codes.iloc[0]['bound']
786
- t_codes = id_group[id_group['alarm_code_type'] == 'T']
787
- t_var_name = t_codes.iloc[0]['variable_name']
788
- t_pretty_name = t_codes.iloc[0]['pretty_name']
789
- t_thresh = t_codes.iloc[0]['bound']
790
- if len(t_codes) != 1 or len(pow_codes) != 1:
791
- raise Exception(f"Improper alarm codes for balancing valve with id {alarm_id}")
792
- if pow_var_name in filtered_df.columns and t_var_name in filtered_df.columns:
793
- # Check for consecutive minutes where both power and temp exceed thresholds
794
- power_mask = filtered_df[pow_var_name] > pow_thresh
795
- temp_mask = filtered_df[t_var_name] < t_thresh
796
-
797
- # Exclude first 10 minutes after each HP turn-on (warmup period)
798
- warmup_minutes = 10
799
- mask_changes = power_mask != power_mask.shift(1)
800
- run_groups = mask_changes.cumsum()
801
- cumcount_in_run = power_mask.groupby(run_groups).cumcount() + 1
802
- past_warmup_mask = power_mask & (cumcount_in_run > warmup_minutes)
803
-
804
- combined_mask = past_warmup_mask & temp_mask
805
-
806
- # Check for fault_time consecutive minutes
807
- consecutive_condition = combined_mask.rolling(window=fault_time).min() == 1
808
- if consecutive_condition.any():
809
- first_true_index = consecutive_condition.idxmax()
810
- adjusted_time = first_true_index - pd.Timedelta(minutes=fault_time-1)
811
- _add_an_alarm(alarms, day, t_var_name, f"Low heat pump outlet temperature: {t_pretty_name} was below {t_thresh:.1f} while HP was ON starting at {adjusted_time}.")
812
-
813
- return _convert_silent_alarm_dict_to_df(alarms)
457
+
458
+ alarm = HPWHOutlet(bounds_df, default_power_threshold, default_temp_threshold, fault_time)
459
+ return alarm.find_alarms(df, daily_df, config)
814
460
 
815
461
  def flag_blown_fuse(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
816
462
  default_power_range : float = 2.0, default_power_draw : float = 30, fault_time : int = 3) -> pd.DataFrame:
@@ -860,36 +506,8 @@ def flag_blown_fuse(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigMan
860
506
  print("File Not Found: ", variable_names_path)
861
507
  return pd.DataFrame()
862
508
 
863
- bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'BF',
864
- {'default' : default_power_draw},
865
- system, two_part_tag=False)
866
- if bounds_df.empty:
867
- return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
868
-
869
- # Process each unique alarm_code_id
870
- alarms = {}
871
- for var_name in bounds_df['variable_name'].unique():
872
- for day in daily_df.index:
873
- next_day = day + pd.Timedelta(days=1)
874
- filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
875
- rows = bounds_df[bounds_df['variable_name'] == var_name]
876
- expected_power_draw = rows.iloc[0]['bound']
877
- if len(rows) != 1:
878
- raise Exception(f"Multiple blown fuse alarm codes for {var_name}")
879
- if var_name in filtered_df.columns:
880
- # Check for consecutive minutes where both power and temp exceed thresholds
881
- power_on_mask = filtered_df[var_name] > default_power_threshold
882
- unexpected_power_mask = filtered_df[var_name] < expected_power_draw - default_power_range
883
- combined_mask = power_on_mask & unexpected_power_mask
884
-
885
- # Check for fault_time consecutive minutes
886
- consecutive_condition = combined_mask.rolling(window=fault_time).min() == 1
887
- if consecutive_condition.any():
888
- first_true_index = consecutive_condition.idxmax()
889
- adjusted_time = first_true_index - pd.Timedelta(minutes=fault_time-1)
890
- _add_an_alarm(alarms, day, var_name, f"Blown Fuse: {var_name} had a power draw less than {expected_power_draw - default_power_range:.1f} while element was ON starting at {adjusted_time}.")
891
-
892
- return _convert_silent_alarm_dict_to_df(alarms)
509
+ alarm = BlownFuse(bounds_df, default_power_threshold, default_power_range, default_power_draw,fault_time)
510
+ return alarm.find_alarms(df, daily_df, config)
893
511
 
894
512
  def flag_unexpected_soo_change(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
895
513
  default_on_temp : float = 115.0, default_off_temp : float = 140.0) -> pd.DataFrame:
@@ -936,13 +554,6 @@ def flag_unexpected_soo_change(df: pd.DataFrame, daily_df: pd.DataFrame, config
936
554
  pd.DataFrame:
937
555
  Pandas dataframe with alarm events
938
556
  """
939
- soo_dict = {
940
- 'loadUp' : 'LOAD UP',
941
- 'shed' : 'SHED',
942
- 'criticalPeak': 'CRITICAL PEAK',
943
- 'gridEmergency' : 'GRID EMERGENCY',
944
- 'advLoadUp' : 'ADVANCED LOAD UP'
945
- }
946
557
  if df.empty:
947
558
  print("cannot flag missing balancing valve alarms. Dataframe is empty")
948
559
  return pd.DataFrame()
@@ -953,99 +564,18 @@ def flag_unexpected_soo_change(df: pd.DataFrame, daily_df: pd.DataFrame, config
953
564
  print("File Not Found: ", variable_names_path)
954
565
  return pd.DataFrame()
955
566
 
956
- bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'SOOCHNG',
957
- {'POW' : default_power_threshold,
958
- 'ON' : default_on_temp,
959
- 'OFF' : default_off_temp},
960
- system)
961
- if bounds_df.empty:
962
- return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
963
-
964
- ls_df = config.get_ls_df()
965
-
966
- # Process each unique alarm_code_id
967
- alarms = {}
968
- pow_codes = bounds_df[bounds_df['alarm_code_type'] == 'POW']
969
- if len(pow_codes) != 1:
970
- raise Exception(f"Improper alarm codes for SOO changes; must have 1 POW variable to indicate power to HPWH(s).")
971
- pow_var_name = pow_codes.iloc[0]['variable_name']
972
- pow_thresh = pow_codes.iloc[0]['bound']
973
- bounds_df = bounds_df[bounds_df['alarm_code_type'] != 'POW']
974
-
975
- for alarm_id in bounds_df['alarm_code_id'].unique():
976
- ls_filtered_df = df.copy()
977
- soo_mode_name = 'NORMAL'
978
- if alarm_id in soo_dict.keys():
979
- if not ls_df.empty:
980
- # Filter ls_filtered_df for only date ranges in the right mode of ls_df
981
- mode_rows = ls_df[ls_df['event'] == alarm_id]
982
- mask = pd.Series(False, index=ls_filtered_df.index)
983
- for _, row in mode_rows.iterrows():
984
- mask |= (ls_filtered_df.index >= row['startDateTime']) & (ls_filtered_df.index < row['endDateTime'])
985
- ls_filtered_df = ls_filtered_df[mask]
986
- soo_mode_name = soo_dict[alarm_id]
987
- else:
988
- print(f"Cannot check for {alarm_id} because there are no {alarm_id} periods in time frame.")
989
- continue
990
- elif not ls_df.empty:
991
- # Filter out all date range rows from ls_filtered_df's indexes
992
- mask = pd.Series(True, index=ls_filtered_df.index)
993
- for _, row in ls_df.iterrows():
994
- mask &= ~((ls_filtered_df.index >= row['startDateTime']) & (ls_filtered_df.index < row['endDateTime']))
995
- ls_filtered_df = ls_filtered_df[mask]
996
-
997
- for day in daily_df.index:
998
- next_day = day + pd.Timedelta(days=1)
999
- filtered_df = ls_filtered_df.loc[(ls_filtered_df.index >= day) & (ls_filtered_df.index < next_day)]
1000
- id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
1001
- on_t_codes = id_group[id_group['alarm_code_type'] == 'ON']
1002
- off_t_codes = id_group[id_group['alarm_code_type'] == 'ON']
1003
- if len(on_t_codes) != 1 or len(off_t_codes) != 1:
1004
- raise Exception(f"Improper alarm codes for SOO changes with id {alarm_id}. Must have 1 ON and 1 OFF variable")
1005
- on_t_var_name = on_t_codes.iloc[0]['variable_name']
1006
- on_t_pretty_name = on_t_codes.iloc[0]['pretty_name']
1007
- on_t_thresh = on_t_codes.iloc[0]['bound']
1008
- off_t_var_name = off_t_codes.iloc[0]['variable_name']
1009
- off_t_pretty_name = off_t_codes.iloc[0]['pretty_name']
1010
- off_t_thresh = off_t_codes.iloc[0]['bound']
1011
- if pow_var_name in filtered_df.columns:
1012
- found_alarm = False
1013
- power_below = filtered_df[pow_var_name] <= pow_thresh
1014
- power_above = filtered_df[pow_var_name] > pow_thresh
1015
- if on_t_var_name in filtered_df.columns:
1016
- power_turn_on = power_below.shift(1) & power_above
1017
- power_on_times = filtered_df.index[power_turn_on.fillna(False)]
1018
- # Check if temperature is within 5.0 of on_t_thresh at each turn-on moment
1019
- for power_time in power_on_times:
1020
- temp_at_turn_on = filtered_df.loc[power_time, on_t_var_name]
1021
- if abs(temp_at_turn_on - on_t_thresh) > 5.0:
1022
- _add_an_alarm(alarms, day, on_t_var_name,
1023
- f"Unexpected SOO change: during {soo_mode_name}, HP turned on at {power_time} but {on_t_pretty_name} was {temp_at_turn_on:.1f} F (setpoint at {on_t_thresh} F).")
1024
- found_alarm = True
1025
- break # TODO soon don't do this
1026
- if not found_alarm and off_t_var_name in filtered_df.columns:
1027
- power_turn_off = power_above.shift(1) & power_below
1028
- power_off_times = filtered_df.index[power_turn_off.fillna(False)]
1029
- # Check if temperature is within 5.0 of off_t_thresh at each turn-on moment
1030
- for power_time in power_off_times:
1031
- temp_at_turn_off = filtered_df.loc[power_time, off_t_var_name]
1032
- if abs(temp_at_turn_off - off_t_thresh) > 5.0:
1033
- _add_an_alarm(alarms, day, off_t_var_name,
1034
- f"Unexpected SOO change: during {soo_mode_name}, HP turned off at {power_time} but {off_t_pretty_name} was {temp_at_turn_off:.1f} F (setpoint at {off_t_thresh} F)).")
1035
- found_alarm = True
1036
- break # TODO soon don't do this
1037
-
1038
- return _convert_silent_alarm_dict_to_df(alarms)
567
+ alarm = SOOChange(bounds_df, default_power_threshold, default_on_temp, default_off_temp)
568
+ return alarm.find_alarms(df, daily_df, config)
1039
569
 
1040
570
  def flag_ls_mode_inconsistancy(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "") -> pd.DataFrame:
1041
571
  """
1042
- Detects when a variable does not match its expected value during a load shifting event.
572
+ Detects when reported loadshift mode does not match its expected value during a load shifting event.
1043
573
  An alarm is triggered if the variable value does not equal the expected value during the
1044
574
  time periods defined in the load shifting schedule for that mode.
1045
575
 
1046
576
  VarNames syntax:
1047
577
  SOO_[mode]:### - Indicates a variable that should equal ### during [mode] load shifting events.
1048
- [mode] can be: loadUp, shed, criticalPeak, gridEmergency, advLoadUp
578
+ [mode] can be: normal, loadUp, shed, criticalPeak, gridEmergency, advLoadUp
1049
579
  ### is the expected value (e.g., SOO_loadUp:1 means the variable should be 1 during loadUp events)
1050
580
 
1051
581
  Parameters
@@ -1074,92 +604,14 @@ def flag_ls_mode_inconsistancy(df: pd.DataFrame, daily_df: pd.DataFrame, config
1074
604
  except FileNotFoundError:
1075
605
  print("File Not Found: ", variable_names_path)
1076
606
  return pd.DataFrame()
1077
-
1078
- bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'SOO', {}, system)
1079
- if bounds_df.empty:
1080
- return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
1081
-
1082
- ls_df = config.get_ls_df()
1083
- if ls_df.empty:
1084
- return _convert_silent_alarm_dict_to_df({}) # no load shifting events to check
1085
-
1086
- valid_modes = ['loadUp', 'shed', 'criticalPeak', 'gridEmergency', 'advLoadUp']
1087
-
1088
- alarms = {}
1089
- for _, row in bounds_df.iterrows():
1090
- mode = row['alarm_code_type']
1091
- if mode not in valid_modes and mode != 'normal':
1092
- continue
1093
-
1094
- var_name = row['variable_name']
1095
- pretty_name = row['pretty_name']
1096
- expected_value = row['bound']
1097
-
1098
- if var_name not in df.columns:
1099
- continue
1100
-
1101
- for day in daily_df.index:
1102
- next_day = day + pd.Timedelta(days=1)
1103
- filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
1104
-
1105
- if filtered_df.empty:
1106
- continue
1107
-
1108
- if mode == 'normal':
1109
- # For 'normal' mode, check periods NOT covered by any load shifting events
1110
- normal_df = filtered_df.copy()
1111
- if not ls_df.empty:
1112
- mask = pd.Series(True, index=normal_df.index)
1113
- for _, event_row in ls_df.iterrows():
1114
- event_start = event_row['startDateTime']
1115
- event_end = event_row['endDateTime']
1116
- mask &= ~((normal_df.index >= event_start) & (normal_df.index < event_end))
1117
- normal_df = normal_df[mask]
1118
-
1119
- if normal_df.empty:
1120
- continue
1121
-
1122
- # Check if any values don't match the expected value during normal periods
1123
- mismatched = normal_df[normal_df[var_name] != expected_value]
1124
-
1125
- if not mismatched.empty:
1126
- first_mismatch_time = mismatched.index[0]
1127
- actual_value = mismatched.iloc[0][var_name]
1128
- _add_an_alarm(alarms, day, var_name,
1129
- f"Load shift mode inconsistency: {pretty_name} was {actual_value} at {first_mismatch_time} during normal operation (expected {expected_value}).")
1130
- else:
1131
- # For load shifting modes, check periods covered by those specific events
1132
- mode_events = ls_df[ls_df['event'] == mode]
1133
- if mode_events.empty:
1134
- continue
1135
-
1136
- # Check each load shifting event for this mode on this day
1137
- for _, event_row in mode_events.iterrows():
1138
- event_start = event_row['startDateTime']
1139
- event_end = event_row['endDateTime']
1140
-
1141
- # Filter for data during this event
1142
- event_df = filtered_df.loc[(filtered_df.index >= event_start) & (filtered_df.index < event_end)]
1143
-
1144
- if event_df.empty:
1145
- continue
1146
-
1147
- # Check if any values don't match the expected value
1148
- mismatched = event_df[event_df[var_name] != expected_value]
1149
-
1150
- if not mismatched.empty:
1151
- first_mismatch_time = mismatched.index[0]
1152
- actual_value = mismatched.iloc[0][var_name]
1153
- _add_an_alarm(alarms, day, var_name,
1154
- f"Load shift mode inconsistency: {pretty_name} was {actual_value} at {first_mismatch_time} during {mode} event (expected {expected_value}).")
1155
- break # Only one alarm per variable per day
1156
-
1157
- return _convert_silent_alarm_dict_to_df(alarms)
607
+
608
+ alarm = LSInconsist(bounds_df)
609
+ return alarm.find_alarms(df, daily_df, config)
1158
610
 
1159
611
  def flag_unexpected_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_high_temp : float = 130,
1160
612
  default_low_temp : float = 115, fault_time : int = 10) -> pd.DataFrame:
1161
613
  """
1162
- Detects when domestic hot water (DHW) supply temperature falls outside an acceptable range for
614
+ Detects when a temperature value falls outside an acceptable range for
1163
615
  too long. An alarm is triggered if the temperature is above the high bound or below the low bound
1164
616
  for `fault_time` consecutive minutes.
1165
617
 
@@ -1202,38 +654,8 @@ def flag_unexpected_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : Conf
1202
654
  except FileNotFoundError:
1203
655
  print("File Not Found: ", variable_names_path)
1204
656
  return pd.DataFrame()
1205
-
1206
- bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'TMPRNG',
1207
- {'default': [default_low_temp,default_high_temp]},
1208
- system, two_part_tag=False,
1209
- range_bounds=True)
1210
- if bounds_df.empty:
1211
- return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
1212
-
1213
- # Process each unique alarm_code_id
1214
- alarms = {}
1215
- for dhw_var in bounds_df['variable_name'].unique():
1216
- for day in daily_df.index:
1217
- next_day = day + pd.Timedelta(days=1)
1218
- filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
1219
- rows = bounds_df[bounds_df['variable_name'] == dhw_var]
1220
- low_bound = rows.iloc[0]['bound']
1221
- high_bound = rows.iloc[0]['bound2']
1222
- pretty_name = rows.iloc[0]['pretty_name']
1223
-
1224
- if dhw_var in filtered_df.columns:
1225
- # Check if temp is above high bound or below low bound
1226
- out_of_range_mask = (filtered_df[dhw_var] > high_bound) | (filtered_df[dhw_var] < low_bound)
1227
-
1228
- # Check for fault_time consecutive minutes
1229
- consecutive_condition = out_of_range_mask.rolling(window=fault_time).min() == 1
1230
- if consecutive_condition.any():
1231
- first_true_index = consecutive_condition.idxmax()
1232
- adjusted_time = first_true_index - pd.Timedelta(minutes=fault_time-1)
1233
- _add_an_alarm(alarms, day, dhw_var,
1234
- f"Temperature out of range: {pretty_name} was outside {low_bound}-{high_bound} F for {fault_time}+ consecutive minutes starting at {adjusted_time}.")
1235
-
1236
- return _convert_silent_alarm_dict_to_df(alarms)
657
+ temp_alarm = TempRange(bounds_df, default_high_temp, default_low_temp, fault_time)
658
+ return temp_alarm.find_alarms(df, daily_df, config)
1237
659
 
1238
660
  def flag_shortcycle(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
1239
661
  short_cycle_time : int = 15) -> pd.DataFrame:
@@ -1282,216 +704,31 @@ def flag_shortcycle(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigMan
1282
704
  print("File Not Found: ", variable_names_path)
1283
705
  return pd.DataFrame()
1284
706
 
1285
- bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'SHRTCYC',
1286
- {'default' : default_power_threshold},
1287
- system, two_part_tag=False)
1288
- if bounds_df.empty:
1289
- return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
1290
-
1291
- # Process each unique alarm_code_id
1292
- alarms = {}
1293
- for var_name in bounds_df['variable_name'].unique():
1294
- for day in daily_df.index:
1295
- next_day = day + pd.Timedelta(days=1)
1296
- filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
1297
- rows = bounds_df[bounds_df['variable_name'] == var_name]
1298
- pwr_thresh = rows.iloc[0]['bound']
1299
- var_pretty = rows.iloc[0]['pretty_name']
1300
- if len(rows) != 1:
1301
- raise Exception(f"Multiple blown fuse alarm codes for {var_name}")
1302
- if var_name in filtered_df.columns:
1303
- power_on_mask = filtered_df[var_name] > pwr_thresh
1304
-
1305
- # Find runs of consecutive True values by detecting changes in the mask
1306
- mask_changes = power_on_mask != power_on_mask.shift(1)
1307
- run_groups = mask_changes.cumsum()
1308
-
1309
- # For each run where power is on, check if it's shorter than short_cycle_time
1310
- for group_id in run_groups[power_on_mask].unique():
1311
- run_indices = filtered_df.index[(run_groups == group_id) & power_on_mask]
1312
- run_length = len(run_indices)
1313
- if run_length > 0 and run_length < short_cycle_time:
1314
- start_time = run_indices[0]
1315
- _add_an_alarm(alarms, day, var_name,
1316
- f"Short cycle: {var_pretty} was on for only {run_length} minutes starting at {start_time}.")
1317
- break
1318
-
1319
- return _convert_silent_alarm_dict_to_df(alarms)
1320
-
1321
- def _process_bounds_df_alarm_codes(bounds_df : pd.DataFrame, alarm_tag : str, type_default_dict : dict = {}, system : str = "",
1322
- two_part_tag : bool = True, range_bounds : bool = False) -> pd.DataFrame:
1323
- # Should only do for alarm codes of format: [TAG]_[TYPE]_[OPTIONAL_ID]:[BOUND]
1324
- if (system != ""):
1325
- if not 'system' in bounds_df.columns:
1326
- raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
1327
- bounds_df = bounds_df.loc[bounds_df['system'] == system]
1328
-
1329
- required_columns = ["variable_name", "alarm_codes"]
1330
- for required_column in required_columns:
1331
- if not required_column in bounds_df.columns:
1332
- raise Exception(f"{required_column} is not present in Variable_Names.csv")
1333
- if not 'pretty_name' in bounds_df.columns:
1334
- bounds_df['pretty_name'] = bounds_df['variable_name']
1335
- else:
1336
- bounds_df['pretty_name'] = bounds_df['pretty_name'].fillna(bounds_df['variable_name'])
1337
-
1338
- bounds_df = bounds_df.loc[:, ["variable_name", "alarm_codes", "pretty_name"]]
1339
- bounds_df.dropna(axis=0, thresh=2, inplace=True)
707
+ short_alarm = ShortCycle(bounds_df, default_power_threshold, short_cycle_time)
708
+ return short_alarm.find_alarms(df, daily_df, config)
1340
709
 
1341
- # Check if all alarm_codes are null or if dataframe is empty
1342
- if bounds_df.empty or bounds_df['alarm_codes'].isna().all():
1343
- return pd.DataFrame()
1344
-
1345
- bounds_df = bounds_df[bounds_df['alarm_codes'].str.contains(alarm_tag, na=False)]
1346
-
1347
- # Split alarm_codes by semicolons and create a row for each STS code
1348
- expanded_rows = []
1349
- for idx, row in bounds_df.iterrows():
1350
- alarm_codes = str(row['alarm_codes']).split(';')
1351
- tag_codes = [code.strip() for code in alarm_codes if code.strip().startswith(alarm_tag)]
1352
-
1353
- if tag_codes: # Only process if there are STS codes
1354
- for tag_code in tag_codes:
1355
- new_row = row.copy()
1356
- if ":" in tag_code:
1357
- tag_parts = tag_code.split(':')
1358
- if len(tag_parts) > 2:
1359
- raise Exception(f"Improperly formated alarm code : {tag_code}")
1360
- if range_bounds:
1361
- bounds = tag_parts[1]
1362
- bound_range = bounds.split('-')
1363
- if len(bound_range) != 2:
1364
- raise Exception(f"Improperly formated alarm code : {tag_code}. Expected bound range in form '[number]-[number]' but recieved '{bounds}'.")
1365
- new_row['bound'] = bound_range[0]
1366
- new_row['bound2'] = bound_range[1]
1367
- else:
1368
- new_row['bound'] = tag_parts[1]
1369
- tag_code = tag_parts[0]
1370
- else:
1371
- new_row['bound'] = None
1372
- if range_bounds:
1373
- new_row['bound2'] = None
1374
- new_row['alarm_codes'] = tag_code
1375
-
1376
- expanded_rows.append(new_row)
1377
-
1378
- if expanded_rows:
1379
- bounds_df = pd.DataFrame(expanded_rows)
1380
- else:
1381
- return pd.DataFrame()# no tagged alarms to look into
1382
-
1383
- alarm_code_parts = []
1384
- for idx, row in bounds_df.iterrows():
1385
- parts = row['alarm_codes'].split('_')
1386
- if two_part_tag:
1387
- if len(parts) == 2:
1388
- alarm_code_parts.append([parts[1], "No ID"])
1389
- elif len(parts) == 3:
1390
- alarm_code_parts.append([parts[1], parts[2]])
1391
- else:
1392
- raise Exception(f"improper {alarm_tag} alarm code format for {row['variable_name']}")
1393
- else:
1394
- if len(parts) == 1:
1395
- alarm_code_parts.append(["default", "No ID"])
1396
- elif len(parts) == 2:
1397
- alarm_code_parts.append(["default", parts[1]])
1398
- else:
1399
- raise Exception(f"improper {alarm_tag} alarm code format for {row['variable_name']}")
1400
- if alarm_code_parts:
1401
- bounds_df[['alarm_code_type', 'alarm_code_id']] = pd.DataFrame(alarm_code_parts, index=bounds_df.index)
1402
-
1403
- # Replace None bounds with appropriate defaults based on alarm_code_type
1404
- for idx, row in bounds_df.iterrows():
1405
- if pd.isna(row['bound']) or row['bound'] is None:
1406
- if row['alarm_code_type'] in type_default_dict.keys():
1407
- if range_bounds:
1408
- bounds_df.at[idx, 'bound'] = type_default_dict[row['alarm_code_type']][0]
1409
- bounds_df.at[idx, 'bound2'] = type_default_dict[row['alarm_code_type']][1]
1410
- else:
1411
- bounds_df.at[idx, 'bound'] = type_default_dict[row['alarm_code_type']]
1412
- # Coerce bound column to float
1413
- bounds_df['bound'] = pd.to_numeric(bounds_df['bound'], errors='coerce').astype(float)
1414
- if range_bounds:
1415
- bounds_df['bound2'] = pd.to_numeric(bounds_df['bound2'], errors='coerce').astype(float)
1416
-
1417
- return bounds_df
1418
-
1419
- def _add_an_alarm(alarm_dict : dict, day : datetime, var_name : str, alarm_string : str):
1420
- # Round down to beginning of day
1421
- day = pd.Timestamp(day).normalize()
1422
-
1423
- if day in alarm_dict:
1424
- alarm_dict[day].append([var_name, alarm_string])
1425
- else:
1426
- alarm_dict[day] = [[var_name, alarm_string]]
1427
710
 
1428
711
  def _convert_silent_alarm_dict_to_df(alarm_dict : dict) -> pd.DataFrame:
1429
712
  events = {
1430
713
  'start_time_pt' : [],
1431
714
  'end_time_pt' : [],
1432
- 'event_type' : [],
1433
- 'event_detail' : [],
715
+ 'alarm_type' : [],
716
+ 'alarm_detail' : [],
1434
717
  'variable_name' : []
1435
718
  }
1436
719
  for key, value_list in alarm_dict.items():
1437
720
  for value in value_list:
1438
721
  events['start_time_pt'].append(key)
1439
- events['end_time_pt'].append(key)
1440
- events['event_type'].append('SILENT_ALARM')
1441
- events['event_detail'].append(value[1])
722
+ # Use end_time from value[2] if provided, otherwise use key
723
+ events['end_time_pt'].append(value[2] if len(value) > 2 else key)
724
+ events['alarm_type'].append(value[3] if len(value) > 3 else 'SILENT_ALARM')
725
+ events['alarm_detail'].append(value[1])
1442
726
  events['variable_name'].append(value[0])
1443
727
 
1444
728
  event_df = pd.DataFrame(events)
1445
729
  event_df.set_index('start_time_pt', inplace=True)
1446
730
  return event_df
1447
731
 
1448
- def _convert_event_type_dict_to_df(alarm_dict : dict, event_type = 'DATA_LOSS_COP') -> pd.DataFrame:
1449
- events = {
1450
- 'start_time_pt' : [],
1451
- 'end_time_pt' : [],
1452
- 'event_type' : [],
1453
- 'event_detail' : [],
1454
- 'variable_name' : []
1455
- }
1456
- for key, value in alarm_dict.items():
1457
- for i in range(len(value)):
1458
- events['start_time_pt'].append(key)
1459
- events['end_time_pt'].append(key)
1460
- events['event_type'].append(event_type)
1461
- events['event_detail'].append(value[i][1])
1462
- events['variable_name'].append(value[i][0])
1463
-
1464
- event_df = pd.DataFrame(events)
1465
- event_df.set_index('start_time_pt', inplace=True)
1466
- return event_df
1467
-
1468
- def _check_and_add_alarm(df : pd.DataFrame, mask : pd.Series, alarms_dict, day, fault_time : int, var_name : str, pretty_name : str, alarm_type : str = 'Lower'):
1469
- # KNOWN BUG : Avg value during fault time excludes the first (fault_time-1) minutes of each fault window
1470
- next_day = day + pd.Timedelta(days=1)
1471
- filtered_df = mask.loc[(mask.index >= day) & (mask.index < next_day)]
1472
- consecutive_condition = filtered_df.rolling(window=fault_time).min() == 1
1473
- if consecutive_condition.any():
1474
- group = (consecutive_condition != consecutive_condition.shift()).cumsum()
1475
- streaks = consecutive_condition.groupby(group).agg(['sum', 'size', 'idxmin'])
1476
- true_streaks = streaks[consecutive_condition.groupby(group).first()]
1477
- longest_streak_length = true_streaks['size'].max()
1478
- avg_streak_length = true_streaks['size'].mean() + fault_time-1
1479
- longest_group = true_streaks['size'].idxmax()
1480
- streak_indices = consecutive_condition[group == longest_group].index
1481
- starting_index = streak_indices[0]
1482
-
1483
- day_df = df.loc[(df.index >= day) & (df.index < next_day)]
1484
- average_value = day_df.loc[consecutive_condition, var_name].mean()
1485
-
1486
- # first_true_index = consecutive_condition.idxmax()
1487
- # because first (fault_time-1) minutes don't count in window
1488
- adjusted_time = starting_index - pd.Timedelta(minutes=fault_time-1)
1489
- adjusted_longest_streak_length = longest_streak_length + fault_time-1
1490
- alarm_string = f"{alarm_type} bound alarm for {pretty_name} (longest at {adjusted_time.strftime('%H:%M')} for {adjusted_longest_streak_length} minutes). Avg fault time : {round(avg_streak_length,1)} minutes, Avg value during fault: {round(average_value,2)}"
1491
- if day in alarms_dict:
1492
- alarms_dict[day].append([var_name, alarm_string])
1493
- else:
1494
- alarms_dict[day] = [[var_name, alarm_string]]
1495
732
 
1496
733
  def power_ratio_alarm(daily_df: pd.DataFrame, config : ConfigManager, day_table_name : str, system: str = "", verbose : bool = False, ratio_period_days : int = 7) -> pd.DataFrame:
1497
734
  """
@@ -1505,7 +742,7 @@ def power_ratio_alarm(daily_df: pd.DataFrame, config : ConfigManager, day_table_
1505
742
  are out of order or have gaps, the function may return erroneous alarms.
1506
743
  config : ecopipeline.ConfigManager
1507
744
  The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
1508
- called Varriable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
745
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
1509
746
  The file must have at least two columns which must be titled "variable_name", "alarm_codes" which should contain the
1510
747
  name of each variable in the dataframe that requires the alarming and the ratio alarm code in the form "PR_{Power Ratio Name}:{low percentage}-{high percentage}
1511
748
  system: str
@@ -1518,306 +755,11 @@ def power_ratio_alarm(daily_df: pd.DataFrame, config : ConfigManager, day_table_
1518
755
  pd.DataFrame:
1519
756
  Pandas dataframe with alarm events, empty if no alarms triggered
1520
757
  """
1521
- daily_df_copy = daily_df.copy()
1522
758
  variable_names_path = config.get_var_names_path()
1523
759
  try:
1524
- ratios_df = pd.read_csv(variable_names_path)
760
+ bounds_df = pd.read_csv(variable_names_path)
1525
761
  except FileNotFoundError:
1526
762
  print("File Not Found: ", variable_names_path)
1527
763
  return pd.DataFrame()
1528
- if (system != ""):
1529
- if not 'system' in ratios_df.columns:
1530
- raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
1531
- ratios_df = ratios_df.loc[ratios_df['system'] == system]
1532
- required_columns = ["variable_name", "alarm_codes"]
1533
- for required_column in required_columns:
1534
- if not required_column in ratios_df.columns:
1535
- raise Exception(f"{required_column} is not present in Variable_Names.csv")
1536
- if ratios_df['alarm_codes'].isna().all() or ratios_df['alarm_codes'].isnull().all():
1537
- print("No alarm codes in ", variable_names_path)
1538
- return pd.DataFrame()
1539
- if not 'pretty_name' in ratios_df.columns:
1540
- ratios_df['pretty_name'] = ratios_df['variable_name']
1541
- else:
1542
- ratios_df['pretty_name'] = ratios_df['pretty_name'].fillna(ratios_df['variable_name'])
1543
- ratios_df = ratios_df.loc[:, ["variable_name", "alarm_codes", "pretty_name"]]
1544
- ratios_df = ratios_df[ratios_df['alarm_codes'].str.contains('PR', na=False)]
1545
- ratios_df.dropna(axis=0, thresh=2, inplace=True)
1546
- if ratio_period_days > 1:
1547
- if verbose:
1548
- print(f"adding last {ratio_period_days} to daily_df")
1549
- daily_df_copy = _append_previous_days_to_df(daily_df_copy, config, ratio_period_days, day_table_name)
1550
- elif ratio_period_days < 1:
1551
- print("power ratio alarm period, ratio_period_days, must be more than 1")
1552
- return pd.DataFrame()
1553
-
1554
- ratios_df.set_index(['variable_name'], inplace=True)
1555
- ratio_dict = {}
1556
- for ratios_var, ratios in ratios_df.iterrows():
1557
- if not ratios_var in daily_df_copy.columns:
1558
- daily_df_copy[ratios_var] = 0
1559
- alarm_codes = str(ratios['alarm_codes']).split(";")
1560
- for alarm_code in alarm_codes:
1561
- if alarm_code[:2] == "PR":
1562
- split_out_alarm = alarm_code.split(":")
1563
- low_high = split_out_alarm[1].split("-")
1564
- pr_id = split_out_alarm[0].split("_")[1]
1565
- if len(low_high) != 2:
1566
- raise Exception(f"Error processing alarm code {alarm_code}")
1567
- if pr_id in ratio_dict:
1568
- ratio_dict[pr_id][0].append(ratios_var)
1569
- ratio_dict[pr_id][1].append(float(low_high[0]))
1570
- ratio_dict[pr_id][2].append(float(low_high[1]))
1571
- ratio_dict[pr_id][3].append(ratios['pretty_name'])
1572
- else:
1573
- ratio_dict[pr_id] = [[ratios_var],[float(low_high[0])],[float(low_high[1])],[ratios['pretty_name']]]
1574
- if verbose:
1575
- print("ratio_dict keys:", ratio_dict.keys())
1576
- # Create blocks of ratio_period_days
1577
- blocks_df = _create_period_blocks(daily_df_copy, ratio_period_days, verbose)
1578
-
1579
- if blocks_df.empty:
1580
- print("No complete blocks available for analysis")
1581
- return pd.DataFrame()
1582
-
1583
- alarms = {}
1584
- for key, value_list in ratio_dict.items():
1585
- # Calculate total for each block
1586
- blocks_df[key] = blocks_df[value_list[0]].sum(axis=1)
1587
- for i in range(len(value_list[0])):
1588
- column_name = value_list[0][i]
1589
- # Calculate ratio for each block
1590
- blocks_df[f'{column_name}_{key}'] = (blocks_df[column_name]/blocks_df[key]) * 100
1591
- if verbose:
1592
- print(f"Block ratios for {column_name}_{key}:", blocks_df[f'{column_name}_{key}'])
1593
- _check_and_add_ratio_alarm_blocks(blocks_df, key, column_name, value_list[3][i], alarms, value_list[2][i], value_list[1][i], ratio_period_days)
1594
- return _convert_silent_alarm_dict_to_df(alarms)
1595
- # alarms = {}
1596
- # for key, value_list in ratio_dict.items():
1597
- # daily_df_copy[key] = daily_df_copy[value_list[0]].sum(axis=1)
1598
- # for i in range(len(value_list[0])):
1599
- # column_name = value_list[0][i]
1600
- # daily_df_copy[f'{column_name}_{key}'] = (daily_df_copy[column_name]/daily_df_copy[key]) * 100
1601
- # if verbose:
1602
- # print(f"Ratios for {column_name}_{key}",daily_df_copy[f'{column_name}_{key}'])
1603
- # _check_and_add_ratio_alarm(daily_df_copy, key, column_name, value_list[3][i], alarms, value_list[2][i], value_list[1][i])
1604
- # return _convert_silent_alarm_dict_to_df(alarms)
1605
-
1606
- # def _check_and_add_ratio_alarm(daily_df: pd.DataFrame, alarm_key : str, column_name : str, pretty_name : str, alarms_dict : dict, high_bound : float, low_bound : float):
1607
- # alarm_daily_df = daily_df.loc[(daily_df[f"{column_name}_{alarm_key}"] < low_bound) | (daily_df[f"{column_name}_{alarm_key}"] > high_bound)]
1608
- # if not alarm_daily_df.empty:
1609
- # for day, values in alarm_daily_df.iterrows():
1610
- # alarm_str = f"Power ratio alarm: {pretty_name} accounted for {round(values[f'{column_name}_{alarm_key}'], 2)}% of {alarm_key} energy use. {round(low_bound, 2)}-{round(high_bound, 2)}% of {alarm_key} energy use expected."
1611
- # if day in alarms_dict:
1612
- # alarms_dict[day].append([column_name, alarm_str])
1613
- # else:
1614
- # alarms_dict[day] = [[column_name, alarm_str]]
1615
- def _check_and_add_ratio_alarm_blocks(blocks_df: pd.DataFrame, alarm_key: str, column_name: str, pretty_name: str, alarms_dict: dict, high_bound: float, low_bound: float, ratio_period_days: int):
1616
- """
1617
- Check for alarms in block-based ratios and add to alarms dictionary.
1618
- """
1619
- alarm_blocks_df = blocks_df.loc[(blocks_df[f"{column_name}_{alarm_key}"] < low_bound) | (blocks_df[f"{column_name}_{alarm_key}"] > high_bound)]
1620
- if not alarm_blocks_df.empty:
1621
- for block_end_date, values in alarm_blocks_df.iterrows():
1622
- alarm_str = f"Power ratio alarm ({ratio_period_days}-day block ending {block_end_date.strftime('%Y-%m-%d')}): {pretty_name} accounted for {round(values[f'{column_name}_{alarm_key}'], 2)}% of {alarm_key} energy use. {round(low_bound, 2)}-{round(high_bound, 2)}% of {alarm_key} energy use expected."
1623
- if block_end_date in alarms_dict:
1624
- alarms_dict[block_end_date].append([column_name, alarm_str])
1625
- else:
1626
- alarms_dict[block_end_date] = [[column_name, alarm_str]]
1627
-
1628
- def _create_period_blocks(daily_df: pd.DataFrame, ratio_period_days: int, verbose: bool = False) -> pd.DataFrame:
1629
- """
1630
- Create blocks of ratio_period_days by summing values within each block.
1631
- Each block will be represented by its end date.
1632
- """
1633
- if len(daily_df) < ratio_period_days:
1634
- if verbose:
1635
- print(f"Not enough data for {ratio_period_days}-day blocks. Need at least {ratio_period_days} days, have {len(daily_df)}")
1636
- return pd.DataFrame()
1637
-
1638
- blocks = []
1639
- block_dates = []
1640
-
1641
- # Create blocks by summing consecutive groups of ratio_period_days
1642
- for i in range(ratio_period_days - 1, len(daily_df)):
1643
- start_idx = i - ratio_period_days + 1
1644
- end_idx = i + 1
1645
-
1646
- block_data = daily_df.iloc[start_idx:end_idx].sum()
1647
- blocks.append(block_data)
1648
- # Use the end date of the block as the identifier
1649
- block_dates.append(daily_df.index[i])
1650
-
1651
- if not blocks:
1652
- return pd.DataFrame()
1653
-
1654
- blocks_df = pd.DataFrame(blocks, index=block_dates)
1655
-
1656
- if verbose:
1657
- print(f"Created {len(blocks_df)} blocks of {ratio_period_days} days each")
1658
- print(f"Block date range: {blocks_df.index.min()} to {blocks_df.index.max()}")
1659
-
1660
- return blocks_df
1661
-
1662
- def _append_previous_days_to_df(daily_df: pd.DataFrame, config : ConfigManager, ratio_period_days : int, day_table_name : str, primary_key : str = "time_pt") -> pd.DataFrame:
1663
- db_connection, cursor = config.connect_db()
1664
- period_start = daily_df.index.min() - timedelta(ratio_period_days)
1665
- try:
1666
- # find existing times in database for upsert statement
1667
- cursor.execute(
1668
- f"SELECT * FROM {day_table_name} WHERE {primary_key} < '{daily_df.index.min()}' AND {primary_key} >= '{period_start}'")
1669
- result = cursor.fetchall()
1670
- column_names = [desc[0] for desc in cursor.description]
1671
- old_days_df = pd.DataFrame(result, columns=column_names)
1672
- old_days_df = old_days_df.set_index(primary_key)
1673
- daily_df = pd.concat([daily_df, old_days_df])
1674
- daily_df = daily_df.sort_index(ascending=True)
1675
- except mysqlerrors.Error:
1676
- print(f"Table {day_table_name} has no data.")
1677
-
1678
- db_connection.close()
1679
- cursor.close()
1680
- return daily_df
1681
-
1682
- # def flag_dhw_outage(df: pd.DataFrame, daily_df : pd.DataFrame, dhw_outlet_column : str, supply_temp : int = 110, consecutive_minutes : int = 15) -> pd.DataFrame:
1683
- # """
1684
- # Parameters
1685
- # ----------
1686
- # df : pd.DataFrame
1687
- # Single pandas dataframe of sensor data on minute intervals.
1688
- # daily_df : pd.DataFrame
1689
- # Single pandas dataframe of sensor data on daily intervals.
1690
- # dhw_outlet_column : str
1691
- # Name of the column in df and daily_df that contains temperature of DHW supplied to building occupants
1692
- # supply_temp : int
1693
- # the minimum DHW temperature acceptable to supply to building occupants
1694
- # consecutive_minutes : int
1695
- # the number of minutes in a row that DHW is not delivered to tenants to qualify as a DHW Outage
1696
-
1697
- # Returns
1698
- # -------
1699
- # event_df : pd.DataFrame
1700
- # Dataframe with 'ALARM' events on the days in which there was a DHW Outage.
1701
- # """
1702
- # # TODO edge case for outage that spans over a day
1703
- # events = {
1704
- # 'start_time_pt' : [],
1705
- # 'end_time_pt' : [],
1706
- # 'event_type' : [],
1707
- # 'event_detail' : [],
1708
- # }
1709
- # mask = df[dhw_outlet_column] < supply_temp
1710
- # for day in daily_df.index:
1711
- # next_day = day + pd.Timedelta(days=1)
1712
- # filtered_df = mask.loc[(mask.index >= day) & (mask.index < next_day)]
1713
-
1714
- # consecutive_condition = filtered_df.rolling(window=consecutive_minutes).min() == 1
1715
- # if consecutive_condition.any():
1716
- # # first_true_index = consecutive_condition['supply_temp'].idxmax()
1717
- # first_true_index = consecutive_condition.idxmax()
1718
- # adjusted_time = first_true_index - pd.Timedelta(minutes=consecutive_minutes-1)
1719
- # events['start_time_pt'].append(day)
1720
- # events['end_time_pt'].append(next_day - pd.Timedelta(minutes=1))
1721
- # events['event_type'].append("ALARM")
1722
- # events['event_detail'].append(f"Hot Water Outage Occured (first one starting at {adjusted_time.strftime('%H:%M')})")
1723
- # event_df = pd.DataFrame(events)
1724
- # event_df.set_index('start_time_pt', inplace=True)
1725
- # return event_df
1726
-
1727
- # def generate_event_log_df(config : ConfigManager):
1728
- # """
1729
- # Creates an event log df based on user submitted events in an event log csv
1730
- # Parameters
1731
- # ----------
1732
- # config : ecopipeline.ConfigManager
1733
- # The ConfigManager object that holds configuration data for the pipeline.
1734
-
1735
- # Returns
1736
- # -------
1737
- # event_df : pd.DataFrame
1738
- # Dataframe formatted from events in Event_log.csv for pipeline.
1739
- # """
1740
- # event_filename = config.get_event_log_path()
1741
- # try:
1742
- # event_df = pd.read_csv(event_filename)
1743
- # event_df['start_time_pt'] = pd.to_datetime(event_df['start_time_pt'])
1744
- # event_df['end_time_pt'] = pd.to_datetime(event_df['end_time_pt'])
1745
- # event_df.set_index('start_time_pt', inplace=True)
1746
- # return event_df
1747
- # except Exception as e:
1748
- # print(f"Error processing file {event_filename}: {e}")
1749
- # return pd.DataFrame({
1750
- # 'start_time_pt' : [],
1751
- # 'end_time_pt' : [],
1752
- # 'event_type' : [],
1753
- # 'event_detail' : [],
1754
- # })
1755
-
1756
-
1757
-
1758
- # def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
1759
- # """
1760
- # Function must be called on the raw minute data df after the rename_varriables() and before the ffill_missing() function has been called.
1761
- # The function returns a dataframe indexed by day. Each column will expanded to 3 columns, appended with '_missing_mins', '_avg_gap', and
1762
- # '_max_gap' respectively. the columns will carry the following statisctics:
1763
- # _missing_mins -> the number of minutes in the day that have no reported data value for the column
1764
- # _avg_gap -> the average gap (in minutes) between collected data values that day
1765
- # _max_gap -> the maximum gap (in minutes) between collected data values that day
1766
-
1767
- # Parameters
1768
- # ----------
1769
- # df : pd.DataFrame
1770
- # minute data df after the rename_varriables() and before the ffill_missing() function has been called
1771
-
1772
- # Returns
1773
- # -------
1774
- # daily_data_stats : pd.DataFrame
1775
- # new dataframe with the columns descriped in the function's description
1776
- # """
1777
- # min_time = df.index.min()
1778
- # start_day = min_time.floor('D')
1779
-
1780
- # # If min_time is not exactly at the start of the day, move to the next day
1781
- # if min_time != start_day:
1782
- # start_day = start_day + pd.tseries.offsets.Day(1)
1783
-
1784
- # # Build a complete minutely timestamp index over the full date range
1785
- # full_index = pd.date_range(start=start_day,
1786
- # end=df.index.max().floor('D') - pd.Timedelta(minutes=1),
1787
- # freq='T')
1788
-
1789
- # # Reindex to include any completely missing minutes
1790
- # df_full = df.reindex(full_index)
1791
-
1792
- # # Resample daily to count missing values per column
1793
- # total_missing = df_full.isna().resample('D').sum().astype(int)
1794
-
1795
- # # Function to calculate max consecutive missing values
1796
- # def max_consecutive_nans(x):
1797
- # is_na = x.isna()
1798
- # groups = (is_na != is_na.shift()).cumsum()
1799
- # return is_na.groupby(groups).sum().max() or 0
1800
-
1801
- # # Function to calculate average consecutive missing values
1802
- # def avg_consecutive_nans(x):
1803
- # is_na = x.isna()
1804
- # groups = (is_na != is_na.shift()).cumsum()
1805
- # gap_lengths = is_na.groupby(groups).sum()
1806
- # gap_lengths = gap_lengths[gap_lengths > 0]
1807
- # if len(gap_lengths) == 0:
1808
- # return 0
1809
- # return gap_lengths.mean()
1810
-
1811
- # # Apply daily, per column
1812
- # max_consec_missing = df_full.resample('D').apply(lambda day: day.apply(max_consecutive_nans))
1813
- # avg_consec_missing = df_full.resample('D').apply(lambda day: day.apply(avg_consecutive_nans))
1814
-
1815
- # # Rename columns to include a suffix
1816
- # total_missing = total_missing.add_suffix('_missing_mins')
1817
- # max_consec_missing = max_consec_missing.add_suffix('_max_gap')
1818
- # avg_consec_missing = avg_consec_missing.add_suffix('_avg_gap')
1819
-
1820
- # # Concatenate along columns (axis=1)
1821
- # combined_df = pd.concat([total_missing, max_consec_missing, avg_consec_missing], axis=1)
1822
-
1823
- # return combined_df
764
+ alarm = PowerRatio(bounds_df, day_table_name, ratio_period_days)
765
+ return alarm.find_alarms(None, daily_df, config)