ecopipeline 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. ecopipeline/event_tracking/Alarm.py +317 -0
  2. ecopipeline/event_tracking/__init__.py +20 -2
  3. ecopipeline/event_tracking/alarms/AbnormalCOP.py +76 -0
  4. ecopipeline/event_tracking/alarms/BackupUse.py +94 -0
  5. ecopipeline/event_tracking/alarms/BalancingValve.py +78 -0
  6. ecopipeline/event_tracking/alarms/BlownFuse.py +72 -0
  7. ecopipeline/event_tracking/alarms/Boundary.py +90 -0
  8. ecopipeline/event_tracking/alarms/HPWHInlet.py +73 -0
  9. ecopipeline/event_tracking/alarms/HPWHOutage.py +96 -0
  10. ecopipeline/event_tracking/alarms/HPWHOutlet.py +85 -0
  11. ecopipeline/event_tracking/alarms/LSInconsist.py +114 -0
  12. ecopipeline/event_tracking/alarms/PowerRatio.py +111 -0
  13. ecopipeline/event_tracking/alarms/SOOChange.py +127 -0
  14. ecopipeline/event_tracking/alarms/ShortCycle.py +59 -0
  15. ecopipeline/event_tracking/alarms/TMSetpoint.py +127 -0
  16. ecopipeline/event_tracking/alarms/TempRange.py +84 -0
  17. ecopipeline/event_tracking/alarms/__init__.py +0 -0
  18. ecopipeline/event_tracking/event_tracking.py +517 -704
  19. ecopipeline/extract/__init__.py +2 -2
  20. ecopipeline/extract/extract.py +84 -0
  21. ecopipeline/load/__init__.py +2 -2
  22. ecopipeline/load/load.py +304 -3
  23. ecopipeline/transform/transform.py +1 -1
  24. ecopipeline/utils/ConfigManager.py +15 -2
  25. {ecopipeline-1.0.4.dist-info → ecopipeline-1.1.0.dist-info}/METADATA +1 -1
  26. ecopipeline-1.1.0.dist-info/RECORD +41 -0
  27. {ecopipeline-1.0.4.dist-info → ecopipeline-1.1.0.dist-info}/WHEEL +1 -1
  28. ecopipeline-1.0.4.dist-info/RECORD +0 -25
  29. {ecopipeline-1.0.4.dist-info → ecopipeline-1.1.0.dist-info}/licenses/LICENSE +0 -0
  30. {ecopipeline-1.0.4.dist-info → ecopipeline-1.1.0.dist-info}/top_level.txt +0 -0
@@ -5,33 +5,93 @@ from ecopipeline import ConfigManager
5
5
  import re
6
6
  import mysql.connector.errors as mysqlerrors
7
7
  from datetime import timedelta
8
+ from .alarms.ShortCycle import ShortCycle
9
+ from .alarms.TempRange import TempRange
10
+ from .alarms.LSInconsist import LSInconsist
11
+ from .alarms.SOOChange import SOOChange
12
+ from .alarms.BlownFuse import BlownFuse
13
+ from .alarms.HPWHOutage import HPWHOutage
14
+ from .alarms.BackupUse import BackupUse
15
+ from .alarms.HPWHOutlet import HPWHOutlet
16
+ from .alarms.HPWHInlet import HPWHInlet
17
+ from .alarms.BalancingValve import BalancingValve
18
+ from .alarms.TMSetpoint import TMSetpoint
19
+ from .alarms.AbnormalCOP import AbnormalCOP
20
+ from .alarms.PowerRatio import PowerRatio
21
+ from .alarms.Boundary import Boundary
8
22
 
9
23
  def central_alarm_df_creator(df: pd.DataFrame, daily_data : pd.DataFrame, config : ConfigManager, system: str = "",
10
24
  default_cop_high_bound : float = 4.5, default_cop_low_bound : float = 0,
11
25
  default_boundary_fault_time : int = 15, site_name : str = None, day_table_name_header : str = "day",
12
26
  power_ratio_period_days : int = 7) -> pd.DataFrame:
27
+ if df.empty:
28
+ print("cannot flag missing balancing valve alarms. Dataframe is empty")
29
+ return pd.DataFrame()
30
+ variable_names_path = config.get_var_names_path()
31
+ try:
32
+ bounds_df = pd.read_csv(variable_names_path)
33
+ except FileNotFoundError:
34
+ print("File Not Found: ", variable_names_path)
35
+ return pd.DataFrame()
36
+ if (system != ""):
37
+ if not 'system' in bounds_df.columns:
38
+ raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
39
+ bounds_df = bounds_df.loc[bounds_df['system'] == system]
40
+
13
41
  day_list = daily_data.index.to_list()
14
42
  print('Checking for alarms...')
15
43
  alarm_df = _convert_silent_alarm_dict_to_df({})
16
44
  dict_of_alarms = {}
17
- dict_of_alarms['boundary'] = flag_boundary_alarms(df, config, full_days=day_list, system=system, default_fault_time= default_boundary_fault_time)
18
- dict_of_alarms['power ratio'] = power_ratio_alarm(daily_data, config, day_table_name = config.get_table_name(day_table_name_header), system=system, ratio_period_days=power_ratio_period_days)
19
- dict_of_alarms['abnormal COP'] = flag_abnormal_COP(daily_data, config, system = system, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)
20
- dict_of_alarms['swing tank setpoint'] = flag_high_swing_setpoint(df, daily_data, config, system=system)
21
- dict_of_alarms['recirculation loop balancing valve'] = flag_recirc_balance_valve(daily_data, config, system=system)
22
- dict_of_alarms['HPWH inlet temperature'] = flag_hp_inlet_temp(df, daily_data, config, system)
45
+ dict_of_alarms['boundary'] = Boundary(bounds_df, default_fault_time= default_boundary_fault_time)
46
+ # flag_boundary_alarms(df, config, full_days=day_list, system=system, default_fault_time= default_boundary_fault_time)
47
+ dict_of_alarms['power ratio'] = PowerRatio(bounds_df, day_table_name = config.get_table_name(day_table_name_header), ratio_period_days=power_ratio_period_days)
48
+ # power_ratio_alarm(daily_data, config, day_table_name = config.get_table_name(day_table_name_header), system=system, ratio_period_days=power_ratio_period_days)
49
+ dict_of_alarms['abnormal COP'] = AbnormalCOP(bounds_df, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)
50
+ # flag_abnormal_COP(daily_data, config, system = system, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)
51
+ dict_of_alarms['temperature maintenance setpoint'] = TMSetpoint(bounds_df)
52
+ # flag_high_tm_setpoint(df, daily_data, config, system=system)
53
+ dict_of_alarms['recirculation loop balancing valve'] = BalancingValve(bounds_df)
54
+ # flag_recirc_balance_valve(daily_data, config, system=system)
55
+ dict_of_alarms['HPWH inlet temperature'] = HPWHInlet(bounds_df)
56
+ # flag_hp_inlet_temp(df, daily_data, config, system)
57
+ dict_of_alarms['HPWH outlet temperature'] = HPWHOutlet(bounds_df)
58
+ # flag_hp_outlet_temp(df, daily_data, config, system)
59
+ dict_of_alarms['improper backup heating use'] = BackupUse(bounds_df)
60
+ # flag_backup_use(df, daily_data, config, system)
61
+ dict_of_alarms['HPWH outage'] = HPWHOutage(bounds_df, day_table_name = config.get_table_name(day_table_name_header))
62
+ # flag_HP_outage(df, daily_data, config, day_table_name = config.get_table_name(day_table_name_header), system=system)
63
+ dict_of_alarms['blown equipment fuse'] = BlownFuse(bounds_df)
64
+ # flag_blown_fuse(df, daily_data, config, system)
65
+ dict_of_alarms['unexpected SOO change'] = SOOChange(bounds_df)
66
+ # flag_unexpected_soo_change(df, daily_data, config, system)
67
+ dict_of_alarms['short cycle'] = ShortCycle(bounds_df)
68
+ # flag_shortcycle(df, daily_data, config, system)
69
+ dict_of_alarms['unexpected temperature'] = TempRange(bounds_df)
70
+ # flag_unexpected_temp(df, daily_data, config, system)
71
+ dict_of_alarms['demand response inconsistency'] = LSInconsist(bounds_df)
72
+ # flag_ls_mode_inconsistancy(df, daily_data, config, system)
73
+ # return alarm.find_alarms(df, daily_df, config)
23
74
 
24
75
  ongoing_COP_exception = ['abnormal COP']
25
-
26
76
  for key, value in dict_of_alarms.items():
27
- if key in ongoing_COP_exception and _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
28
- print("Ongoing DATA_LOSS_COP detected. No further DATA_LOSS_COP events will be uploaded")
29
- elif len(value) > 0:
77
+ # if key in ongoing_COP_exception and _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
78
+ # print("Ongoing DATA_LOSS_COP detected. ABNORMAL_COP events will be uploaded")
79
+ specific_alarm_df = value.find_alarms(df, daily_data, config)
80
+ if len(specific_alarm_df) > 0:
30
81
  print(f"Detected {key} alarm(s). Adding to event df...")
31
- alarm_df = pd.concat([alarm_df, value])
82
+ alarm_df = pd.concat([alarm_df, specific_alarm_df])
32
83
  else:
33
84
  print(f"No {key} alarm(s) detected.")
34
85
 
86
+ # for key, value in dict_of_alarms.items():
87
+ # if key in ongoing_COP_exception and _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
88
+ # print("Ongoing DATA_LOSS_COP detected. No further DATA_LOSS_COP events will be uploaded")
89
+ # elif len(value) > 0:
90
+ # print(f"Detected {key} alarm(s). Adding to event df...")
91
+ # alarm_df = pd.concat([alarm_df, value])
92
+ # else:
93
+ # print(f"No {key} alarm(s) detected.")
94
+
35
95
  return alarm_df
36
96
 
37
97
  def flag_abnormal_COP(daily_data: pd.DataFrame, config : ConfigManager, system: str = "", default_high_bound : float = 4.5, default_low_bound : float = 0) -> pd.DataFrame:
@@ -41,65 +101,9 @@ def flag_abnormal_COP(daily_data: pd.DataFrame, config : ConfigManager, system:
41
101
  except FileNotFoundError:
42
102
  print("File Not Found: ", variable_names_path)
43
103
  return pd.DataFrame()
104
+ alarm = AbnormalCOP(bounds_df, default_high_bound, default_low_bound)
105
+ return alarm.find_alarms(None, daily_data, config)
44
106
 
45
- if (system != ""):
46
- if not 'system' in bounds_df.columns:
47
- raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
48
- bounds_df = bounds_df.loc[bounds_df['system'] == system]
49
- if not "variable_name" in bounds_df.columns:
50
- raise Exception(f"variable_name is not present in Variable_Names.csv")
51
- if not 'pretty_name' in bounds_df.columns:
52
- bounds_df['pretty_name'] = bounds_df['variable_name']
53
- else:
54
- bounds_df['pretty_name'] = bounds_df['pretty_name'].fillna(bounds_df['variable_name'])
55
- if not 'high_alarm' in bounds_df.columns:
56
- bounds_df['high_alarm'] = default_high_bound
57
- else:
58
- bounds_df['high_alarm'] = bounds_df['high_alarm'].fillna(default_high_bound)
59
- if not 'low_alarm' in bounds_df.columns:
60
- bounds_df['low_alarm'] = default_low_bound
61
- else:
62
- bounds_df['low_alarm'] = bounds_df['low_alarm'].fillna(default_low_bound)
63
-
64
- bounds_df = bounds_df.loc[:, ["variable_name", "high_alarm", "low_alarm", "pretty_name"]]
65
- bounds_df.dropna(axis=0, thresh=2, inplace=True)
66
- bounds_df.set_index(['variable_name'], inplace=True)
67
-
68
- cop_pattern = re.compile(r'^(COP\w*|SystemCOP\w*)$')
69
- cop_columns = [col for col in daily_data.columns if re.match(cop_pattern, col)]
70
-
71
- alarms_dict = {}
72
- if not daily_data.empty and len(cop_columns) > 0:
73
- for bound_var, bounds in bounds_df.iterrows():
74
- if bound_var in cop_columns:
75
- for day, day_values in daily_data.iterrows():
76
- if not day_values[bound_var] is None and (day_values[bound_var] > bounds['high_alarm'] or day_values[bound_var] < bounds['low_alarm']):
77
- alarm_str = f"Unexpected COP Value detected: {bounds['pretty_name']} = {round(day_values[bound_var],2)}"
78
- if day in alarms_dict:
79
- alarms_dict[day].append([bound_var, alarm_str])
80
- else:
81
- alarms_dict[day] = [[bound_var, alarm_str]]
82
- return _convert_event_type_dict_to_df(alarms_dict, event_type="SILENT_ALARM")
83
-
84
- def _check_if_during_ongoing_cop_alarm(daily_df : pd.DataFrame, config : ConfigManager, site_name : str = None) -> bool:
85
- if site_name is None:
86
- site_name = config.get_site_name()
87
- connection, cursor = config.connect_db()
88
- on_going_cop = False
89
- try:
90
- # find existing times in database for upsert statement
91
- cursor.execute(
92
- f"SELECT id FROM site_events WHERE start_time_pt <= '{daily_df.index.min()}' AND (end_time_pt IS NULL OR end_time_pt >= '{daily_df.index.max()}') AND site_name = '{site_name}' AND event_type = 'DATA_LOSS_COP'")
93
- # Fetch the results into a DataFrame
94
- existing_rows = pd.DataFrame(cursor.fetchall(), columns=['id'])
95
- if not existing_rows.empty:
96
- on_going_cop = True
97
-
98
- except mysqlerrors.Error as e:
99
- print(f"Retrieving data from site_events caused exception: {e}")
100
- connection.close()
101
- cursor.close()
102
- return on_going_cop
103
107
 
104
108
  def flag_boundary_alarms(df: pd.DataFrame, config : ConfigManager, default_fault_time : int = 15, system: str = "", full_days : list = None) -> pd.DataFrame:
105
109
  """
@@ -113,13 +117,13 @@ def flag_boundary_alarms(df: pd.DataFrame, config : ConfigManager, default_fault
113
117
  are out of order or have gaps, the function may return erroneous alarms.
114
118
  config : ecopipeline.ConfigManager
115
119
  The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
116
- called Varriable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
120
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
117
121
  The file must have at least three columns which must be titled "variable_name", "low_alarm", and "high_alarm" which should contain the
118
122
  name of each variable in the dataframe that requires the alarming, the lower bound for acceptable data, and the upper bound for
119
123
  acceptable data respectively
120
124
  default_fault_time : int
121
125
  Number of consecutive minutes that a sensor must be out of bounds for to trigger an alarm. Can be customized for each variable with
122
- the fault_time column in Varriable_Names.csv
126
+ the fault_time column in Variable_Names.csv
123
127
  system: str
124
128
  string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not aplicable.
125
129
  full_days : list
@@ -139,51 +143,10 @@ def flag_boundary_alarms(df: pd.DataFrame, config : ConfigManager, default_fault
139
143
  except FileNotFoundError:
140
144
  print("File Not Found: ", variable_names_path)
141
145
  return pd.DataFrame()
146
+ alarm = Boundary(bounds_df, default_fault_time)
147
+ return alarm.find_alarms(df, None, config)
142
148
 
143
- if (system != ""):
144
- if not 'system' in bounds_df.columns:
145
- raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
146
- bounds_df = bounds_df.loc[bounds_df['system'] == system]
147
-
148
- required_columns = ["variable_name", "high_alarm", "low_alarm"]
149
- for required_column in required_columns:
150
- if not required_column in bounds_df.columns:
151
- raise Exception(f"{required_column} is not present in Variable_Names.csv")
152
- if not 'pretty_name' in bounds_df.columns:
153
- bounds_df['pretty_name'] = bounds_df['variable_name']
154
- else:
155
- bounds_df['pretty_name'] = bounds_df['pretty_name'].fillna(bounds_df['variable_name'])
156
- if not 'fault_time' in bounds_df.columns:
157
- bounds_df['fault_time'] = default_fault_time
158
-
159
- idx = df.index
160
- if full_days is None:
161
- full_days = pd.to_datetime(pd.Series(idx).dt.normalize().unique())
162
-
163
- bounds_df = bounds_df.loc[:, ["variable_name", "high_alarm", "low_alarm", "fault_time", "pretty_name"]]
164
- bounds_df.dropna(axis=0, thresh=2, inplace=True)
165
- bounds_df.set_index(['variable_name'], inplace=True)
166
- # ensure that lower and upper bounds are numbers
167
- bounds_df['high_alarm'] = pd.to_numeric(bounds_df['high_alarm'], errors='coerce').astype(float)
168
- bounds_df['low_alarm'] = pd.to_numeric(bounds_df['low_alarm'], errors='coerce').astype(float)
169
- bounds_df['fault_time'] = pd.to_numeric(bounds_df['fault_time'], errors='coerce').astype('Int64')
170
- bounds_df = bounds_df[bounds_df.index.notnull()]
171
- alarms = {}
172
- for bound_var, bounds in bounds_df.iterrows():
173
- if bound_var in df.columns:
174
- lower_mask = df[bound_var] < bounds["low_alarm"]
175
- upper_mask = df[bound_var] > bounds["high_alarm"]
176
- if pd.isna(bounds['fault_time']):
177
- bounds['fault_time'] = default_fault_time
178
- for day in full_days:
179
- if bounds['fault_time'] < 1 :
180
- print(f"Could not process alarm for {bound_var}. Fault time must be greater than or equal to 1 minute.")
181
- _check_and_add_alarm(df, lower_mask, alarms, day, bounds["fault_time"], bound_var, bounds['pretty_name'], 'Lower')
182
- _check_and_add_alarm(df, upper_mask, alarms, day, bounds["fault_time"], bound_var, bounds['pretty_name'], 'Upper')
183
-
184
- return _convert_silent_alarm_dict_to_df(alarms)
185
-
186
- def flag_high_swing_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, default_fault_time : int = 3,
149
+ def flag_high_tm_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, default_fault_time : int = 3,
187
150
  system: str = "", default_setpoint : float = 130.0, default_power_indication : float = 1.0,
188
151
  default_power_ratio : float = 0.4) -> pd.DataFrame:
189
152
  """
@@ -191,10 +154,10 @@ def flag_high_swing_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config :
191
154
  and create an dataframe with applicable alarm events
192
155
 
193
156
  VarNames syntax:
194
- STS_T_ID:### - Swing Tank Outlet Temperature. Alarm triggered if over number ### (or 130) for 3 minutes with power on
195
- STS_SP_ID:### - Swing Tank Power. ### is lowest recorded power for Swing Tank to be considered 'on'. Defaults to 1.0
196
- STS_TP_ID:### - Total System Power for ratio alarming for alarming if swing tank power is more than ### (40% default) of usage
197
- STS_ST_ID:### - Swing Tank Setpoint that should not change at all from ### (default 130)
157
+ TMSTPT_T_ID:### - Swing Tank Outlet Temperature. Alarm triggered if over number ### (or 130) for 3 minutes with power on
158
+ TMSTPT_SP_ID:### - Swing Tank Power. ### is lowest recorded power for Swing Tank to be considered 'on'. Defaults to 1.0
159
+ TMSTPT_TP_ID:### - Total System Power for ratio alarming for alarming if swing tank power is more than ### (40% default) of usage
160
+ TMSTPT_ST_ID:### - Swing Tank Setpoint that should not change at all from ### (default 130)
198
161
 
199
162
  Parameters
200
163
  ----------
@@ -205,9 +168,9 @@ def flag_high_swing_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config :
205
168
  post-transformed dataframe for daily data. Used for checking power ratios and determining which days to process.
206
169
  config : ecopipeline.ConfigManager
207
170
  The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
208
- called Varriable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
171
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
209
172
  The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
210
- name of each variable in the dataframe that requires alarming and the STS alarm codes (e.g., STS_T_1:140, STS_SP_1:2.0)
173
+ name of each variable in the dataframe that requires alarming and the TMSTPT alarm codes (e.g., TMSTPT_T_1:140, TMSTPT_SP_1:2.0)
211
174
  default_fault_time : int
212
175
  Number of consecutive minutes for T+SP alarms (default 3). T+SP alarms trigger when tank is powered and temperature exceeds
213
176
  setpoint for this many consecutive minutes.
@@ -234,117 +197,148 @@ def flag_high_swing_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config :
234
197
  except FileNotFoundError:
235
198
  print("File Not Found: ", variable_names_path)
236
199
  return pd.DataFrame()
200
+ alarm = TMSetpoint(bounds_df, default_fault_time, default_setpoint, default_power_indication, default_power_ratio)
201
+ return alarm.find_alarms(df, daily_df, config)
237
202
 
238
- bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'STS',
239
- {'T' : default_setpoint,
240
- 'SP': default_power_indication,
241
- 'TP': default_power_ratio,
242
- 'ST': default_setpoint},
243
- system)
244
- if bounds_df.empty:
245
- return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
246
-
247
- # Process each unique alarm_code_id
248
- alarms = {}
249
- for day in daily_df.index:
250
- next_day = day + pd.Timedelta(days=1)
251
- filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
252
- alarmed_for_day = False
253
- for alarm_id in bounds_df['alarm_code_id'].unique():
254
- id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
255
-
256
- # Get T and SP alarm codes for this ID
257
- t_codes = id_group[id_group['alarm_code_type'] == 'T']
258
- sp_codes = id_group[id_group['alarm_code_type'] == 'SP']
259
- tp_codes = id_group[id_group['alarm_code_type'] == 'TP']
260
- st_codes = id_group[id_group['alarm_code_type'] == 'ST']
261
-
262
- # Check for multiple T or SP codes with same ID
263
- if len(t_codes) > 1 or len(sp_codes) > 1 or len(tp_codes) > 1 or len(st_codes) > 1:
264
- raise Exception(f"Improper alarm codes for swing tank setpoint with id {alarm_id}")
265
-
266
- # Check if we have both T and SP
267
- if len(t_codes) == 1 and len(sp_codes) == 1:
268
- t_var_name = t_codes.iloc[0]['variable_name']
269
- sp_var_name = sp_codes.iloc[0]['variable_name']
270
- sp_power_indication = sp_codes.iloc[0]['bound']
271
- t_setpoint = t_codes.iloc[0]['bound']
272
- # Check if both variables exist in df
273
- if t_var_name in filtered_df.columns and sp_var_name in filtered_df.columns:
274
- # Check for consecutive minutes where SP > default_power_indication
275
- # AND T >= default_setpoint
276
- power_mask = filtered_df[sp_var_name] >= sp_power_indication
277
- temp_mask = filtered_df[t_var_name] >= t_setpoint
278
- combined_mask = power_mask & temp_mask
279
-
280
- # Check for 3 consecutive minutes
281
- consecutive_condition = combined_mask.rolling(window=default_fault_time).min() == 1
282
- if consecutive_condition.any():
283
- # Get the first index where condition was met
284
- first_true_index = consecutive_condition.idxmax()
285
- # Adjust for the rolling window (first fault_time-1 minutes don't count)
286
- adjusted_time = first_true_index - pd.Timedelta(minutes=default_fault_time-1)
287
- _add_an_alarm(alarms, adjusted_time, sp_var_name, f"High swing tank setpoint: Swing tank was powered at {adjusted_time} although temperature was above {t_setpoint}.")
288
- alarmed_for_day = True
289
- if not alarmed_for_day and len(st_codes) == 1:
290
- st_var_name = st_codes.iloc[0]['variable_name']
291
- st_setpoint = st_codes.iloc[0]['bound']
292
- # Check if st_var_name exists in filtered_df
293
- if st_var_name in filtered_df.columns:
294
- # Check if setpoint was altered for over 10 minutes
295
- altered_mask = filtered_df[st_var_name] != st_setpoint
296
- consecutive_condition = altered_mask.rolling(window=10).min() == 1
297
- if consecutive_condition.any():
298
- # Get the first index where condition was met
299
- first_true_index = consecutive_condition.idxmax()
300
- # Adjust for the rolling window
301
- adjusted_time = first_true_index - pd.Timedelta(minutes=9)
302
- _add_an_alarm(alarms, day, st_var_name, f"Swing tank setpoint was altered at {adjusted_time}")
303
- alarmed_for_day = True
304
- if not alarmed_for_day and len(tp_codes) == 1 and len(sp_codes) == 1:
305
- tp_var_name = tp_codes.iloc[0]['variable_name']
306
- sp_var_name = sp_codes.iloc[0]['variable_name']
307
- tp_ratio = tp_codes.iloc[0]['bound']
308
- # Check if both variables exist in df
309
- if tp_var_name in daily_df.columns and sp_var_name in daily_df.columns:
310
- # Check if swing tank power ratio exceeds threshold
311
- if day in daily_df.index and daily_df.loc[day, tp_var_name] != 0:
312
- power_ratio = daily_df.loc[day, sp_var_name] / daily_df.loc[day, tp_var_name]
313
- if power_ratio > tp_ratio:
314
- _add_an_alarm(alarms, day, sp_var_name, f"High swing tank power ratio: Swing tank accounted for more than {tp_ratio * 100}% of daily power.")
315
- return _convert_silent_alarm_dict_to_df(alarms)
316
-
317
- def flag_recirc_balance_valve(daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_ratio : float = 0.4) -> pd.DataFrame:
203
+ def flag_backup_use(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager,
204
+ system: str = "", default_setpoint : float = 130.0, default_power_ratio : float = 0.1) -> pd.DataFrame:
318
205
  """
319
206
  Function will take a pandas dataframe and location of alarm information in a csv,
320
207
  and create an dataframe with applicable alarm events
321
208
 
322
209
  VarNames syntax:
323
- BV_ER_[OPTIONAL ID] : Indicates a power variable for an ER heater (equipment recirculation)
324
- BV_OUT_[OPTIONAL ID]:### - Indicates the heating output variable the ER heating contributes to. Optional ### for the percentage
325
- threshold that should not be crossed by the ER elements (default 0.4 for 40%)
210
+ BU_P_ID - Back Up Tank Power Variable. Must be in same power units as total system power
211
+ BU_TP_ID:### - Total System Power for ratio alarming for alarming if back up power is more than ### (40% default) of usage
212
+ BU_ST_ID:### - Back Up Setpoint that should not change at all from ### (default 130)
326
213
 
327
214
  Parameters
328
215
  ----------
216
+ df: pd.DataFrame
217
+ post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in order minutes. If minutes
218
+ are out of order or have gaps, the function may return erroneous alarms.
329
219
  daily_df: pd.DataFrame
330
- post-transformed dataframe for daily data. Used for checking recirculation balance by comparing sum of ER equipment
331
- power to heating output power.
220
+ post-transformed dataframe for daily data. Used for checking power ratios and determining which days to process.
332
221
  config : ecopipeline.ConfigManager
333
222
  The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
334
- called Varriable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
223
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
335
224
  The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
336
- name of each variable in the dataframe that requires alarming and the BV alarm codes (e.g., BV_ER_1, BV_OUT_1:0.5)
225
+ name of each variable in the dataframe that requires alarming and the STS alarm codes (e.g., STS_T_1:140, STS_SP_1:2.0)
337
226
  system: str
338
227
  string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not aplicable.
228
+ default_setpoint : float
229
+ Default temperature setpoint in degrees for T and ST alarm codes when no custom bound is specified (default 130.0)
230
+ default_power_indication : float
231
+ Default power threshold in kW for SP alarm codes when no custom bound is specified (default 1.0)
339
232
  default_power_ratio : float
340
- Default power ratio threshold (as decimal, e.g., 0.4 for 40%) for OUT alarm codes when no custom bound is specified (default 0.4).
341
- Alarm triggers when sum of ER equipment >= (OUT value / default_power_ratio)
233
+ Default power ratio threshold (as decimal, e.g., 0.4 for 40%) for TP alarm codes when no custom bound is specified (default 0.4)
342
234
 
343
235
  Returns
344
236
  -------
345
237
  pd.DataFrame:
346
238
  Pandas dataframe with alarm events
347
239
  """
240
+ if df.empty:
241
+ print("cannot flag swing tank setpoint alarms. Dataframe is empty")
242
+ return pd.DataFrame()
243
+ variable_names_path = config.get_var_names_path()
244
+ try:
245
+ bounds_df = pd.read_csv(variable_names_path)
246
+ except FileNotFoundError:
247
+ print("File Not Found: ", variable_names_path)
248
+ return pd.DataFrame()
249
+ alarm = BackupUse(bounds_df, default_setpoint, default_power_ratio)
250
+ return alarm.find_alarms(df, daily_df, config)
251
+
252
+ def flag_HP_outage(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, day_table_name : str, system: str = "", default_power_ratio : float = 0.3,
253
+ ratio_period_days : int = 7) -> pd.DataFrame:
254
+ """
255
+ Detects possible heat pump failures or outages by checking if heat pump power consumption falls below
256
+ an expected ratio of total system power over a rolling period, or by checking for non-zero values in
257
+ a direct alarm variable from the heat pump controller.
258
+
259
+ VarNames syntax:
260
+ HPOUT_POW_[OPTIONAL ID]:### - Heat pump power variable. ### is the minimum expected ratio of HP power to total power
261
+ (default 0.3 for 30%). Must be in same power units as total system power.
262
+ HPOUT_TP_[OPTIONAL ID] - Total system power variable for ratio comparison. Required when using POW codes.
263
+ HPOUT_ALRM_[OPTIONAL ID] - Direct alarm variable from HP controller. Alarm triggers if any non-zero value is detected.
264
+
265
+ Parameters
266
+ ----------
267
+ df: pd.DataFrame
268
+ Post-transformed dataframe for minute data. Used for checking ALRM codes for non-zero values.
269
+ daily_df: pd.DataFrame
270
+ Post-transformed dataframe for daily data. Used for checking power ratios over the rolling period.
271
+ config : ecopipeline.ConfigManager
272
+ The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
273
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
274
+ The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
275
+ name of each variable in the dataframe that requires alarming and the HPOUT alarm codes (e.g., HPOUT_POW_1:0.3, HPOUT_TP_1, HPOUT_ALRM_1).
276
+ day_table_name : str
277
+ Name of the daily database table to fetch previous days' data for the rolling period calculation.
278
+ system: str
279
+ String of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
280
+ default_power_ratio : float
281
+ Default minimum power ratio threshold (as decimal, e.g., 0.3 for 30%) for POW alarm codes when no custom bound is specified (default 0.3).
282
+ An alarm triggers if HP power falls below this ratio of total power over the rolling period.
283
+ ratio_period_days : int
284
+ Number of days to use for the rolling power ratio calculation (default 7). Must be greater than 1.
285
+
286
+ Returns
287
+ -------
288
+ pd.DataFrame:
289
+ Pandas dataframe with alarm events
290
+ """
291
+ if df.empty:
292
+ print("cannot flag swing tank setpoint alarms. Dataframe is empty")
293
+ return pd.DataFrame()
294
+ variable_names_path = config.get_var_names_path()
295
+ try:
296
+ bounds_df = pd.read_csv(variable_names_path)
297
+ except FileNotFoundError:
298
+ print("File Not Found: ", variable_names_path)
299
+ return pd.DataFrame()
300
+
301
+ alarm = HPWHOutage(bounds_df, day_table_name, default_power_ratio, ratio_period_days)
302
+ return alarm.find_alarms(df, daily_df, config)
303
+
304
+ def flag_recirc_balance_valve(daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_ratio : float = 0.4) -> pd.DataFrame:
305
+ """
306
+ Detects recirculation balance issues by comparing sum of ER (equipment recirculation) heater
307
+ power to either total power or heating output.
308
+
309
+ VarNames syntax:
310
+ BV_ER_[OPTIONAL ID] - Indicates a power variable for an ER heater (equipment recirculation).
311
+ Multiple ER variables with the same ID will be summed together.
312
+ BV_TP_[OPTIONAL ID]:### - Indicates the Total Power of the system. Optional ### for the percentage
313
+ threshold that should not be crossed by the ER elements (default 0.4 for 40%).
314
+ Alarm triggers when sum of ER >= total_power * threshold.
315
+ BV_OUT_[OPTIONAL ID] - Indicates the heating output variable the ER heating contributes to.
316
+ Alarm triggers when sum of ER > sum of OUT * 0.95 (i.e., ER exceeds 95% of heating output).
317
+ Multiple OUT variables with the same ID will be summed together.
318
+
319
+ Note: Each alarm ID requires at least one ER code AND either one TP code OR at least one OUT code.
320
+ If a TP code exists for an ID, it takes precedence over OUT codes.
321
+
322
+ Parameters
323
+ ----------
324
+ daily_df: pd.DataFrame
325
+ Post-transformed dataframe for daily data. Used for checking recirculation balance by comparing sum of ER equipment
326
+ power to total power or heating output power.
327
+ config : ecopipeline.ConfigManager
328
+ The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
329
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
330
+ The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
331
+ name of each variable in the dataframe that requires alarming and the BV alarm codes (e.g., BV_ER_1, BV_TP_1:0.3)
332
+ system: str
333
+ String of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
334
+ default_power_ratio : float
335
+ Default power ratio threshold (as decimal, e.g., 0.4 for 40%) for TP alarm codes when no custom bound is specified (default 0.4).
336
+
337
+ Returns
338
+ -------
339
+ pd.DataFrame:
340
+ Pandas dataframe with alarm events
341
+ """
348
342
  if daily_df.empty:
349
343
  print("cannot flag missing balancing valve alarms. Dataframe is empty")
350
344
  return pd.DataFrame()
@@ -354,36 +348,8 @@ def flag_recirc_balance_valve(daily_df: pd.DataFrame, config : ConfigManager, sy
354
348
  except FileNotFoundError:
355
349
  print("File Not Found: ", variable_names_path)
356
350
  return pd.DataFrame()
357
- bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'BV',
358
- {'OUT' : default_power_ratio},
359
- system)
360
- if bounds_df.empty:
361
- return _convert_silent_alarm_dict_to_df({}) # no BV alarms to look into
362
- # Process each unique alarm_code_id
363
- alarms = {}
364
- for alarm_id in bounds_df['alarm_code_id'].unique():
365
- id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
366
- out_codes = id_group[id_group['alarm_code_type'] == 'OUT']
367
- out_var_name = out_codes.iloc[0]['variable_name']
368
- out_bound = out_codes.iloc[0]['bound']
369
- er_codes = id_group[id_group['alarm_code_type'] == 'ER']
370
- if len(out_codes) > 1 or len(er_codes) < 1:
371
- raise Exception(f"Improper alarm codes for balancing valve with id {alarm_id}")
372
- for day in daily_df.index:
373
- if out_var_name in daily_df.columns:
374
- # Get list of ER variable names
375
- er_var_names = er_codes['variable_name'].tolist()
376
-
377
- # Check if all ER variables exist in daily_df
378
- if all(var in daily_df.columns for var in er_var_names):
379
- # Sum all ER variables for this day
380
- er_sum = daily_df.loc[day, er_var_names].sum()
381
- out_value = daily_df.loc[day, out_var_name]
382
-
383
- # Check if sum of ER >= OUT value
384
- if er_sum >= out_value*out_bound:
385
- _add_an_alarm(alarms, day, out_var_name, f"Recirculation imbalance: Sum of recirculation equipment ({er_sum:.2f}) exceeds or equals {(out_bound * 100):.2f}% of heating output.")
386
- return _convert_silent_alarm_dict_to_df(alarms)
351
+ alarm = BalancingValve(bounds_df, default_power_ratio)
352
+ return alarm.find_alarms(None, daily_df, config)
387
353
 
388
354
  def flag_hp_inlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
389
355
  default_temp_threshold : float = 115.0, fault_time : int = 5) -> pd.DataFrame:
@@ -406,7 +372,7 @@ def flag_hp_inlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : Config
406
372
  post-transformed dataframe for daily data.
407
373
  config : ecopipeline.ConfigManager
408
374
  The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
409
- called Varriable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
375
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
410
376
  The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
411
377
  name of each variable in the dataframe that requires alarming and the HPI alarm codes (e.g., HPI_POW_1:0.5, HPI_T_1:125.0)
412
378
  system: str
@@ -434,193 +400,335 @@ def flag_hp_inlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : Config
434
400
  except FileNotFoundError:
435
401
  print("File Not Found: ", variable_names_path)
436
402
  return pd.DataFrame()
403
+ alarm = HPWHInlet(bounds_df, default_power_threshold, default_temp_threshold, fault_time)
404
+ return alarm.find_alarms(df, daily_df, config)
437
405
 
438
- bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'HPI',
439
- {'POW' : default_power_threshold,
440
- 'T' : default_temp_threshold},
441
- system)
442
- if bounds_df.empty:
443
- return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
444
-
445
- # Process each unique alarm_code_id
446
- alarms = {}
447
- for alarm_id in bounds_df['alarm_code_id'].unique():
448
- for day in daily_df.index:
449
- next_day = day + pd.Timedelta(days=1)
450
- filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
451
- id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
452
- pow_codes = id_group[id_group['alarm_code_type'] == 'POW']
453
- pow_var_name = pow_codes.iloc[0]['variable_name']
454
- pow_thresh = pow_codes.iloc[0]['bound']
455
- t_codes = id_group[id_group['alarm_code_type'] == 'T']
456
- t_var_name = t_codes.iloc[0]['variable_name']
457
- t_pretty_name = t_codes.iloc[0]['pretty_name']
458
- t_thresh = t_codes.iloc[0]['bound']
459
- if len(t_codes) != 1 or len(pow_codes) != 1:
460
- raise Exception(f"Improper alarm codes for balancing valve with id {alarm_id}")
461
- if pow_var_name in filtered_df.columns and t_var_name in filtered_df.columns:
462
- # Check for consecutive minutes where both power and temp exceed thresholds
463
- power_mask = filtered_df[pow_var_name] > pow_thresh
464
- temp_mask = filtered_df[t_var_name] > t_thresh
465
- combined_mask = power_mask & temp_mask
466
-
467
- # Check for fault_time consecutive minutes
468
- consecutive_condition = combined_mask.rolling(window=fault_time).min() == 1
469
- if consecutive_condition.any():
470
- first_true_index = consecutive_condition.idxmax()
471
- adjusted_time = first_true_index - pd.Timedelta(minutes=fault_time-1)
472
- _add_an_alarm(alarms, day, t_var_name, f"High heat pump inlet temperature: {t_pretty_name} was above {t_thresh:.1f} while HP was ON starting at {adjusted_time}.")
473
-
474
- return _convert_silent_alarm_dict_to_df(alarms)
475
-
476
- def _process_bounds_df_alarm_codes(bounds_df : pd.DataFrame, alarm_tag : str, type_default_dict : dict = {}, system : str = "") -> pd.DataFrame:
477
- # Should only do for alarm codes of format: [TAG]_[TYPE]_[OPTIONAL_ID]:[BOUND]
478
- if (system != ""):
479
- if not 'system' in bounds_df.columns:
480
- raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
481
- bounds_df = bounds_df.loc[bounds_df['system'] == system]
406
+ def flag_hp_outlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
407
+ default_temp_threshold : float = 140.0, fault_time : int = 5) -> pd.DataFrame:
408
+ """
409
+ Detects low heat pump outlet temperature by checking if the outlet temperature falls below a threshold
410
+ while the heat pump is running. The first 10 minutes after each HP turn-on are excluded as a warmup
411
+ period. An alarm triggers if the temperature stays below the threshold for `fault_time` consecutive
412
+ minutes after the warmup period.
482
413
 
483
- required_columns = ["variable_name", "alarm_codes"]
484
- for required_column in required_columns:
485
- if not required_column in bounds_df.columns:
486
- raise Exception(f"{required_column} is not present in Variable_Names.csv")
487
- if not 'pretty_name' in bounds_df.columns:
488
- bounds_df['pretty_name'] = bounds_df['variable_name']
489
- else:
490
- bounds_df['pretty_name'] = bounds_df['pretty_name'].fillna(bounds_df['variable_name'])
414
+ VarNames syntax:
415
+ HPO_POW_[OPTIONAL ID]:### - Indicates a power variable for the heat pump. ### is the power threshold (default 1.0) above which
416
+ the heat pump is considered 'on'.
417
+ HPO_T_[OPTIONAL ID]:### - Indicates heat pump outlet temperature variable. ### is the temperature threshold (default 140.0)
418
+ that should always be exceeded while the heat pump is on after the 10-minute warmup period.
491
419
 
492
- bounds_df = bounds_df.loc[:, ["variable_name", "alarm_codes", "pretty_name"]]
493
- bounds_df.dropna(axis=0, thresh=2, inplace=True)
420
+ Parameters
421
+ ----------
422
+ df: pd.DataFrame
423
+ Post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in order minutes. If minutes
424
+ are out of order or have gaps, the function may return erroneous alarms.
425
+ daily_df: pd.DataFrame
426
+ Post-transformed dataframe for daily data.
427
+ config : ecopipeline.ConfigManager
428
+ The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
429
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
430
+ The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
431
+ name of each variable in the dataframe that requires alarming and the HPO alarm codes (e.g., HPO_POW_1:1.0, HPO_T_1:140.0).
432
+ system: str
433
+ String of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
434
+ default_power_threshold : float
435
+ Default power threshold for POW alarm codes when no custom bound is specified (default 1.0). Heat pump is considered 'on'
436
+ when power exceeds this value.
437
+ default_temp_threshold : float
438
+ Default temperature threshold for T alarm codes when no custom bound is specified (default 140.0). Alarm triggers when
439
+ temperature falls BELOW this value while heat pump is on (after warmup period).
440
+ fault_time : int
441
+ Number of consecutive minutes that temperature must be below threshold (after warmup) before triggering an alarm (default 5).
494
442
 
495
- # Check if all alarm_codes are null or if dataframe is empty
496
- if bounds_df.empty or bounds_df['alarm_codes'].isna().all():
443
+ Returns
444
+ -------
445
+ pd.DataFrame:
446
+ Pandas dataframe with alarm events
447
+ """
448
+ if df.empty:
449
+ print("cannot flag missing balancing valve alarms. Dataframe is empty")
450
+ return pd.DataFrame()
451
+ variable_names_path = config.get_var_names_path()
452
+ try:
453
+ bounds_df = pd.read_csv(variable_names_path)
454
+ except FileNotFoundError:
455
+ print("File Not Found: ", variable_names_path)
497
456
  return pd.DataFrame()
498
457
 
499
- bounds_df = bounds_df[bounds_df['alarm_codes'].str.contains(alarm_tag, na=False)]
500
-
501
- # Split alarm_codes by semicolons and create a row for each STS code
502
- expanded_rows = []
503
- for idx, row in bounds_df.iterrows():
504
- alarm_codes = str(row['alarm_codes']).split(';')
505
- tag_codes = [code.strip() for code in alarm_codes if code.strip().startswith(alarm_tag)]
506
-
507
- if tag_codes: # Only process if there are STS codes
508
- for tag_code in tag_codes:
509
- new_row = row.copy()
510
- if ":" in tag_code:
511
- tag_parts = tag_code.split(':')
512
- if len(tag_parts) > 2:
513
- raise Exception(f"Improperly formated alarm code : {tag_code}")
514
- new_row['bound'] = tag_parts[1]
515
- tag_code = tag_parts[0]
516
- else:
517
- new_row['bound'] = None
518
- new_row['alarm_codes'] = tag_code
519
-
520
- expanded_rows.append(new_row)
521
-
522
- if expanded_rows:
523
- bounds_df = pd.DataFrame(expanded_rows)
524
- else:
525
- return pd.DataFrame()# no tagged alarms to look into
458
+ alarm = HPWHOutlet(bounds_df, default_power_threshold, default_temp_threshold, fault_time)
459
+ return alarm.find_alarms(df, daily_df, config)
460
+
461
+ def flag_blown_fuse(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
462
+ default_power_range : float = 2.0, default_power_draw : float = 30, fault_time : int = 3) -> pd.DataFrame:
463
+ """
464
+ Detects blown fuse alarms for heating elements by identifying when an element is drawing power
465
+ but significantly less than expected, which may indicate a blown fuse.
466
+
467
+ VarNames syntax:
468
+ BF_[OPTIONAL ID]:### - Indicates a blown fuse alarm for an element. ### is the expected kW input when the element is on.
469
+
470
+ Parameters
471
+ ----------
472
+ df: pd.DataFrame
473
+ Post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in order minutes. If minutes
474
+ are out of order or have gaps, the function may return erroneous alarms.
475
+ daily_df: pd.DataFrame
476
+ Post-transformed dataframe for daily data.
477
+ config : ecopipeline.ConfigManager
478
+ The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
479
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
480
+ The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
481
+ name of each variable in the dataframe that requires alarming and the BF alarm codes (e.g., BF:30, BF_1:25).
482
+ system: str
483
+ String of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
484
+ default_power_threshold : float
485
+ Power threshold to determine if the element is "on" (default 1.0). Element is considered on when power exceeds this value.
486
+ default_power_range : float
487
+ Allowable variance below the expected power draw (default 2.0). An alarm triggers when the actual power draw is less than
488
+ (expected_power_draw - default_power_range) while the element is on.
489
+ default_power_draw : float
490
+ Default expected power draw in kW when no custom bound is specified in the alarm code (default 30).
491
+ fault_time : int
492
+ Number of consecutive minutes that the fault condition must persist before triggering an alarm (default 3).
493
+
494
+ Returns
495
+ -------
496
+ pd.DataFrame:
497
+ Pandas dataframe with alarm events
498
+ """
499
+ if df.empty:
500
+ print("cannot flag missing balancing valve alarms. Dataframe is empty")
501
+ return pd.DataFrame()
502
+ variable_names_path = config.get_var_names_path()
503
+ try:
504
+ bounds_df = pd.read_csv(variable_names_path)
505
+ except FileNotFoundError:
506
+ print("File Not Found: ", variable_names_path)
507
+ return pd.DataFrame()
508
+
509
+ alarm = BlownFuse(bounds_df, default_power_threshold, default_power_range, default_power_draw,fault_time)
510
+ return alarm.find_alarms(df, daily_df, config)
511
+
512
+ def flag_unexpected_soo_change(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
513
+ default_on_temp : float = 115.0, default_off_temp : float = 140.0) -> pd.DataFrame:
514
+ """
515
+ Detects unexpected state of operation (SOO) changes by checking if the heat pump turns on or off
516
+ when the temperature is not near the expected aquastat setpoint thresholds. An alarm is triggered
517
+ if the HP turns on/off and the corresponding temperature is more than 5.0 degrees away from the
518
+ expected threshold.
519
+
520
+ VarNames syntax:
521
+ SOOCHNG_POW:### - Indicates a power variable for the heat pump system (should be total power across all primary heat pumps). ### is the power threshold (default 1.0) above which
522
+ the heat pump system is considered 'on'.
523
+ SOOCHNG_ON_[Mode ID]:### - Indicates the temperature variable at the ON aquastat fraction. ### is the temperature (default 115.0)
524
+ that should trigger the heat pump to turn ON. Mode ID should be the load up mode from ['loadUp','shed','criticalPeak','gridEmergency','advLoadUp','normal'] or left blank for normal mode
525
+ SOOCHNG_OFF_[Mode ID]:### - Indicates the temperature variable at the OFF aquastat fraction (can be same as ON aquastat). ### is the temperature (default 140.0)
526
+ that should trigger the heat pump to turn OFF. Mode ID should be the load up mode from ['loadUp','shed','criticalPeak','gridEmergency','advLoadUp','normal'] or left blank for normal mode
527
+
528
+ Parameters
529
+ ----------
530
+ df: pd.DataFrame
531
+ Post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in order minutes. If minutes
532
+ are out of order or have gaps, the function may return erroneous alarms.
533
+ daily_df: pd.DataFrame
534
+ Post-transformed dataframe for daily data.
535
+ config : ecopipeline.ConfigManager
536
+ The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
537
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
538
+ The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
539
+ name of each variable in the dataframe that requires alarming and the SOOCHNG alarm codes (e.g., SOOCHNG_POW_normal:1.0, SOOCHNG_ON_normal:115.0, SOOCHNG_OFF_normal:140.0).
540
+ system: str
541
+ String of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
542
+ default_power_threshold : float
543
+ Default power threshold for POW alarm codes when no custom bound is specified (default 1.0). Heat pump is considered 'on'
544
+ when power exceeds this value.
545
+ default_on_temp : float
546
+ Default ON temperature threshold (default 115.0). When the HP turns on, an alarm triggers if the temperature
547
+ is more than 5.0 degrees away from this value.
548
+ default_off_temp : float
549
+ Default OFF temperature threshold (default 140.0). When the HP turns off, an alarm triggers if the temperature
550
+ is more than 5.0 degrees away from this value.
551
+
552
+ Returns
553
+ -------
554
+ pd.DataFrame:
555
+ Pandas dataframe with alarm events
556
+ """
557
+ if df.empty:
558
+ print("cannot flag missing balancing valve alarms. Dataframe is empty")
559
+ return pd.DataFrame()
560
+ variable_names_path = config.get_var_names_path()
561
+ try:
562
+ bounds_df = pd.read_csv(variable_names_path)
563
+ except FileNotFoundError:
564
+ print("File Not Found: ", variable_names_path)
565
+ return pd.DataFrame()
566
+
567
+ alarm = SOOChange(bounds_df, default_power_threshold, default_on_temp, default_off_temp)
568
+ return alarm.find_alarms(df, daily_df, config)
569
+
570
+ def flag_ls_mode_inconsistancy(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "") -> pd.DataFrame:
571
+ """
572
+ Detects when reported loadshift mode does not match its expected value during a load shifting event.
573
+ An alarm is triggered if the variable value does not equal the expected value during the
574
+ time periods defined in the load shifting schedule for that mode.
575
+
576
+ VarNames syntax:
577
+ SOO_[mode]:### - Indicates a variable that should equal ### during [mode] load shifting events.
578
+ [mode] can be: normal, loadUp, shed, criticalPeak, gridEmergency, advLoadUp
579
+ ### is the expected value (e.g., SOO_loadUp:1 means the variable should be 1 during loadUp events)
580
+
581
+ Parameters
582
+ ----------
583
+ df: pd.DataFrame
584
+ Post-transformed dataframe for minute data. It should be noted that this function expects consecutive,
585
+ in order minutes. If minutes are out of order or have gaps, the function may return erroneous alarms.
586
+ daily_df: pd.DataFrame
587
+ Pandas dataframe with daily data. This dataframe should have a datetime index.
588
+ config : ecopipeline.ConfigManager
589
+ The ConfigManager object that holds configuration data for the pipeline.
590
+ system: str
591
+ String of system name if processing a particular system in a Variable_Names.csv file with multiple systems.
592
+
593
+ Returns
594
+ -------
595
+ pd.DataFrame:
596
+ Pandas dataframe with alarm events
597
+ """
598
+ if df.empty:
599
+ print("cannot flag load shift mode inconsistency alarms. Dataframe is empty")
600
+ return pd.DataFrame()
601
+ variable_names_path = config.get_var_names_path()
602
+ try:
603
+ bounds_df = pd.read_csv(variable_names_path)
604
+ except FileNotFoundError:
605
+ print("File Not Found: ", variable_names_path)
606
+ return pd.DataFrame()
526
607
 
527
- alarm_code_parts = []
528
- for idx, row in bounds_df.iterrows():
529
- parts = row['alarm_codes'].split('_')
530
- if len(parts) == 2:
531
- alarm_code_parts.append([parts[1], "No ID"])
532
- elif len(parts) == 3:
533
- alarm_code_parts.append([parts[1], parts[2]])
534
- else:
535
- raise Exception(f"improper STS alarm code format for {row['variable_name']}")
536
- if alarm_code_parts:
537
- bounds_df[['alarm_code_type', 'alarm_code_id']] = pd.DataFrame(alarm_code_parts, index=bounds_df.index)
538
-
539
- # Replace None bounds with appropriate defaults based on alarm_code_type
540
- for idx, row in bounds_df.iterrows():
541
- if pd.isna(row['bound']) or row['bound'] is None:
542
- if row['alarm_code_type'] in type_default_dict.keys():
543
- bounds_df.at[idx, 'bound'] = type_default_dict[row['alarm_code_type']]
544
- # Coerce bound column to float
545
- bounds_df['bound'] = pd.to_numeric(bounds_df['bound'], errors='coerce').astype(float)
546
- return bounds_df
547
-
548
- def _add_an_alarm(alarm_dict : dict, day : datetime, var_name : str, alarm_string : str):
549
- # Round down to beginning of day
550
- day = pd.Timestamp(day).normalize()
551
-
552
- if day in alarm_dict:
553
- alarm_dict[day].append([var_name, alarm_string])
554
- else:
555
- alarm_dict[day] = [[var_name, alarm_string]]
608
+ alarm = LSInconsist(bounds_df)
609
+ return alarm.find_alarms(df, daily_df, config)
610
+
611
+ def flag_unexpected_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_high_temp : float = 130,
612
+ default_low_temp : float = 115, fault_time : int = 10) -> pd.DataFrame:
613
+ """
614
+ Detects when a temperature value falls outside an acceptable range for
615
+ too long. An alarm is triggered if the temperature is above the high bound or below the low bound
616
+ for `fault_time` consecutive minutes.
617
+
618
+ VarNames syntax:
619
+ TMPRNG_[OPTIONAL ID]:###-### - Indicates a temperature variable. ###-### is the acceptable temperature range
620
+ (e.g., TMPRNG:110-130 means temperature should stay between 110 and 130 degrees).
621
+
622
+ Parameters
623
+ ----------
624
+ df: pd.DataFrame
625
+ Post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in order minutes. If minutes
626
+ are out of order or have gaps, the function may return erroneous alarms.
627
+ daily_df: pd.DataFrame
628
+ Post-transformed dataframe for daily data. Used for determining which days to process.
629
+ config : ecopipeline.ConfigManager
630
+ The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
631
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
632
+ The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
633
+ name of each variable in the dataframe that requires alarming and the TMPRNG alarm codes (e.g., TMPRNG:110-130, TMPRNG_1:115-125).
634
+ system: str
635
+ String of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
636
+ default_high_temp : float
637
+ Default high temperature bound when no custom range is specified in the alarm code (default 130). Temperature above this triggers alarm.
638
+ default_low_temp : float
639
+ Default low temperature bound when no custom range is specified in the alarm code (default 115). Temperature below this triggers alarm.
640
+ fault_time : int
641
+ Number of consecutive minutes that temperature must be outside the acceptable range before triggering an alarm (default 10).
642
+
643
+ Returns
644
+ -------
645
+ pd.DataFrame:
646
+ Pandas dataframe with alarm events
647
+ """
648
+ if df.empty:
649
+ print("cannot flag missing balancing valve alarms. Dataframe is empty")
650
+ return pd.DataFrame()
651
+ variable_names_path = config.get_var_names_path()
652
+ try:
653
+ bounds_df = pd.read_csv(variable_names_path)
654
+ except FileNotFoundError:
655
+ print("File Not Found: ", variable_names_path)
656
+ return pd.DataFrame()
657
+ temp_alarm = TempRange(bounds_df, default_high_temp, default_low_temp, fault_time)
658
+ return temp_alarm.find_alarms(df, daily_df, config)
659
+
660
+ def flag_shortcycle(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
661
+ short_cycle_time : int = 15) -> pd.DataFrame:
662
+ """
663
+ Detects short cycling by identifying when the heat pump turns on for less than `short_cycle_time`
664
+ consecutive minutes before turning off again. Short cycling can indicate equipment issues or
665
+ improper system sizing.
666
+
667
+ VarNames syntax:
668
+ SHRTCYC_[OPTIONAL ID]:### - Indicates a power variable for the heat pump. ### is the power threshold (default 1.0) above which
669
+ the heat pump is considered 'on'.
670
+
671
+ Parameters
672
+ ----------
673
+ df: pd.DataFrame
674
+ Post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in order minutes. If minutes
675
+ are out of order or have gaps, the function may return erroneous alarms.
676
+ daily_df: pd.DataFrame
677
+ Post-transformed dataframe for daily data.
678
+ config : ecopipeline.ConfigManager
679
+ The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
680
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
681
+ The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
682
+ name of each variable in the dataframe that requires alarming and the SHRTCYC alarm codes (e.g., SHRTCYC:1.0, SHRTCYC_1:0.5).
683
+ system: str
684
+ String of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
685
+ default_power_threshold : float
686
+ Default power threshold when no custom bound is specified in the alarm code (default 1.0). Heat pump is considered 'on'
687
+ when power exceeds this value.
688
+ short_cycle_time : int
689
+ Minimum expected run time in minutes (default 15). An alarm triggers if the heat pump runs for fewer than this many
690
+ consecutive minutes before turning off.
691
+
692
+ Returns
693
+ -------
694
+ pd.DataFrame:
695
+ Pandas dataframe with alarm events
696
+ """
697
+ if df.empty:
698
+ print("cannot flag missing balancing valve alarms. Dataframe is empty")
699
+ return pd.DataFrame()
700
+ variable_names_path = config.get_var_names_path()
701
+ try:
702
+ bounds_df = pd.read_csv(variable_names_path)
703
+ except FileNotFoundError:
704
+ print("File Not Found: ", variable_names_path)
705
+ return pd.DataFrame()
706
+
707
+ short_alarm = ShortCycle(bounds_df, default_power_threshold, short_cycle_time)
708
+ return short_alarm.find_alarms(df, daily_df, config)
709
+
556
710
 
557
711
  def _convert_silent_alarm_dict_to_df(alarm_dict : dict) -> pd.DataFrame:
558
712
  events = {
559
713
  'start_time_pt' : [],
560
714
  'end_time_pt' : [],
561
- 'event_type' : [],
562
- 'event_detail' : [],
715
+ 'alarm_type' : [],
716
+ 'alarm_detail' : [],
563
717
  'variable_name' : []
564
718
  }
565
719
  for key, value_list in alarm_dict.items():
566
720
  for value in value_list:
567
721
  events['start_time_pt'].append(key)
568
- events['end_time_pt'].append(key)
569
- events['event_type'].append('SILENT_ALARM')
570
- events['event_detail'].append(value[1])
722
+ # Use end_time from value[2] if provided, otherwise use key
723
+ events['end_time_pt'].append(value[2] if len(value) > 2 else key)
724
+ events['alarm_type'].append(value[3] if len(value) > 3 else 'SILENT_ALARM')
725
+ events['alarm_detail'].append(value[1])
571
726
  events['variable_name'].append(value[0])
572
727
 
573
728
  event_df = pd.DataFrame(events)
574
729
  event_df.set_index('start_time_pt', inplace=True)
575
730
  return event_df
576
731
 
577
- def _convert_event_type_dict_to_df(alarm_dict : dict, event_type = 'DATA_LOSS_COP') -> pd.DataFrame:
578
- events = {
579
- 'start_time_pt' : [],
580
- 'end_time_pt' : [],
581
- 'event_type' : [],
582
- 'event_detail' : [],
583
- 'variable_name' : []
584
- }
585
- for key, value in alarm_dict.items():
586
- for i in range(len(value)):
587
- events['start_time_pt'].append(key)
588
- events['end_time_pt'].append(key)
589
- events['event_type'].append(event_type)
590
- events['event_detail'].append(value[i][1])
591
- events['variable_name'].append(value[i][0])
592
-
593
- event_df = pd.DataFrame(events)
594
- event_df.set_index('start_time_pt', inplace=True)
595
- return event_df
596
-
597
- def _check_and_add_alarm(df : pd.DataFrame, mask : pd.Series, alarms_dict, day, fault_time : int, var_name : str, pretty_name : str, alarm_type : str = 'Lower'):
598
- # KNOWN BUG : Avg value during fault time excludes the first (fault_time-1) minutes of each fault window
599
- next_day = day + pd.Timedelta(days=1)
600
- filtered_df = mask.loc[(mask.index >= day) & (mask.index < next_day)]
601
- consecutive_condition = filtered_df.rolling(window=fault_time).min() == 1
602
- if consecutive_condition.any():
603
- group = (consecutive_condition != consecutive_condition.shift()).cumsum()
604
- streaks = consecutive_condition.groupby(group).agg(['sum', 'size', 'idxmin'])
605
- true_streaks = streaks[consecutive_condition.groupby(group).first()]
606
- longest_streak_length = true_streaks['size'].max()
607
- avg_streak_length = true_streaks['size'].mean() + fault_time-1
608
- longest_group = true_streaks['size'].idxmax()
609
- streak_indices = consecutive_condition[group == longest_group].index
610
- starting_index = streak_indices[0]
611
-
612
- day_df = df.loc[(df.index >= day) & (df.index < next_day)]
613
- average_value = day_df.loc[consecutive_condition, var_name].mean()
614
-
615
- # first_true_index = consecutive_condition.idxmax()
616
- # because first (fault_time-1) minutes don't count in window
617
- adjusted_time = starting_index - pd.Timedelta(minutes=fault_time-1)
618
- adjusted_longest_streak_length = longest_streak_length + fault_time-1
619
- alarm_string = f"{alarm_type} bound alarm for {pretty_name} (longest at {adjusted_time.strftime('%H:%M')} for {adjusted_longest_streak_length} minutes). Avg fault time : {round(avg_streak_length,1)} minutes, Avg value during fault: {round(average_value,2)}"
620
- if day in alarms_dict:
621
- alarms_dict[day].append([var_name, alarm_string])
622
- else:
623
- alarms_dict[day] = [[var_name, alarm_string]]
624
732
 
625
733
  def power_ratio_alarm(daily_df: pd.DataFrame, config : ConfigManager, day_table_name : str, system: str = "", verbose : bool = False, ratio_period_days : int = 7) -> pd.DataFrame:
626
734
  """
@@ -634,7 +742,7 @@ def power_ratio_alarm(daily_df: pd.DataFrame, config : ConfigManager, day_table_
634
742
  are out of order or have gaps, the function may return erroneous alarms.
635
743
  config : ecopipeline.ConfigManager
636
744
  The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
637
- called Varriable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
745
+ called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
638
746
  The file must have at least two columns which must be titled "variable_name", "alarm_codes" which should contain the
639
747
  name of each variable in the dataframe that requires the alarming and the ratio alarm code in the form "PR_{Power Ratio Name}:{low percentage}-{high percentage}
640
748
  system: str
@@ -647,306 +755,11 @@ def power_ratio_alarm(daily_df: pd.DataFrame, config : ConfigManager, day_table_
647
755
  pd.DataFrame:
648
756
  Pandas dataframe with alarm events, empty if no alarms triggered
649
757
  """
650
- daily_df_copy = daily_df.copy()
651
758
  variable_names_path = config.get_var_names_path()
652
759
  try:
653
- ratios_df = pd.read_csv(variable_names_path)
760
+ bounds_df = pd.read_csv(variable_names_path)
654
761
  except FileNotFoundError:
655
762
  print("File Not Found: ", variable_names_path)
656
763
  return pd.DataFrame()
657
- if (system != ""):
658
- if not 'system' in ratios_df.columns:
659
- raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
660
- ratios_df = ratios_df.loc[ratios_df['system'] == system]
661
- required_columns = ["variable_name", "alarm_codes"]
662
- for required_column in required_columns:
663
- if not required_column in ratios_df.columns:
664
- raise Exception(f"{required_column} is not present in Variable_Names.csv")
665
- if ratios_df['alarm_codes'].isna().all() or ratios_df['alarm_codes'].isnull().all():
666
- print("No alarm codes in ", variable_names_path)
667
- return pd.DataFrame()
668
- if not 'pretty_name' in ratios_df.columns:
669
- ratios_df['pretty_name'] = ratios_df['variable_name']
670
- else:
671
- ratios_df['pretty_name'] = ratios_df['pretty_name'].fillna(ratios_df['variable_name'])
672
- ratios_df = ratios_df.loc[:, ["variable_name", "alarm_codes", "pretty_name"]]
673
- ratios_df = ratios_df[ratios_df['alarm_codes'].str.contains('PR', na=False)]
674
- ratios_df.dropna(axis=0, thresh=2, inplace=True)
675
- if ratio_period_days > 1:
676
- if verbose:
677
- print(f"adding last {ratio_period_days} to daily_df")
678
- daily_df_copy = _append_previous_days_to_df(daily_df_copy, config, ratio_period_days, day_table_name)
679
- elif ratio_period_days < 1:
680
- print("power ratio alarm period, ratio_period_days, must be more than 1")
681
- return pd.DataFrame()
682
-
683
- ratios_df.set_index(['variable_name'], inplace=True)
684
- ratio_dict = {}
685
- for ratios_var, ratios in ratios_df.iterrows():
686
- if not ratios_var in daily_df_copy.columns:
687
- daily_df_copy[ratios_var] = 0
688
- alarm_codes = str(ratios['alarm_codes']).split(";")
689
- for alarm_code in alarm_codes:
690
- if alarm_code[:2] == "PR":
691
- split_out_alarm = alarm_code.split(":")
692
- low_high = split_out_alarm[1].split("-")
693
- pr_id = split_out_alarm[0].split("_")[1]
694
- if len(low_high) != 2:
695
- raise Exception(f"Error processing alarm code {alarm_code}")
696
- if pr_id in ratio_dict:
697
- ratio_dict[pr_id][0].append(ratios_var)
698
- ratio_dict[pr_id][1].append(float(low_high[0]))
699
- ratio_dict[pr_id][2].append(float(low_high[1]))
700
- ratio_dict[pr_id][3].append(ratios['pretty_name'])
701
- else:
702
- ratio_dict[pr_id] = [[ratios_var],[float(low_high[0])],[float(low_high[1])],[ratios['pretty_name']]]
703
- if verbose:
704
- print("ratio_dict keys:", ratio_dict.keys())
705
- # Create blocks of ratio_period_days
706
- blocks_df = _create_period_blocks(daily_df_copy, ratio_period_days, verbose)
707
-
708
- if blocks_df.empty:
709
- print("No complete blocks available for analysis")
710
- return pd.DataFrame()
711
-
712
- alarms = {}
713
- for key, value_list in ratio_dict.items():
714
- # Calculate total for each block
715
- blocks_df[key] = blocks_df[value_list[0]].sum(axis=1)
716
- for i in range(len(value_list[0])):
717
- column_name = value_list[0][i]
718
- # Calculate ratio for each block
719
- blocks_df[f'{column_name}_{key}'] = (blocks_df[column_name]/blocks_df[key]) * 100
720
- if verbose:
721
- print(f"Block ratios for {column_name}_{key}:", blocks_df[f'{column_name}_{key}'])
722
- _check_and_add_ratio_alarm_blocks(blocks_df, key, column_name, value_list[3][i], alarms, value_list[2][i], value_list[1][i], ratio_period_days)
723
- return _convert_silent_alarm_dict_to_df(alarms)
724
- # alarms = {}
725
- # for key, value_list in ratio_dict.items():
726
- # daily_df_copy[key] = daily_df_copy[value_list[0]].sum(axis=1)
727
- # for i in range(len(value_list[0])):
728
- # column_name = value_list[0][i]
729
- # daily_df_copy[f'{column_name}_{key}'] = (daily_df_copy[column_name]/daily_df_copy[key]) * 100
730
- # if verbose:
731
- # print(f"Ratios for {column_name}_{key}",daily_df_copy[f'{column_name}_{key}'])
732
- # _check_and_add_ratio_alarm(daily_df_copy, key, column_name, value_list[3][i], alarms, value_list[2][i], value_list[1][i])
733
- # return _convert_silent_alarm_dict_to_df(alarms)
734
-
735
- # def _check_and_add_ratio_alarm(daily_df: pd.DataFrame, alarm_key : str, column_name : str, pretty_name : str, alarms_dict : dict, high_bound : float, low_bound : float):
736
- # alarm_daily_df = daily_df.loc[(daily_df[f"{column_name}_{alarm_key}"] < low_bound) | (daily_df[f"{column_name}_{alarm_key}"] > high_bound)]
737
- # if not alarm_daily_df.empty:
738
- # for day, values in alarm_daily_df.iterrows():
739
- # alarm_str = f"Power ratio alarm: {pretty_name} accounted for {round(values[f'{column_name}_{alarm_key}'], 2)}% of {alarm_key} energy use. {round(low_bound, 2)}-{round(high_bound, 2)}% of {alarm_key} energy use expected."
740
- # if day in alarms_dict:
741
- # alarms_dict[day].append([column_name, alarm_str])
742
- # else:
743
- # alarms_dict[day] = [[column_name, alarm_str]]
744
- def _check_and_add_ratio_alarm_blocks(blocks_df: pd.DataFrame, alarm_key: str, column_name: str, pretty_name: str, alarms_dict: dict, high_bound: float, low_bound: float, ratio_period_days: int):
745
- """
746
- Check for alarms in block-based ratios and add to alarms dictionary.
747
- """
748
- alarm_blocks_df = blocks_df.loc[(blocks_df[f"{column_name}_{alarm_key}"] < low_bound) | (blocks_df[f"{column_name}_{alarm_key}"] > high_bound)]
749
- if not alarm_blocks_df.empty:
750
- for block_end_date, values in alarm_blocks_df.iterrows():
751
- alarm_str = f"Power ratio alarm ({ratio_period_days}-day block ending {block_end_date.strftime('%Y-%m-%d')}): {pretty_name} accounted for {round(values[f'{column_name}_{alarm_key}'], 2)}% of {alarm_key} energy use. {round(low_bound, 2)}-{round(high_bound, 2)}% of {alarm_key} energy use expected."
752
- if block_end_date in alarms_dict:
753
- alarms_dict[block_end_date].append([column_name, alarm_str])
754
- else:
755
- alarms_dict[block_end_date] = [[column_name, alarm_str]]
756
-
757
- def _create_period_blocks(daily_df: pd.DataFrame, ratio_period_days: int, verbose: bool = False) -> pd.DataFrame:
758
- """
759
- Create blocks of ratio_period_days by summing values within each block.
760
- Each block will be represented by its end date.
761
- """
762
- if len(daily_df) < ratio_period_days:
763
- if verbose:
764
- print(f"Not enough data for {ratio_period_days}-day blocks. Need at least {ratio_period_days} days, have {len(daily_df)}")
765
- return pd.DataFrame()
766
-
767
- blocks = []
768
- block_dates = []
769
-
770
- # Create blocks by summing consecutive groups of ratio_period_days
771
- for i in range(ratio_period_days - 1, len(daily_df)):
772
- start_idx = i - ratio_period_days + 1
773
- end_idx = i + 1
774
-
775
- block_data = daily_df.iloc[start_idx:end_idx].sum()
776
- blocks.append(block_data)
777
- # Use the end date of the block as the identifier
778
- block_dates.append(daily_df.index[i])
779
-
780
- if not blocks:
781
- return pd.DataFrame()
782
-
783
- blocks_df = pd.DataFrame(blocks, index=block_dates)
784
-
785
- if verbose:
786
- print(f"Created {len(blocks_df)} blocks of {ratio_period_days} days each")
787
- print(f"Block date range: {blocks_df.index.min()} to {blocks_df.index.max()}")
788
-
789
- return blocks_df
790
-
791
- def _append_previous_days_to_df(daily_df: pd.DataFrame, config : ConfigManager, ratio_period_days : int, day_table_name : str, primary_key : str = "time_pt") -> pd.DataFrame:
792
- db_connection, cursor = config.connect_db()
793
- period_start = daily_df.index.min() - timedelta(ratio_period_days)
794
- try:
795
- # find existing times in database for upsert statement
796
- cursor.execute(
797
- f"SELECT * FROM {day_table_name} WHERE {primary_key} < '{daily_df.index.min()}' AND {primary_key} >= '{period_start}'")
798
- result = cursor.fetchall()
799
- column_names = [desc[0] for desc in cursor.description]
800
- old_days_df = pd.DataFrame(result, columns=column_names)
801
- old_days_df = old_days_df.set_index(primary_key)
802
- daily_df = pd.concat([daily_df, old_days_df])
803
- daily_df = daily_df.sort_index(ascending=True)
804
- except mysqlerrors.Error:
805
- print(f"Table {day_table_name} has no data.")
806
-
807
- db_connection.close()
808
- cursor.close()
809
- return daily_df
810
-
811
- # def flag_dhw_outage(df: pd.DataFrame, daily_df : pd.DataFrame, dhw_outlet_column : str, supply_temp : int = 110, consecutive_minutes : int = 15) -> pd.DataFrame:
812
- # """
813
- # Parameters
814
- # ----------
815
- # df : pd.DataFrame
816
- # Single pandas dataframe of sensor data on minute intervals.
817
- # daily_df : pd.DataFrame
818
- # Single pandas dataframe of sensor data on daily intervals.
819
- # dhw_outlet_column : str
820
- # Name of the column in df and daily_df that contains temperature of DHW supplied to building occupants
821
- # supply_temp : int
822
- # the minimum DHW temperature acceptable to supply to building occupants
823
- # consecutive_minutes : int
824
- # the number of minutes in a row that DHW is not delivered to tenants to qualify as a DHW Outage
825
-
826
- # Returns
827
- # -------
828
- # event_df : pd.DataFrame
829
- # Dataframe with 'ALARM' events on the days in which there was a DHW Outage.
830
- # """
831
- # # TODO edge case for outage that spans over a day
832
- # events = {
833
- # 'start_time_pt' : [],
834
- # 'end_time_pt' : [],
835
- # 'event_type' : [],
836
- # 'event_detail' : [],
837
- # }
838
- # mask = df[dhw_outlet_column] < supply_temp
839
- # for day in daily_df.index:
840
- # next_day = day + pd.Timedelta(days=1)
841
- # filtered_df = mask.loc[(mask.index >= day) & (mask.index < next_day)]
842
-
843
- # consecutive_condition = filtered_df.rolling(window=consecutive_minutes).min() == 1
844
- # if consecutive_condition.any():
845
- # # first_true_index = consecutive_condition['supply_temp'].idxmax()
846
- # first_true_index = consecutive_condition.idxmax()
847
- # adjusted_time = first_true_index - pd.Timedelta(minutes=consecutive_minutes-1)
848
- # events['start_time_pt'].append(day)
849
- # events['end_time_pt'].append(next_day - pd.Timedelta(minutes=1))
850
- # events['event_type'].append("ALARM")
851
- # events['event_detail'].append(f"Hot Water Outage Occured (first one starting at {adjusted_time.strftime('%H:%M')})")
852
- # event_df = pd.DataFrame(events)
853
- # event_df.set_index('start_time_pt', inplace=True)
854
- # return event_df
855
-
856
- # def generate_event_log_df(config : ConfigManager):
857
- # """
858
- # Creates an event log df based on user submitted events in an event log csv
859
- # Parameters
860
- # ----------
861
- # config : ecopipeline.ConfigManager
862
- # The ConfigManager object that holds configuration data for the pipeline.
863
-
864
- # Returns
865
- # -------
866
- # event_df : pd.DataFrame
867
- # Dataframe formatted from events in Event_log.csv for pipeline.
868
- # """
869
- # event_filename = config.get_event_log_path()
870
- # try:
871
- # event_df = pd.read_csv(event_filename)
872
- # event_df['start_time_pt'] = pd.to_datetime(event_df['start_time_pt'])
873
- # event_df['end_time_pt'] = pd.to_datetime(event_df['end_time_pt'])
874
- # event_df.set_index('start_time_pt', inplace=True)
875
- # return event_df
876
- # except Exception as e:
877
- # print(f"Error processing file {event_filename}: {e}")
878
- # return pd.DataFrame({
879
- # 'start_time_pt' : [],
880
- # 'end_time_pt' : [],
881
- # 'event_type' : [],
882
- # 'event_detail' : [],
883
- # })
884
-
885
-
886
-
887
- # def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
888
- # """
889
- # Function must be called on the raw minute data df after the rename_varriables() and before the ffill_missing() function has been called.
890
- # The function returns a dataframe indexed by day. Each column will expanded to 3 columns, appended with '_missing_mins', '_avg_gap', and
891
- # '_max_gap' respectively. the columns will carry the following statisctics:
892
- # _missing_mins -> the number of minutes in the day that have no reported data value for the column
893
- # _avg_gap -> the average gap (in minutes) between collected data values that day
894
- # _max_gap -> the maximum gap (in minutes) between collected data values that day
895
-
896
- # Parameters
897
- # ----------
898
- # df : pd.DataFrame
899
- # minute data df after the rename_varriables() and before the ffill_missing() function has been called
900
-
901
- # Returns
902
- # -------
903
- # daily_data_stats : pd.DataFrame
904
- # new dataframe with the columns descriped in the function's description
905
- # """
906
- # min_time = df.index.min()
907
- # start_day = min_time.floor('D')
908
-
909
- # # If min_time is not exactly at the start of the day, move to the next day
910
- # if min_time != start_day:
911
- # start_day = start_day + pd.tseries.offsets.Day(1)
912
-
913
- # # Build a complete minutely timestamp index over the full date range
914
- # full_index = pd.date_range(start=start_day,
915
- # end=df.index.max().floor('D') - pd.Timedelta(minutes=1),
916
- # freq='T')
917
-
918
- # # Reindex to include any completely missing minutes
919
- # df_full = df.reindex(full_index)
920
-
921
- # # Resample daily to count missing values per column
922
- # total_missing = df_full.isna().resample('D').sum().astype(int)
923
-
924
- # # Function to calculate max consecutive missing values
925
- # def max_consecutive_nans(x):
926
- # is_na = x.isna()
927
- # groups = (is_na != is_na.shift()).cumsum()
928
- # return is_na.groupby(groups).sum().max() or 0
929
-
930
- # # Function to calculate average consecutive missing values
931
- # def avg_consecutive_nans(x):
932
- # is_na = x.isna()
933
- # groups = (is_na != is_na.shift()).cumsum()
934
- # gap_lengths = is_na.groupby(groups).sum()
935
- # gap_lengths = gap_lengths[gap_lengths > 0]
936
- # if len(gap_lengths) == 0:
937
- # return 0
938
- # return gap_lengths.mean()
939
-
940
- # # Apply daily, per column
941
- # max_consec_missing = df_full.resample('D').apply(lambda day: day.apply(max_consecutive_nans))
942
- # avg_consec_missing = df_full.resample('D').apply(lambda day: day.apply(avg_consecutive_nans))
943
-
944
- # # Rename columns to include a suffix
945
- # total_missing = total_missing.add_suffix('_missing_mins')
946
- # max_consec_missing = max_consec_missing.add_suffix('_max_gap')
947
- # avg_consec_missing = avg_consec_missing.add_suffix('_avg_gap')
948
-
949
- # # Concatenate along columns (axis=1)
950
- # combined_df = pd.concat([total_missing, max_consec_missing, avg_consec_missing], axis=1)
951
-
952
- # return combined_df
764
+ alarm = PowerRatio(bounds_df, day_table_name, ratio_period_days)
765
+ return alarm.find_alarms(None, daily_df, config)