ecopipeline 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,3 @@
1
1
  from .event_tracking import *
2
- __all__ = ['central_alarm_df_creator','flag_boundary_alarms','power_ratio_alarm','flag_abnormal_COP']
2
+ __all__ = ['central_alarm_df_creator','flag_boundary_alarms','power_ratio_alarm','flag_abnormal_COP','flag_high_swing_setpoint',
3
+ 'flag_recirc_balance_valve','flag_hp_inlet_temp']
@@ -1,6 +1,6 @@
1
1
  import pandas as pd
2
2
  import numpy as np
3
- import datetime as dt
3
+ import datetime as datetime
4
4
  from ecopipeline import ConfigManager
5
5
  import re
6
6
  import mysql.connector.errors as mysqlerrors
@@ -13,29 +13,24 @@ def central_alarm_df_creator(df: pd.DataFrame, daily_data : pd.DataFrame, config
13
13
  day_list = daily_data.index.to_list()
14
14
  print('Checking for alarms...')
15
15
  alarm_df = _convert_silent_alarm_dict_to_df({})
16
- boundary_alarm_df = flag_boundary_alarms(df, config, full_days=day_list, system=system, default_fault_time= default_boundary_fault_time)
17
- pwr_alarm_df = power_ratio_alarm(daily_data, config, day_table_name = config.get_table_name(day_table_name_header), system=system, ratio_period_days=power_ratio_period_days)
18
- abnormal_COP_df = flag_abnormal_COP(daily_data, config, system = system, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)
19
-
20
- if len(boundary_alarm_df) > 0:
21
- print("Boundary alarms detected. Adding them to event df...")
22
- alarm_df = boundary_alarm_df
23
- else:
24
- print("No boundary alarms detected.")
25
-
26
- if len(pwr_alarm_df) > 0:
27
- print("Power alarms detected. Adding them to event df...")
28
- alarm_df = pd.concat([alarm_df, pwr_alarm_df])
29
- else:
30
- print("No power alarms detected.")
31
-
32
- if _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
33
- print("Ongoing DATA_LOSS_COP detected. No further DATA_LOSS_COP events will be uploaded")
34
- elif len(abnormal_COP_df) > 0:
35
- print("Abnormal COPs detected. Adding them to event df...")
36
- alarm_df = pd.concat([alarm_df, abnormal_COP_df])
37
- else:
38
- print("No abnormal COPs.")
16
+ dict_of_alarms = {}
17
+ dict_of_alarms['boundary'] = flag_boundary_alarms(df, config, full_days=day_list, system=system, default_fault_time= default_boundary_fault_time)
18
+ dict_of_alarms['power ratio'] = power_ratio_alarm(daily_data, config, day_table_name = config.get_table_name(day_table_name_header), system=system, ratio_period_days=power_ratio_period_days)
19
+ dict_of_alarms['abnormal COP'] = flag_abnormal_COP(daily_data, config, system = system, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)
20
+ dict_of_alarms['swing tank setpoint'] = flag_high_swing_setpoint(df, daily_data, config, system=system)
21
+ dict_of_alarms['recirculation loop balancing valve'] = flag_recirc_balance_valve(daily_data, config, system=system)
22
+ dict_of_alarms['HPWH inlet temperature'] = flag_hp_inlet_temp(df, daily_data, config, system)
23
+
24
+ ongoing_COP_exception = ['abnormal COP']
25
+
26
+ for key, value in dict_of_alarms.items():
27
+ if key in ongoing_COP_exception and _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
28
+ print("Ongoing DATA_LOSS_COP detected. No further DATA_LOSS_COP events will be uploaded")
29
+ elif len(value) > 0:
30
+ print(f"Detected {key} alarm(s). Adding to event df...")
31
+ alarm_df = pd.concat([alarm_df, value])
32
+ else:
33
+ print(f"No {key} alarm(s) detected.")
39
34
 
40
35
  return alarm_df
41
36
 
@@ -188,6 +183,377 @@ def flag_boundary_alarms(df: pd.DataFrame, config : ConfigManager, default_fault
188
183
 
189
184
  return _convert_silent_alarm_dict_to_df(alarms)
190
185
 
186
def flag_high_swing_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, default_fault_time : int = 3,
                             system: str = "", default_setpoint : float = 130.0, default_power_indication : float = 1.0,
                             default_power_ratio : float = 0.4) -> pd.DataFrame:
    """
    Function will take a pandas dataframe and location of alarm information in a csv,
    and create a dataframe with applicable alarm events.

    VarNames syntax:
        STS_T_ID:### - Swing Tank Outlet Temperature. Alarm triggered if over number ### (or 130) for 3 minutes with power on
        STS_SP_ID:### - Swing Tank Power. ### is lowest recorded power for Swing Tank to be considered 'on'. Defaults to 1.0
        STS_TP_ID:### - Total System Power for ratio alarming for alarming if swing tank power is more than ### (40% default) of usage
        STS_ST_ID:### - Swing Tank Setpoint that should not change at all from ### (default 130)

    Parameters
    ----------
    df: pd.DataFrame
        post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in order minutes. If minutes
        are out of order or have gaps, the function may return erroneous alarms.
    daily_df: pd.DataFrame
        post-transformed dataframe for daily data. Used for checking power ratios and determining which days to process.
    config : ecopipeline.ConfigManager
        The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
        called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
        The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
        name of each variable in the dataframe that requires alarming and the STS alarm codes (e.g., STS_T_1:140, STS_SP_1:2.0)
    default_fault_time : int
        Number of consecutive minutes for T+SP alarms (default 3). T+SP alarms trigger when tank is powered and temperature exceeds
        setpoint for this many consecutive minutes.
    system: str
        string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
    default_setpoint : float
        Default temperature setpoint in degrees for T and ST alarm codes when no custom bound is specified (default 130.0)
    default_power_indication : float
        Default power threshold in kW for SP alarm codes when no custom bound is specified (default 1.0)
    default_power_ratio : float
        Default power ratio threshold (as decimal, e.g., 0.4 for 40%) for TP alarm codes when no custom bound is specified (default 0.4)

    Returns
    -------
    pd.DataFrame:
        Pandas dataframe with alarm events
    """
    if df.empty:
        print("cannot flag swing tank setpoint alarms. Dataframe is empty")
        return pd.DataFrame()
    variable_names_path = config.get_var_names_path()
    try:
        bounds_df = pd.read_csv(variable_names_path)
    except FileNotFoundError:
        print("File Not Found: ", variable_names_path)
        return pd.DataFrame()

    # Expand the STS_* alarm codes into one row per (variable, type, id) with a numeric bound,
    # filling missing bounds from the per-type defaults below.
    bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'STS',
                                               {'T' : default_setpoint,
                                                'SP': default_power_indication,
                                                'TP': default_power_ratio,
                                                'ST': default_setpoint},
                                               system)
    if bounds_df.empty:
        return _convert_silent_alarm_dict_to_df({}) # no alarms to look into

    # Process each day, and within each day each unique alarm_code_id
    alarms = {}
    for day in daily_df.index:
        next_day = day + pd.Timedelta(days=1)
        # Minute data restricted to this calendar day
        filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
        # NOTE(review): alarmed_for_day is shared across ALL alarm ids for this day, so one
        # id's alarm suppresses the ST and TP checks for every other id too — confirm intended.
        alarmed_for_day = False
        for alarm_id in bounds_df['alarm_code_id'].unique():
            id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]

            # Get T, SP, TP and ST alarm codes for this ID
            t_codes = id_group[id_group['alarm_code_type'] == 'T']
            sp_codes = id_group[id_group['alarm_code_type'] == 'SP']
            tp_codes = id_group[id_group['alarm_code_type'] == 'TP']
            st_codes = id_group[id_group['alarm_code_type'] == 'ST']

            # A given id may carry at most one code of each type
            if len(t_codes) > 1 or len(sp_codes) > 1 or len(tp_codes) > 1 or len(st_codes) > 1:
                raise Exception(f"Improper alarm codes for swing tank setpoint with id {alarm_id}")

            # T+SP alarm: tank powered while outlet temperature at/above setpoint
            if len(t_codes) == 1 and len(sp_codes) == 1:
                t_var_name = t_codes.iloc[0]['variable_name']
                sp_var_name = sp_codes.iloc[0]['variable_name']
                sp_power_indication = sp_codes.iloc[0]['bound']
                t_setpoint = t_codes.iloc[0]['bound']
                # Check if both variables exist in df
                if t_var_name in filtered_df.columns and sp_var_name in filtered_df.columns:
                    # Minutes where SP >= power-on threshold AND T >= setpoint
                    power_mask = filtered_df[sp_var_name] >= sp_power_indication
                    temp_mask = filtered_df[t_var_name] >= t_setpoint
                    combined_mask = power_mask & temp_mask

                    # rolling(...).min() == 1 is True only where the condition held for
                    # default_fault_time consecutive minutes
                    consecutive_condition = combined_mask.rolling(window=default_fault_time).min() == 1
                    if consecutive_condition.any():
                        # Get the first index where condition was met
                        first_true_index = consecutive_condition.idxmax()
                        # Shift back to the start of the consecutive run (the rolling window
                        # flags its last minute, not its first)
                        adjusted_time = first_true_index - pd.Timedelta(minutes=default_fault_time-1)
                        _add_an_alarm(alarms, adjusted_time, sp_var_name, f"High swing tank setpoint: Swing tank was powered at {adjusted_time} although temperature was above {t_setpoint}.")
                        alarmed_for_day = True
            # ST alarm: setpoint variable deviated from its configured value for over 10 minutes
            if not alarmed_for_day and len(st_codes) == 1:
                st_var_name = st_codes.iloc[0]['variable_name']
                st_setpoint = st_codes.iloc[0]['bound']
                # Check if st_var_name exists in filtered_df
                if st_var_name in filtered_df.columns:
                    # Minutes where the recorded setpoint differs from the expected value
                    altered_mask = filtered_df[st_var_name] != st_setpoint
                    # 10-minute consecutive window (hard-coded, unlike default_fault_time above)
                    consecutive_condition = altered_mask.rolling(window=10).min() == 1
                    if consecutive_condition.any():
                        # Get the first index where condition was met
                        first_true_index = consecutive_condition.idxmax()
                        # Adjust for the rolling window
                        adjusted_time = first_true_index - pd.Timedelta(minutes=9)
                        _add_an_alarm(alarms, day, st_var_name, f"Swing tank setpoint was altered at {adjusted_time}")
                        alarmed_for_day = True
            # TP alarm: swing tank's share of daily total power exceeds the configured ratio
            if not alarmed_for_day and len(tp_codes) == 1 and len(sp_codes) == 1:
                tp_var_name = tp_codes.iloc[0]['variable_name']
                sp_var_name = sp_codes.iloc[0]['variable_name']
                tp_ratio = tp_codes.iloc[0]['bound']
                # Check if both variables exist in the daily dataframe
                if tp_var_name in daily_df.columns and sp_var_name in daily_df.columns:
                    # Guard against division by zero on days with no total power recorded
                    if day in daily_df.index and daily_df.loc[day, tp_var_name] != 0:
                        power_ratio = daily_df.loc[day, sp_var_name] / daily_df.loc[day, tp_var_name]
                        if power_ratio > tp_ratio:
                            _add_an_alarm(alarms, day, sp_var_name, f"High swing tank power ratio: Swing tank accounted for more than {tp_ratio * 100}% of daily power.")
    return _convert_silent_alarm_dict_to_df(alarms)
316
+
317
def flag_recirc_balance_valve(daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_ratio : float = 0.4) -> pd.DataFrame:
    """
    Function will take a pandas dataframe and location of alarm information in a csv,
    and create a dataframe with applicable alarm events.

    VarNames syntax:
        BV_ER_[OPTIONAL ID] : Indicates a power variable for an ER heater (equipment recirculation)
        BV_OUT_[OPTIONAL ID]:### - Indicates the heating output variable the ER heating contributes to. Optional ### for the percentage
            threshold that should not be crossed by the ER elements (default 0.4 for 40%)

    Parameters
    ----------
    daily_df: pd.DataFrame
        post-transformed dataframe for daily data. Used for checking recirculation balance by comparing sum of ER equipment
        power to heating output power.
    config : ecopipeline.ConfigManager
        The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
        called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
        The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
        name of each variable in the dataframe that requires alarming and the BV alarm codes (e.g., BV_ER_1, BV_OUT_1:0.5)
    system: str
        string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
    default_power_ratio : float
        Default power ratio threshold (as decimal, e.g., 0.4 for 40%) for OUT alarm codes when no custom bound is specified (default 0.4).
        Alarm triggers when sum of ER equipment >= (OUT value * ratio)

    Returns
    -------
    pd.DataFrame:
        Pandas dataframe with alarm events
    """
    if daily_df.empty:
        print("cannot flag missing balancing valve alarms. Dataframe is empty")
        return pd.DataFrame()
    variable_names_path = config.get_var_names_path()
    try:
        bounds_df = pd.read_csv(variable_names_path)
    except FileNotFoundError:
        print("File Not Found: ", variable_names_path)
        return pd.DataFrame()
    bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'BV',
                                               {'OUT' : default_power_ratio},
                                               system)
    if bounds_df.empty:
        return _convert_silent_alarm_dict_to_df({}) # no BV alarms to look into
    # Process each unique alarm_code_id
    alarms = {}
    for alarm_id in bounds_df['alarm_code_id'].unique():
        id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
        out_codes = id_group[id_group['alarm_code_type'] == 'OUT']
        er_codes = id_group[id_group['alarm_code_type'] == 'ER']
        # Validate BEFORE accessing iloc[0]: previously the access came first, so an id
        # with no OUT code raised an opaque IndexError instead of this exception. Also
        # require exactly one OUT code (was `> 1`, which let the zero case fall through).
        if len(out_codes) != 1 or len(er_codes) < 1:
            raise Exception(f"Improper alarm codes for balancing valve with id {alarm_id}")
        out_var_name = out_codes.iloc[0]['variable_name']
        out_bound = out_codes.iloc[0]['bound']
        if not out_var_name in daily_df.columns:
            continue
        # Day-invariant: the ER variable list and its presence check are hoisted out of the day loop
        er_var_names = er_codes['variable_name'].tolist()
        if not all(var in daily_df.columns for var in er_var_names):
            continue
        for day in daily_df.index:
            # Sum all ER variables for this day
            er_sum = daily_df.loc[day, er_var_names].sum()
            out_value = daily_df.loc[day, out_var_name]
            # Alarm when the ER equipment meets or exceeds the allowed fraction of heating output
            if er_sum >= out_value*out_bound:
                _add_an_alarm(alarms, day, out_var_name, f"Recirculation imbalance: Sum of recirculation equipment ({er_sum:.2f}) exceeds or equals {(out_bound * 100):.2f}% of heating output.")
    return _convert_silent_alarm_dict_to_df(alarms)
387
+
388
def flag_hp_inlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
                       default_temp_threshold : float = 115.0, fault_time : int = 5) -> pd.DataFrame:
    """
    Function will take a pandas dataframe and location of alarm information in a csv,
    and create a dataframe with applicable alarm events.

    VarNames syntax:
        HPI_POW_[OPTIONAL ID]:### - Indicates a power variable for the heat pump. ### is the power threshold (default 1.0) above which
            the heat pump is considered 'on'
        HPI_T_[OPTIONAL ID]:### - Indicates heat pump inlet temperature variable. ### is the temperature threshold (default 115.0)
            that should not be exceeded while the heat pump is on

    Parameters
    ----------
    df: pd.DataFrame
        post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in order minutes. If minutes
        are out of order or have gaps, the function may return erroneous alarms.
    daily_df: pd.DataFrame
        post-transformed dataframe for daily data.
    config : ecopipeline.ConfigManager
        The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
        called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
        The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
        name of each variable in the dataframe that requires alarming and the HPI alarm codes (e.g., HPI_POW_1:0.5, HPI_T_1:125.0)
    system: str
        string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
    default_power_threshold : float
        Default power threshold for POW alarm codes when no custom bound is specified (default 1.0). Heat pump is considered 'on'
        when power exceeds this value.
    default_temp_threshold : float
        Default temperature threshold for T alarm codes when no custom bound is specified (default 115.0). Alarm triggers when
        temperature exceeds this value while heat pump is on.
    fault_time : int
        Number of consecutive minutes that both power and temperature must exceed their thresholds before triggering an alarm (default 5).

    Returns
    -------
    pd.DataFrame:
        Pandas dataframe with alarm events
    """
    if df.empty:
        # Fixed copy-pasted message (previously referred to balancing valve alarms)
        print("cannot flag heat pump inlet temperature alarms. Dataframe is empty")
        return pd.DataFrame()
    variable_names_path = config.get_var_names_path()
    try:
        bounds_df = pd.read_csv(variable_names_path)
    except FileNotFoundError:
        print("File Not Found: ", variable_names_path)
        return pd.DataFrame()

    bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'HPI',
                                               {'POW' : default_power_threshold,
                                                'T' : default_temp_threshold},
                                               system)
    if bounds_df.empty:
        return _convert_silent_alarm_dict_to_df({}) # no alarms to look into

    # Process each unique alarm_code_id
    alarms = {}
    for alarm_id in bounds_df['alarm_code_id'].unique():
        # Day-invariant lookups hoisted out of the day loop (previously recomputed every day)
        id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
        pow_codes = id_group[id_group['alarm_code_type'] == 'POW']
        t_codes = id_group[id_group['alarm_code_type'] == 'T']
        # Validate BEFORE accessing iloc[0]: previously a missing POW or T code raised an
        # opaque IndexError before this check could fire. Message also corrected (was the
        # copy-pasted "balancing valve" text).
        if len(t_codes) != 1 or len(pow_codes) != 1:
            raise Exception(f"Improper alarm codes for heat pump inlet temperature with id {alarm_id}")
        pow_var_name = pow_codes.iloc[0]['variable_name']
        pow_thresh = pow_codes.iloc[0]['bound']
        t_var_name = t_codes.iloc[0]['variable_name']
        t_pretty_name = t_codes.iloc[0]['pretty_name']
        t_thresh = t_codes.iloc[0]['bound']
        if not (pow_var_name in df.columns and t_var_name in df.columns):
            continue
        for day in daily_df.index:
            next_day = day + pd.Timedelta(days=1)
            filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
            # Minutes where both power and inlet temperature exceed their thresholds
            power_mask = filtered_df[pow_var_name] > pow_thresh
            temp_mask = filtered_df[t_var_name] > t_thresh
            combined_mask = power_mask & temp_mask

            # rolling(...).min() == 1 is True only after fault_time consecutive True minutes
            consecutive_condition = combined_mask.rolling(window=fault_time).min() == 1
            if consecutive_condition.any():
                first_true_index = consecutive_condition.idxmax()
                # Shift back to the start of the consecutive run
                adjusted_time = first_true_index - pd.Timedelta(minutes=fault_time-1)
                _add_an_alarm(alarms, day, t_var_name, f"High heat pump inlet temperature: {t_pretty_name} was above {t_thresh:.1f} while HP was ON starting at {adjusted_time}.")

    return _convert_silent_alarm_dict_to_df(alarms)
475
+
476
def _process_bounds_df_alarm_codes(bounds_df : pd.DataFrame, alarm_tag : str, type_default_dict : dict = None, system : str = "") -> pd.DataFrame:
    """
    Expand alarm codes of format [TAG]_[TYPE]_[OPTIONAL_ID]:[BOUND] into one row per code.

    Parameters
    ----------
    bounds_df : pd.DataFrame
        Contents of Variable_Names.csv. Must contain "variable_name" and "alarm_codes"
        columns (and a "system" column when `system` is non-empty).
    alarm_tag : str
        Tag prefix to select (e.g. 'STS', 'BV', 'HPI'); codes not starting with it are dropped.
    type_default_dict : dict
        Maps alarm_code_type -> default bound used when a code specifies no ":BOUND".
        Defaults to an empty dict. (Was a mutable default argument `{}`; now None-sentinel.)
    system : str
        Optional system name to filter rows by before processing.

    Returns
    -------
    pd.DataFrame
        One row per parsed code with columns variable_name, alarm_codes, pretty_name,
        bound (float), alarm_code_type, alarm_code_id; empty DataFrame when nothing matches.

    Raises
    ------
    Exception
        On missing required columns, a missing 'system' column when `system` is set,
        or malformed alarm codes.
    """
    if type_default_dict is None:
        type_default_dict = {}
    if (system != ""):
        if not 'system' in bounds_df.columns:
            raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
        bounds_df = bounds_df.loc[bounds_df['system'] == system]
    # Work on a copy so the column assignments below don't write to a slice of the caller's frame
    bounds_df = bounds_df.copy()

    required_columns = ["variable_name", "alarm_codes"]
    for required_column in required_columns:
        if not required_column in bounds_df.columns:
            raise Exception(f"{required_column} is not present in Variable_Names.csv")
    # pretty_name falls back to variable_name where absent
    if not 'pretty_name' in bounds_df.columns:
        bounds_df['pretty_name'] = bounds_df['variable_name']
    else:
        bounds_df['pretty_name'] = bounds_df['pretty_name'].fillna(bounds_df['variable_name'])

    bounds_df = bounds_df.loc[:, ["variable_name", "alarm_codes", "pretty_name"]]
    # Keep rows with at least 2 non-null values among the 3 retained columns
    bounds_df.dropna(axis=0, thresh=2, inplace=True)

    # Check if all alarm_codes are null or if dataframe is empty
    if bounds_df.empty or bounds_df['alarm_codes'].isna().all():
        return pd.DataFrame()

    bounds_df = bounds_df[bounds_df['alarm_codes'].str.contains(alarm_tag, na=False)]

    # Split alarm_codes by semicolons and create a row for each tagged code
    expanded_rows = []
    for idx, row in bounds_df.iterrows():
        alarm_codes = str(row['alarm_codes']).split(';')
        tag_codes = [code.strip() for code in alarm_codes if code.strip().startswith(alarm_tag)]

        for tag_code in tag_codes:
            new_row = row.copy()
            if ":" in tag_code:
                tag_parts = tag_code.split(':')
                if len(tag_parts) > 2:
                    raise Exception(f"Improperly formated alarm code : {tag_code}")
                new_row['bound'] = tag_parts[1]
                tag_code = tag_parts[0]
            else:
                new_row['bound'] = None
            new_row['alarm_codes'] = tag_code

            expanded_rows.append(new_row)

    if expanded_rows:
        bounds_df = pd.DataFrame(expanded_rows)
    else:
        return pd.DataFrame()# no tagged alarms to look into

    # Split [TAG]_[TYPE]_[ID] into its type and (optional) id parts
    alarm_code_parts = []
    for idx, row in bounds_df.iterrows():
        parts = row['alarm_codes'].split('_')
        if len(parts) == 2:
            alarm_code_parts.append([parts[1], "No ID"])
        elif len(parts) == 3:
            alarm_code_parts.append([parts[1], parts[2]])
        else:
            # Report the actual tag being processed (previously hardcoded "STS" for every tag)
            raise Exception(f"improper {alarm_tag} alarm code format for {row['variable_name']}")
    if alarm_code_parts:
        bounds_df[['alarm_code_type', 'alarm_code_id']] = pd.DataFrame(alarm_code_parts, index=bounds_df.index)

    # Replace None bounds with appropriate defaults based on alarm_code_type
    for idx, row in bounds_df.iterrows():
        if pd.isna(row['bound']) or row['bound'] is None:
            if row['alarm_code_type'] in type_default_dict.keys():
                bounds_df.at[idx, 'bound'] = type_default_dict[row['alarm_code_type']]
    # Coerce bound column to float (unparseable bounds become NaN)
    bounds_df['bound'] = pd.to_numeric(bounds_df['bound'], errors='coerce').astype(float)
    return bounds_df
547
+
548
def _add_an_alarm(alarm_dict : dict, day : datetime, var_name : str, alarm_string : str):
    """Record an alarm for `var_name` in `alarm_dict`, keyed by calendar day.

    The timestamp is floored to midnight so every alarm raised on the same
    date lands under a single key; each entry is a [variable, message] pair.
    """
    day_key = pd.Timestamp(day).normalize()
    alarm_dict.setdefault(day_key, []).append([var_name, alarm_string])
556
+
191
557
  def _convert_silent_alarm_dict_to_df(alarm_dict : dict) -> pd.DataFrame:
192
558
  events = {
193
559
  'start_time_pt' : [],
@@ -1,3 +1,4 @@
1
- from .extract import get_noaa_data, json_to_df, extract_files, get_last_full_day_from_db, get_db_row_from_time, extract_new, csv_to_df, get_sub_dirs, msa_to_df, fm_api_to_df, small_planet_control_to_df, dent_csv_to_df, flow_csv_to_df, pull_egauge_data, egauge_csv_to_df, remove_char_sequence_from_csv_header, tb_api_to_df, skycentrics_api_to_df
1
+ from .extract import get_noaa_data, json_to_df, extract_files, get_last_full_day_from_db, get_db_row_from_time, extract_new, csv_to_df, get_sub_dirs, msa_to_df, fm_api_to_df, small_planet_control_to_df, dent_csv_to_df, flow_csv_to_df, pull_egauge_data, egauge_csv_to_df, remove_char_sequence_from_csv_header, tb_api_to_df, skycentrics_api_to_df,get_OAT_open_meteo
2
2
  __all__ = ["get_noaa_data", "json_to_df", "extract_files", "get_last_full_day_from_db", "get_db_row_from_time", 'extract_new', "csv_to_df", "get_sub_dirs", "msa_to_df", "fm_api_to_df",
3
- "small_planet_control_to_df","dent_csv_to_df","flow_csv_to_df","pull_egauge_data", "egauge_csv_to_df","remove_char_sequence_from_csv_header", "tb_api_to_df", "skycentrics_api_to_df"]
3
+ "small_planet_control_to_df","dent_csv_to_df","flow_csv_to_df","pull_egauge_data", "egauge_csv_to_df","remove_char_sequence_from_csv_header", "tb_api_to_df", "skycentrics_api_to_df",
4
+ "get_OAT_open_meteo"]
@@ -1,5 +1,6 @@
1
1
  from typing import List
2
2
  import pandas as pd
3
+ import openmeteo_requests
3
4
  import re
4
5
  from ftplib import FTP
5
6
  from datetime import datetime, timedelta
@@ -695,17 +696,29 @@ def skycentrics_api_to_df(config: ConfigManager, startTime: datetime = None, end
695
696
  try:
696
697
  df = pd.DataFrame()
697
698
  temp_dfs = []
699
+ ###############
700
+ if endTime is None:
701
+ endTime = datetime.utcnow()
702
+ if startTime is None:
703
+ startTime = endTime - timedelta(1)
698
704
  time_parser = startTime
699
705
  while time_parser < endTime:
700
- start_time_str = time_parser.strftime('%a, %d %b %H:%M:%S GMT')
701
- skycentrics_token, date_str = config.get_skycentrics_token(request_str=f'GET /api/devices/{config.api_device_id}/data HTTP/1.1',date_str=start_time_str)
702
- response = requests.get(f'https://api.skycentrics.com/api/devices/{config.api_device_id}/data',
703
- headers={'Date': date_str, 'x-sc-api-token': skycentrics_token, 'Accept': 'application/json'})
706
+ time_parse_end = time_parser + timedelta(1)
707
+ start_time_str = time_parser.strftime('%Y-%m-%dT%H:%M:%S')
708
+ end_time_str = time_parse_end.strftime('%Y-%m-%dT%H:%M:%S')
709
+ skycentrics_token, date_str = config.get_skycentrics_token(
710
+ request_str=f'GET /api/devices/{config.api_device_id}/data?b={start_time_str}&e={end_time_str}&g=1 HTTP/1.1',
711
+ date_str=None)
712
+ response = requests.get(f'https://api.skycentrics.com/api/devices/{config.api_device_id}/data?b={start_time_str}&e={end_time_str}&g=1',
713
+ headers={'Date': date_str, 'x-sc-api-token': skycentrics_token, 'Accept': 'application/gzip'})
704
714
  if response.status_code == 200:
705
- norm_data = pd.json_normalize(response.json(), record_path=['sensors'], meta=['time'], meta_prefix='response_')
715
+ # Decompress the gzip response
716
+ decompressed_data = gzip.decompress(response.content)
717
+ # Parse JSON from decompressed data
718
+ json_data = json.loads(decompressed_data)
719
+ norm_data = pd.json_normalize(json_data, record_path=['sensors'], meta=['time'], meta_prefix='response_')
706
720
  if len(norm_data) != 0:
707
-
708
- norm_data["time_pt"] = pd.to_datetime(norm_data["response_time"])
721
+ norm_data["time_pt"] = pd.to_datetime(norm_data["response_time"], utc=True)
709
722
 
710
723
  norm_data["time_pt"] = norm_data["time_pt"].dt.tz_convert(time_zone)
711
724
  norm_data = pd.pivot_table(norm_data, index="time_pt", columns="id", values="data")
@@ -716,8 +729,8 @@ def skycentrics_api_to_df(config: ConfigManager, startTime: datetime = None, end
716
729
  temp_dfs.append(norm_data)
717
730
  else:
718
731
  print(f"Failed to make GET request. Status code: {response.status_code} {response.json()}")
719
- time.sleep(60)
720
- time_parser = time_parser + timedelta(minutes=1)
732
+ time_parser = time_parse_end
733
+ ##############
721
734
  if len(temp_dfs) > 0:
722
735
  df = pd.concat(temp_dfs, ignore_index=False)
723
736
  if create_csv:
@@ -733,7 +746,7 @@ def skycentrics_api_to_df(config: ConfigManager, startTime: datetime = None, end
733
746
  except Exception as e:
734
747
  print(f"An error occurred: {e}")
735
748
  raise e
736
- return pd.DataFrame()
749
+ # return pd.DataFrame()
737
750
 
738
751
  def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True) -> pd.DataFrame:
739
752
  """
@@ -1017,6 +1030,62 @@ def get_sub_dirs(dir: str) -> List[str]:
1017
1030
  return
1018
1031
  return directories
1019
1032
 
1033
def get_OAT_open_meteo(lat: float, long: float, start_date: datetime, end_date: datetime = None, time_zone: str = "America/Los_Angeles",
                       use_noaa_names : bool = True) -> pd.DataFrame:
    """
    Retrieve hourly outdoor air temperature from the Open-Meteo historical weather archive API.

    Parameters
    ----------
    lat : float
        Latitude of the location to query.
    long : float
        Longitude of the location to query.
    start_date : datetime
        First date (inclusive) of the period to retrieve; only the date portion is used.
    end_date : datetime
        Last date (inclusive) of the period to retrieve. Defaults to yesterday when None.
    time_zone : str
        Time zone name passed to the API and used to localize returned timestamps
        (default "America/Los_Angeles").
    use_noaa_names : bool
        If True, rename 'temperature_2m' to 'airTemp_F' and add an empty 'dewPoint_F'
        column so the result matches the column layout of the NOAA weather extractor.

    Returns
    -------
    pd.DataFrame:
        Hourly temperatures in Fahrenheit indexed by naive local 'time_pt'; an empty
        DataFrame if the request fails for any reason.
    """
    if end_date is None:
        end_date = datetime.today() - timedelta(1)
    start_date_str = start_date.date().strftime('%Y-%m-%d')
    end_date_str = end_date.date().strftime('%Y-%m-%d')
    print(f"Getting Open Meteao data for {start_date_str} to {end_date_str}")
    try:
        openmeteo = openmeteo_requests.Client()

        url = "https://archive-api.open-meteo.com/v1/archive"
        params = {
            "latitude": lat,
            "longitude": long,
            "start_date": start_date_str,
            "end_date": end_date_str,
            "hourly": "temperature_2m",
            "temperature_unit": "fahrenheit",
            "timezone": time_zone,
        }
        responses = openmeteo.weather_api(url, params=params)

        # Process first location. Add a for-loop for multiple locations or weather models
        response = responses[0]

        # Process hourly data. The order of variables needs to be the same as requested.
        hourly = response.Hourly()
        hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()

        # Reconstruct the hourly timestamp axis from the response's start/end/interval
        hourly_data = {"time_pt": pd.date_range(
            start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
            end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
            freq = pd.Timedelta(seconds = hourly.Interval()),
            inclusive = "left"
        )}

        hourly_data["temperature_2m"] = hourly_temperature_2m
        # Convert from UTC to the requested zone, then drop tz info to match pipeline convention
        hourly_data["time_pt"] = hourly_data["time_pt"].tz_convert(time_zone).tz_localize(None)

        hourly_data = pd.DataFrame(hourly_data)
        hourly_data.set_index('time_pt', inplace = True)

        if use_noaa_names:
            hourly_data = hourly_data.rename(columns = {'temperature_2m':'airTemp_F'})
            # dewPoint_F is not available from this endpoint; filled with None for schema parity
            hourly_data['dewPoint_F'] = None

        # Convert float32 to float64 for SQL database compatibility
        for col in hourly_data.select_dtypes(include=['float32']).columns:
            hourly_data[col] = hourly_data[col].astype('float64')

        return hourly_data
    except Exception as e:
        # Best-effort: any API/parse failure yields an empty frame rather than aborting the pipeline
        print(f'Could not get OAT data: {e}')
        return pd.DataFrame()
1088
+
1020
1089
 
1021
1090
  def get_noaa_data(station_names: List[str], config : ConfigManager, station_ids : dict = {}) -> dict:
1022
1091
  """
@@ -1034,6 +1103,7 @@ def get_noaa_data(station_names: List[str], config : ConfigManager, station_ids
1034
1103
  dict:
1035
1104
  Dictionary with key as Station Name and Value as DF of Parsed Weather Data
1036
1105
  """
1106
+ #TODO swap out for this if empty: https://open-meteo.com/en/docs/historical-weather-api?start_date=2025-12-29&latitude=47.6&longitude=-122.33&temperature_unit=fahrenheit&end_date=2026-01-04
1037
1107
  formatted_dfs = {}
1038
1108
  weather_directory = config.get_weather_dir_path()
1039
1109
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ecopipeline
3
- Version: 1.0.2
3
+ Version: 1.0.3
4
4
  Summary: Contains functions for use in Ecotope Datapipelines
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
@@ -1,8 +1,8 @@
1
1
  ecopipeline/__init__.py,sha256=pjC00JWsjVAhS0jUKHD-wyi4UIpTsWbIg9JaxLS1mlc,275
2
- ecopipeline/event_tracking/__init__.py,sha256=SV2kkvJgptjeyLQlqHWcDRpQO6-JC433_dRZ3H9-ZNU,131
3
- ecopipeline/event_tracking/event_tracking.py,sha256=NxrGJylD5m5t_OI4VwItbUUbtLW19LrDLy3sjUeQL80,29981
4
- ecopipeline/extract/__init__.py,sha256=EHJ5lHWuLOwEOOzk5GmgAeZKCLQHDDaiWzLLs-nH7v8,723
5
- ecopipeline/extract/extract.py,sha256=xK1fg7i0mCN04qCx3AM5VDRpM6ZQ7_2ai0BbQF2WYPk,55306
2
+ ecopipeline/event_tracking/__init__.py,sha256=1saCNVWbcp7bwz1kXfKa2d8aUoGWNWvWSj7IJY4fzc8,219
3
+ ecopipeline/event_tracking/event_tracking.py,sha256=LASkal4MgGLN7UzrAjbKw3eaM9JwKwt3YpkIraRSgiE,51172
4
+ ecopipeline/extract/__init__.py,sha256=j_8-q_yrPRySwYyloMv5v2XQeYYyYfX0N-MW2ZDA4rg,775
5
+ ecopipeline/extract/extract.py,sha256=MykzAchL_0LY0NG9TOAadpm5MSgjn7lPRI8AvSIMUBk,58530
6
6
  ecopipeline/load/__init__.py,sha256=NLa_efQJZ8aP-J0Y5xx9DP7mtfRH9jY6Jz1ZMZN_BAA,292
7
7
  ecopipeline/load/load.py,sha256=PaSGWOZI0Xg44_SWN7htn2DPIAU_s8mOtCGibXq25tM,24614
8
8
  ecopipeline/transform/__init__.py,sha256=FjGcNpYNEYPdYQhogNRrQlKH2hGNNLv55jig1KaGaHY,2686
@@ -18,8 +18,8 @@ ecopipeline/utils/pkls/tasseron_resistance_to_temp_3.pkl,sha256=9UpCZ3rSu0mU4LoT
18
18
  ecopipeline/utils/pkls/tasseron_temp_to_resistance_2.pkl,sha256=Uq6I2dl5GcR5wb5QxurgDP4A2D4-N3neDL3BKtx53A4,2047
19
19
  ecopipeline/utils/pkls/veris_resistance_to_temp_3.pkl,sha256=CVbUWJvOQXg2nZ-0GP9FWtU-ffMGcwg3ok5q669bmf8,1472
20
20
  ecopipeline/utils/pkls/veris_temp_to_resistance_2.pkl,sha256=JiEp4SxR9eq-olKd8TKAG37iHMscJE_2SSHizGqBdno,1472
21
- ecopipeline-1.0.2.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- ecopipeline-1.0.2.dist-info/METADATA,sha256=g9VTZ2Gi045tlTwMThfSRt7v56XIubzch7Aasbj32gk,2329
23
- ecopipeline-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
- ecopipeline-1.0.2.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
25
- ecopipeline-1.0.2.dist-info/RECORD,,
21
+ ecopipeline-1.0.3.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ ecopipeline-1.0.3.dist-info/METADATA,sha256=zHiCg-0X-XgTFWJYCH_NkIIp-E3izV8-Y5ae0c480_0,2329
23
+ ecopipeline-1.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
+ ecopipeline-1.0.3.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
25
+ ecopipeline-1.0.3.dist-info/RECORD,,