ecopipeline 0.11.4__tar.gz → 1.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. ecopipeline-1.0.3/MANIFEST.in +2 -0
  2. {ecopipeline-0.11.4/src/ecopipeline.egg-info → ecopipeline-1.0.3}/PKG-INFO +1 -1
  3. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/setup.cfg +1 -1
  4. ecopipeline-1.0.3/src/ecopipeline/event_tracking/__init__.py +3 -0
  5. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/event_tracking/event_tracking.py +397 -25
  6. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/extract/__init__.py +3 -2
  7. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/extract/extract.py +161 -20
  8. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/transform/__init__.py +2 -2
  9. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/transform/transform.py +86 -9
  10. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/utils/ConfigManager.py +20 -1
  11. ecopipeline-1.0.3/src/ecopipeline/utils/pkls/__init__.py +0 -0
  12. ecopipeline-1.0.3/src/ecopipeline/utils/pkls/tasseron_resistance_to_temp_3.pkl +0 -0
  13. ecopipeline-1.0.3/src/ecopipeline/utils/pkls/tasseron_temp_to_resistance_2.pkl +0 -0
  14. ecopipeline-1.0.3/src/ecopipeline/utils/pkls/veris_resistance_to_temp_3.pkl +0 -0
  15. ecopipeline-1.0.3/src/ecopipeline/utils/pkls/veris_temp_to_resistance_2.pkl +0 -0
  16. {ecopipeline-0.11.4 → ecopipeline-1.0.3/src/ecopipeline.egg-info}/PKG-INFO +1 -1
  17. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline.egg-info/SOURCES.txt +7 -1
  18. ecopipeline-0.11.4/src/ecopipeline/event_tracking/__init__.py +0 -2
  19. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/LICENSE +0 -0
  20. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/README.md +0 -0
  21. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/pyproject.toml +0 -0
  22. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/setup.py +0 -0
  23. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/__init__.py +0 -0
  24. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/load/__init__.py +0 -0
  25. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/load/load.py +0 -0
  26. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/transform/bayview.py +0 -0
  27. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/transform/lbnl.py +0 -0
  28. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/utils/NOAADataDownloader.py +0 -0
  29. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/utils/__init__.py +0 -0
  30. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/utils/unit_convert.py +0 -0
  31. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline.egg-info/dependency_links.txt +0 -0
  32. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline.egg-info/requires.txt +0 -0
  33. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline.egg-info/top_level.txt +0 -0
@@ -0,0 +1,2 @@
+ include README.md
+ recursive-include src/ecopipeline/utils/pkls *.pkl
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ecopipeline
- Version: 0.11.4
+ Version: 1.0.3
  Summary: Contains functions for use in Ecotope Datapipelines
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
@@ -1,6 +1,6 @@
  [metadata]
  name = ecopipeline
- version = 0.11.4
+ version = 1.0.3
  authors = ["Carlos Bello, <bellocarlos@seattleu.edu>, Emil Fahrig <fahrigemil@seattleu.edu>, Casey Mang <cmang@seattleu.edu>, Julian Harris <harrisjulian@seattleu.edu>, Roger Tram <rtram@seattleu.edu>, Nolan Price <nolan@ecotope.com>"]
  description = Contains functions for use in Ecotope Datapipelines
  long_description = file: README.md
@@ -0,0 +1,3 @@
+ from .event_tracking import *
+ __all__ = ['central_alarm_df_creator','flag_boundary_alarms','power_ratio_alarm','flag_abnormal_COP','flag_high_swing_setpoint',
+            'flag_recirc_balance_valve','flag_hp_inlet_temp']
@@ -1,6 +1,6 @@
  import pandas as pd
  import numpy as np
- import datetime as dt
+ import datetime as datetime
  from ecopipeline import ConfigManager
  import re
  import mysql.connector.errors as mysqlerrors
@@ -13,29 +13,24 @@ def central_alarm_df_creator(df: pd.DataFrame, daily_data : pd.DataFrame, config
      day_list = daily_data.index.to_list()
      print('Checking for alarms...')
      alarm_df = _convert_silent_alarm_dict_to_df({})
-     boundary_alarm_df = flag_boundary_alarms(df, config, full_days=day_list, system=system, default_fault_time=default_boundary_fault_time)
-     pwr_alarm_df = power_ratio_alarm(daily_data, config, day_table_name=config.get_table_name(day_table_name_header), system=system, ratio_period_days=power_ratio_period_days)
-     abnormal_COP_df = flag_abnormal_COP(daily_data, config, system=system, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)
-
-     if len(boundary_alarm_df) > 0:
-         print("Boundary alarms detected. Adding them to event df...")
-         alarm_df = boundary_alarm_df
-     else:
-         print("No boundary alarms detected.")
-
-     if len(pwr_alarm_df) > 0:
-         print("Power alarms detected. Adding them to event df...")
-         alarm_df = pd.concat([alarm_df, pwr_alarm_df])
-     else:
-         print("No power alarms detected.")
-
-     if _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
-         print("Ongoing DATA_LOSS_COP detected. No further DATA_LOSS_COP events will be uploaded")
-     elif len(abnormal_COP_df) > 0:
-         print("Abnormal COPs detected. Adding them to event df...")
-         alarm_df = pd.concat([alarm_df, abnormal_COP_df])
-     else:
-         print("No abnormal COPs.")
+     dict_of_alarms = {}
+     dict_of_alarms['boundary'] = flag_boundary_alarms(df, config, full_days=day_list, system=system, default_fault_time=default_boundary_fault_time)
+     dict_of_alarms['power ratio'] = power_ratio_alarm(daily_data, config, day_table_name=config.get_table_name(day_table_name_header), system=system, ratio_period_days=power_ratio_period_days)
+     dict_of_alarms['abnormal COP'] = flag_abnormal_COP(daily_data, config, system=system, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)
+     dict_of_alarms['swing tank setpoint'] = flag_high_swing_setpoint(df, daily_data, config, system=system)
+     dict_of_alarms['recirculation loop balancing valve'] = flag_recirc_balance_valve(daily_data, config, system=system)
+     dict_of_alarms['HPWH inlet temperature'] = flag_hp_inlet_temp(df, daily_data, config, system)
+
+     ongoing_COP_exception = ['abnormal COP']
+
+     for key, value in dict_of_alarms.items():
+         if key in ongoing_COP_exception and _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
+             print("Ongoing DATA_LOSS_COP detected. No further DATA_LOSS_COP events will be uploaded")
+         elif len(value) > 0:
+             print(f"Detected {key} alarm(s). Adding to event df...")
+             alarm_df = pd.concat([alarm_df, value])
+         else:
+             print(f"No {key} alarm(s) detected.")

      return alarm_df

@@ -78,7 +73,7 @@ def flag_abnormal_COP(daily_data: pd.DataFrame, config : ConfigManager, system:
      for bound_var, bounds in bounds_df.iterrows():
          if bound_var in cop_columns:
              for day, day_values in daily_data.iterrows():
-                 if day_values[bound_var] > bounds['high_alarm'] or day_values[bound_var] < bounds['low_alarm']:
+                 if day_values[bound_var] is not None and (day_values[bound_var] > bounds['high_alarm'] or day_values[bound_var] < bounds['low_alarm']):
                      alarm_str = f"Unexpected COP Value detected: {bounds['pretty_name']} = {round(day_values[bound_var],2)}"
                      if day in alarms_dict:
                          alarms_dict[day].append([bound_var, alarm_str])
@@ -135,6 +130,9 @@ def flag_boundary_alarms(df: pd.DataFrame, config : ConfigManager, default_fault
      pd.DataFrame:
          Pandas dataframe with alarm events
      """
+     if df.empty:
+         print("cannot flag boundary alarms. Dataframe is empty")
+         return pd.DataFrame()
      variable_names_path = config.get_var_names_path()
      try:
          bounds_df = pd.read_csv(variable_names_path)
@@ -185,6 +183,377 @@ def flag_boundary_alarms(df: pd.DataFrame, config : ConfigManager, default_fault

      return _convert_silent_alarm_dict_to_df(alarms)

+ def flag_high_swing_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, default_fault_time : int = 3,
+                              system: str = "", default_setpoint : float = 130.0, default_power_indication : float = 1.0,
+                              default_power_ratio : float = 0.4) -> pd.DataFrame:
+     """
+     Function will take a pandas dataframe and location of alarm information in a csv,
+     and create a dataframe with applicable alarm events.
+
+     VarNames syntax:
+         STS_T_ID:### - Swing Tank Outlet Temperature. Alarm triggered if over number ### (or 130) for 3 minutes with power on
+         STS_SP_ID:### - Swing Tank Power. ### is lowest recorded power for Swing Tank to be considered 'on'. Defaults to 1.0
+         STS_TP_ID:### - Total System Power for ratio alarming if swing tank power is more than ### (40% default) of usage
+         STS_ST_ID:### - Swing Tank Setpoint that should not change at all from ### (default 130)
+
+     Parameters
+     ----------
+     df: pd.DataFrame
+         post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in-order minutes. If minutes
+         are out of order or have gaps, the function may return erroneous alarms.
+     daily_df: pd.DataFrame
+         post-transformed dataframe for daily data. Used for checking power ratios and determining which days to process.
+     config : ecopipeline.ConfigManager
+         The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
+         called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
+         The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
+         name of each variable in the dataframe that requires alarming and the STS alarm codes (e.g., STS_T_1:140, STS_SP_1:2.0)
+     default_fault_time : int
+         Number of consecutive minutes for T+SP alarms (default 3). T+SP alarms trigger when tank is powered and temperature exceeds
+         setpoint for this many consecutive minutes.
+     system: str
+         string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
+     default_setpoint : float
+         Default temperature setpoint in degrees for T and ST alarm codes when no custom bound is specified (default 130.0)
+     default_power_indication : float
+         Default power threshold in kW for SP alarm codes when no custom bound is specified (default 1.0)
+     default_power_ratio : float
+         Default power ratio threshold (as decimal, e.g., 0.4 for 40%) for TP alarm codes when no custom bound is specified (default 0.4)
+
+     Returns
+     -------
+     pd.DataFrame:
+         Pandas dataframe with alarm events
+     """
+     if df.empty:
+         print("cannot flag swing tank setpoint alarms. Dataframe is empty")
+         return pd.DataFrame()
+     variable_names_path = config.get_var_names_path()
+     try:
+         bounds_df = pd.read_csv(variable_names_path)
+     except FileNotFoundError:
+         print("File Not Found: ", variable_names_path)
+         return pd.DataFrame()
+
+     bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'STS',
+                                                {'T' : default_setpoint,
+                                                 'SP': default_power_indication,
+                                                 'TP': default_power_ratio,
+                                                 'ST': default_setpoint},
+                                                system)
+     if bounds_df.empty:
+         return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
+
+     # Process each unique alarm_code_id
+     alarms = {}
+     for day in daily_df.index:
+         next_day = day + pd.Timedelta(days=1)
+         filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
+         alarmed_for_day = False
+         for alarm_id in bounds_df['alarm_code_id'].unique():
+             id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
+
+             # Get T and SP alarm codes for this ID
+             t_codes = id_group[id_group['alarm_code_type'] == 'T']
+             sp_codes = id_group[id_group['alarm_code_type'] == 'SP']
+             tp_codes = id_group[id_group['alarm_code_type'] == 'TP']
+             st_codes = id_group[id_group['alarm_code_type'] == 'ST']
+
+             # Check for multiple T or SP codes with same ID
+             if len(t_codes) > 1 or len(sp_codes) > 1 or len(tp_codes) > 1 or len(st_codes) > 1:
+                 raise Exception(f"Improper alarm codes for swing tank setpoint with id {alarm_id}")
+
+             # Check if we have both T and SP
+             if len(t_codes) == 1 and len(sp_codes) == 1:
+                 t_var_name = t_codes.iloc[0]['variable_name']
+                 sp_var_name = sp_codes.iloc[0]['variable_name']
+                 sp_power_indication = sp_codes.iloc[0]['bound']
+                 t_setpoint = t_codes.iloc[0]['bound']
+                 # Check if both variables exist in df
+                 if t_var_name in filtered_df.columns and sp_var_name in filtered_df.columns:
+                     # Check for consecutive minutes where SP > default_power_indication
+                     # AND T >= default_setpoint
+                     power_mask = filtered_df[sp_var_name] >= sp_power_indication
+                     temp_mask = filtered_df[t_var_name] >= t_setpoint
+                     combined_mask = power_mask & temp_mask
+
+                     # Check for 3 consecutive minutes
+                     consecutive_condition = combined_mask.rolling(window=default_fault_time).min() == 1
+                     if consecutive_condition.any():
+                         # Get the first index where condition was met
+                         first_true_index = consecutive_condition.idxmax()
+                         # Adjust for the rolling window (first fault_time-1 minutes don't count)
+                         adjusted_time = first_true_index - pd.Timedelta(minutes=default_fault_time-1)
+                         _add_an_alarm(alarms, adjusted_time, sp_var_name, f"High swing tank setpoint: Swing tank was powered at {adjusted_time} although temperature was above {t_setpoint}.")
+                         alarmed_for_day = True
+             if not alarmed_for_day and len(st_codes) == 1:
+                 st_var_name = st_codes.iloc[0]['variable_name']
+                 st_setpoint = st_codes.iloc[0]['bound']
+                 # Check if st_var_name exists in filtered_df
+                 if st_var_name in filtered_df.columns:
+                     # Check if setpoint was altered for over 10 minutes
+                     altered_mask = filtered_df[st_var_name] != st_setpoint
+                     consecutive_condition = altered_mask.rolling(window=10).min() == 1
+                     if consecutive_condition.any():
+                         # Get the first index where condition was met
+                         first_true_index = consecutive_condition.idxmax()
+                         # Adjust for the rolling window
+                         adjusted_time = first_true_index - pd.Timedelta(minutes=9)
+                         _add_an_alarm(alarms, day, st_var_name, f"Swing tank setpoint was altered at {adjusted_time}")
+                         alarmed_for_day = True
+             if not alarmed_for_day and len(tp_codes) == 1 and len(sp_codes) == 1:
+                 tp_var_name = tp_codes.iloc[0]['variable_name']
+                 sp_var_name = sp_codes.iloc[0]['variable_name']
+                 tp_ratio = tp_codes.iloc[0]['bound']
+                 # Check if both variables exist in df
+                 if tp_var_name in daily_df.columns and sp_var_name in daily_df.columns:
+                     # Check if swing tank power ratio exceeds threshold
+                     if day in daily_df.index and daily_df.loc[day, tp_var_name] != 0:
+                         power_ratio = daily_df.loc[day, sp_var_name] / daily_df.loc[day, tp_var_name]
+                         if power_ratio > tp_ratio:
+                             _add_an_alarm(alarms, day, sp_var_name, f"High swing tank power ratio: Swing tank accounted for more than {tp_ratio * 100}% of daily power.")
+     return _convert_silent_alarm_dict_to_df(alarms)
+
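As a hedged illustration of the STS syntax above, a Variable_Names.csv wired for one swing tank might carry rows like the following (the variable names are hypothetical; thresholds follow the docstring's examples, and the call assumes timestamp-indexed minute and daily dataframes):

# Hypothetical Variable_Names.csv rows for the STS alarm codes:
#
#   variable_name,alarm_codes,pretty_name
#   SwingTank_Temp,"STS_T_1:140;STS_ST_1:130",Swing Tank Outlet Temp
#   SwingTank_Power,STS_SP_1:1.5,Swing Tank Power
#   System_Power,STS_TP_1:0.4,Total System Power

alarm_df = flag_high_swing_setpoint(minute_df, daily_df, config)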
+ def flag_recirc_balance_valve(daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_ratio : float = 0.4) -> pd.DataFrame:
+     """
+     Function will take a pandas dataframe and location of alarm information in a csv,
+     and create a dataframe with applicable alarm events.
+
+     VarNames syntax:
+         BV_ER_[OPTIONAL ID] : Indicates a power variable for an ER heater (equipment recirculation)
+         BV_OUT_[OPTIONAL ID]:### - Indicates the heating output variable the ER heating contributes to. Optional ### for the percentage
+             threshold that should not be crossed by the ER elements (default 0.4 for 40%)
+
+     Parameters
+     ----------
+     daily_df: pd.DataFrame
+         post-transformed dataframe for daily data. Used for checking recirculation balance by comparing sum of ER equipment
+         power to heating output power.
+     config : ecopipeline.ConfigManager
+         The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
+         called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
+         The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
+         name of each variable in the dataframe that requires alarming and the BV alarm codes (e.g., BV_ER_1, BV_OUT_1:0.5)
+     system: str
+         string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
+     default_power_ratio : float
+         Default power ratio threshold (as decimal, e.g., 0.4 for 40%) for OUT alarm codes when no custom bound is specified (default 0.4).
+         Alarm triggers when sum of ER equipment >= (OUT value * default_power_ratio)
+
+     Returns
+     -------
+     pd.DataFrame:
+         Pandas dataframe with alarm events
+     """
+     if daily_df.empty:
+         print("cannot flag balancing valve alarms. Dataframe is empty")
+         return pd.DataFrame()
+     variable_names_path = config.get_var_names_path()
+     try:
+         bounds_df = pd.read_csv(variable_names_path)
+     except FileNotFoundError:
+         print("File Not Found: ", variable_names_path)
+         return pd.DataFrame()
+     bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'BV',
+                                                {'OUT' : default_power_ratio},
+                                                system)
+     if bounds_df.empty:
+         return _convert_silent_alarm_dict_to_df({}) # no BV alarms to look into
+     # Process each unique alarm_code_id
+     alarms = {}
+     for alarm_id in bounds_df['alarm_code_id'].unique():
+         id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
+         out_codes = id_group[id_group['alarm_code_type'] == 'OUT']
+         er_codes = id_group[id_group['alarm_code_type'] == 'ER']
+         if len(out_codes) != 1 or len(er_codes) < 1:
+             raise Exception(f"Improper alarm codes for balancing valve with id {alarm_id}")
+         out_var_name = out_codes.iloc[0]['variable_name']
+         out_bound = out_codes.iloc[0]['bound']
+         for day in daily_df.index:
+             if out_var_name in daily_df.columns:
+                 # Get list of ER variable names
+                 er_var_names = er_codes['variable_name'].tolist()
+
+                 # Check if all ER variables exist in daily_df
+                 if all(var in daily_df.columns for var in er_var_names):
+                     # Sum all ER variables for this day
+                     er_sum = daily_df.loc[day, er_var_names].sum()
+                     out_value = daily_df.loc[day, out_var_name]
+
+                     # Check if sum of ER >= OUT value * bound
+                     if er_sum >= out_value*out_bound:
+                         _add_an_alarm(alarms, day, out_var_name, f"Recirculation imbalance: Sum of recirculation equipment ({er_sum:.2f}) exceeds or equals {(out_bound * 100):.2f}% of heating output.")
+     return _convert_silent_alarm_dict_to_df(alarms)
+
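A hedged sketch of the BV syntax, assuming two hypothetical ER heater power columns feeding one heating output column; the alarm fires on any day when the two ER sums reach 50% of HeatOut_Power:

# Hypothetical Variable_Names.csv rows for the BV alarm codes:
#
#   variable_name,alarm_codes,pretty_name
#   ER_Heater1_Power,BV_ER_1,ER Heater 1
#   ER_Heater2_Power,BV_ER_1,ER Heater 2
#   HeatOut_Power,BV_OUT_1:0.5,Heating Output

alarm_df = flag_recirc_balance_valve(daily_df, config)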
+ def flag_hp_inlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
+                        default_temp_threshold : float = 115.0, fault_time : int = 5) -> pd.DataFrame:
+     """
+     Function will take a pandas dataframe and location of alarm information in a csv,
+     and create a dataframe with applicable alarm events.
+
+     VarNames syntax:
+         HPI_POW_[OPTIONAL ID]:### - Indicates a power variable for the heat pump. ### is the power threshold (default 1.0) above which
+             the heat pump is considered 'on'
+         HPI_T_[OPTIONAL ID]:### - Indicates heat pump inlet temperature variable. ### is the temperature threshold (default 115.0)
+             that should not be exceeded while the heat pump is on
+
+     Parameters
+     ----------
+     df: pd.DataFrame
+         post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in-order minutes. If minutes
+         are out of order or have gaps, the function may return erroneous alarms.
+     daily_df: pd.DataFrame
+         post-transformed dataframe for daily data.
+     config : ecopipeline.ConfigManager
+         The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
+         called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
+         The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
+         name of each variable in the dataframe that requires alarming and the HPI alarm codes (e.g., HPI_POW_1:0.5, HPI_T_1:125.0)
+     system: str
+         string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
+     default_power_threshold : float
+         Default power threshold for POW alarm codes when no custom bound is specified (default 1.0). Heat pump is considered 'on'
+         when power exceeds this value.
+     default_temp_threshold : float
+         Default temperature threshold for T alarm codes when no custom bound is specified (default 115.0). Alarm triggers when
+         temperature exceeds this value while heat pump is on.
+     fault_time : int
+         Number of consecutive minutes that both power and temperature must exceed their thresholds before triggering an alarm (default 5).
+
+     Returns
+     -------
+     pd.DataFrame:
+         Pandas dataframe with alarm events
+     """
+     if df.empty:
+         print("cannot flag heat pump inlet temperature alarms. Dataframe is empty")
+         return pd.DataFrame()
+     variable_names_path = config.get_var_names_path()
+     try:
+         bounds_df = pd.read_csv(variable_names_path)
+     except FileNotFoundError:
+         print("File Not Found: ", variable_names_path)
+         return pd.DataFrame()
+
+     bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'HPI',
+                                                {'POW' : default_power_threshold,
+                                                 'T' : default_temp_threshold},
+                                                system)
+     if bounds_df.empty:
+         return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
+
+     # Process each unique alarm_code_id
+     alarms = {}
+     for alarm_id in bounds_df['alarm_code_id'].unique():
+         for day in daily_df.index:
+             next_day = day + pd.Timedelta(days=1)
+             filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
+             id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
+             pow_codes = id_group[id_group['alarm_code_type'] == 'POW']
+             t_codes = id_group[id_group['alarm_code_type'] == 'T']
+             if len(t_codes) != 1 or len(pow_codes) != 1:
+                 raise Exception(f"Improper alarm codes for heat pump inlet temperature with id {alarm_id}")
+             pow_var_name = pow_codes.iloc[0]['variable_name']
+             pow_thresh = pow_codes.iloc[0]['bound']
+             t_var_name = t_codes.iloc[0]['variable_name']
+             t_pretty_name = t_codes.iloc[0]['pretty_name']
+             t_thresh = t_codes.iloc[0]['bound']
+             if pow_var_name in filtered_df.columns and t_var_name in filtered_df.columns:
+                 # Check for consecutive minutes where both power and temp exceed thresholds
+                 power_mask = filtered_df[pow_var_name] > pow_thresh
+                 temp_mask = filtered_df[t_var_name] > t_thresh
+                 combined_mask = power_mask & temp_mask
+
+                 # Check for fault_time consecutive minutes
+                 consecutive_condition = combined_mask.rolling(window=fault_time).min() == 1
+                 if consecutive_condition.any():
+                     first_true_index = consecutive_condition.idxmax()
+                     adjusted_time = first_true_index - pd.Timedelta(minutes=fault_time-1)
+                     _add_an_alarm(alarms, day, t_var_name, f"High heat pump inlet temperature: {t_pretty_name} was above {t_thresh:.1f} while HP was ON starting at {adjusted_time}.")

+     return _convert_silent_alarm_dict_to_df(alarms)
+
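Similarly for the HPI codes, a hedged example pairing a hypothetical heat pump power column with its inlet temperature column (thresholds taken from the docstring's own examples):

# Hypothetical Variable_Names.csv rows for the HPI alarm codes:
#
#   variable_name,alarm_codes,pretty_name
#   HP_Power,HPI_POW_1:0.5,Heat Pump Power
#   HP_InletTemp,HPI_T_1:125.0,Heat Pump Inlet Temp

alarm_df = flag_hp_inlet_temp(minute_df, daily_df, config)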
+ def _process_bounds_df_alarm_codes(bounds_df : pd.DataFrame, alarm_tag : str, type_default_dict : dict = {}, system : str = "") -> pd.DataFrame:
+     # Should only do for alarm codes of format: [TAG]_[TYPE]_[OPTIONAL_ID]:[BOUND]
+     if (system != ""):
+         if not 'system' in bounds_df.columns:
+             raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
+         bounds_df = bounds_df.loc[bounds_df['system'] == system]
+
+     required_columns = ["variable_name", "alarm_codes"]
+     for required_column in required_columns:
+         if not required_column in bounds_df.columns:
+             raise Exception(f"{required_column} is not present in Variable_Names.csv")
+     if not 'pretty_name' in bounds_df.columns:
+         bounds_df['pretty_name'] = bounds_df['variable_name']
+     else:
+         bounds_df['pretty_name'] = bounds_df['pretty_name'].fillna(bounds_df['variable_name'])
+
+     bounds_df = bounds_df.loc[:, ["variable_name", "alarm_codes", "pretty_name"]]
+     bounds_df.dropna(axis=0, thresh=2, inplace=True)
+
+     # Check if all alarm_codes are null or if dataframe is empty
+     if bounds_df.empty or bounds_df['alarm_codes'].isna().all():
+         return pd.DataFrame()
+
+     bounds_df = bounds_df[bounds_df['alarm_codes'].str.contains(alarm_tag, na=False)]
+
+     # Split alarm_codes by semicolons and create a row for each tagged code
+     expanded_rows = []
+     for idx, row in bounds_df.iterrows():
+         alarm_codes = str(row['alarm_codes']).split(';')
+         tag_codes = [code.strip() for code in alarm_codes if code.strip().startswith(alarm_tag)]
+
+         if tag_codes: # Only process if there are tagged codes
+             for tag_code in tag_codes:
+                 new_row = row.copy()
+                 if ":" in tag_code:
+                     tag_parts = tag_code.split(':')
+                     if len(tag_parts) > 2:
+                         raise Exception(f"Improperly formatted alarm code : {tag_code}")
+                     new_row['bound'] = tag_parts[1]
+                     tag_code = tag_parts[0]
+                 else:
+                     new_row['bound'] = None
+                 new_row['alarm_codes'] = tag_code
+
+                 expanded_rows.append(new_row)
+
+     if expanded_rows:
+         bounds_df = pd.DataFrame(expanded_rows)
+     else:
+         return pd.DataFrame() # no tagged alarms to look into
+
+     alarm_code_parts = []
+     for idx, row in bounds_df.iterrows():
+         parts = row['alarm_codes'].split('_')
+         if len(parts) == 2:
+             alarm_code_parts.append([parts[1], "No ID"])
+         elif len(parts) == 3:
+             alarm_code_parts.append([parts[1], parts[2]])
+         else:
+             raise Exception(f"improper {alarm_tag} alarm code format for {row['variable_name']}")
+     if alarm_code_parts:
+         bounds_df[['alarm_code_type', 'alarm_code_id']] = pd.DataFrame(alarm_code_parts, index=bounds_df.index)
+
+     # Replace None bounds with appropriate defaults based on alarm_code_type
+     for idx, row in bounds_df.iterrows():
+         if pd.isna(row['bound']) or row['bound'] is None:
+             if row['alarm_code_type'] in type_default_dict.keys():
+                 bounds_df.at[idx, 'bound'] = type_default_dict[row['alarm_code_type']]
+     # Coerce bound column to float
+     bounds_df['bound'] = pd.to_numeric(bounds_df['bound'], errors='coerce').astype(float)
+     return bounds_df
+
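To make the [TAG]_[TYPE]_[OPTIONAL_ID]:[BOUND] expansion concrete, a minimal sketch (hypothetical variable name, defaults chosen arbitrarily):

import pandas as pd

demo = pd.DataFrame({'variable_name': ['SwingTank_Temp'],
                     'alarm_codes': ['STS_T_1:140;STS_ST_1']})
out = _process_bounds_df_alarm_codes(demo, 'STS', {'T': 130.0, 'ST': 130.0})
# out holds one row per code: alarm_code_type 'T' with bound 140.0 (explicit)
# and alarm_code_type 'ST' with bound 130.0 (filled from the defaults dict),
# both carrying alarm_code_id '1'.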
+ def _add_an_alarm(alarm_dict : dict, day : datetime, var_name : str, alarm_string : str):
+     # Round down to beginning of day
+     day = pd.Timestamp(day).normalize()
+
+     if day in alarm_dict:
+         alarm_dict[day].append([var_name, alarm_string])
+     else:
+         alarm_dict[day] = [[var_name, alarm_string]]
+
  def _convert_silent_alarm_dict_to_df(alarm_dict : dict) -> pd.DataFrame:
      events = {
          'start_time_pt' : [],
@@ -293,6 +662,9 @@ def power_ratio_alarm(daily_df: pd.DataFrame, config : ConfigManager, day_table_
      for required_column in required_columns:
          if not required_column in ratios_df.columns:
              raise Exception(f"{required_column} is not present in Variable_Names.csv")
+     if ratios_df['alarm_codes'].isna().all() or ratios_df['alarm_codes'].isnull().all():
+         print("No alarm codes in ", variable_names_path)
+         return pd.DataFrame()
      if not 'pretty_name' in ratios_df.columns:
          ratios_df['pretty_name'] = ratios_df['variable_name']
      else:
@@ -1,3 +1,4 @@
1
- from .extract import get_noaa_data, json_to_df, extract_files, get_last_full_day_from_db, get_db_row_from_time, extract_new, csv_to_df, get_sub_dirs, msa_to_df, fm_api_to_df, small_planet_control_to_df, dent_csv_to_df, flow_csv_to_df, pull_egauge_data, egauge_csv_to_df, remove_char_sequence_from_csv_header, tb_api_to_df
1
+ from .extract import get_noaa_data, json_to_df, extract_files, get_last_full_day_from_db, get_db_row_from_time, extract_new, csv_to_df, get_sub_dirs, msa_to_df, fm_api_to_df, small_planet_control_to_df, dent_csv_to_df, flow_csv_to_df, pull_egauge_data, egauge_csv_to_df, remove_char_sequence_from_csv_header, tb_api_to_df, skycentrics_api_to_df,get_OAT_open_meteo
2
2
  __all__ = ["get_noaa_data", "json_to_df", "extract_files", "get_last_full_day_from_db", "get_db_row_from_time", 'extract_new', "csv_to_df", "get_sub_dirs", "msa_to_df", "fm_api_to_df",
3
- "small_planet_control_to_df","dent_csv_to_df","flow_csv_to_df","pull_egauge_data", "egauge_csv_to_df","remove_char_sequence_from_csv_header", "tb_api_to_df"]
3
+ "small_planet_control_to_df","dent_csv_to_df","flow_csv_to_df","pull_egauge_data", "egauge_csv_to_df","remove_char_sequence_from_csv_header", "tb_api_to_df", "skycentrics_api_to_df",
4
+ "get_OAT_open_meteo"]
@@ -1,5 +1,6 @@
  from typing import List
  import pandas as pd
+ import openmeteo_requests
  import re
  from ftplib import FTP
  from datetime import datetime, timedelta
@@ -15,6 +16,7 @@ import mysql.connector.errors as mysqlerrors
  import requests
  import subprocess
  import traceback
+ import time


  def get_last_full_day_from_db(config : ConfigManager, table_identifier : str = "minute") -> datetime:
@@ -661,6 +663,91 @@ def egauge_csv_to_df(csv_filenames: List[str]) -> pd.DataFrame:

      return df_diff

+ def skycentrics_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True, time_zone: str = 'US/Pacific'):
+     """
+     Function connects to the Skycentrics API to pull data and returns a dataframe.
+
+     Parameters
+     ----------
+     config : ecopipeline.ConfigManager
+         The ConfigManager object that holds configuration data for the pipeline. The config manager
+         must contain information to connect to the api, i.e. the api token and secret as well as
+         the device id for the device the data is being pulled from.
+     startTime: datetime
+         The point in time for which we want to start the data extraction from. This
+         is local time from the data's index.
+     endTime: datetime
+         The point in time for which we want to end the data extraction. This
+         is local time from the data's index.
+     create_csv : bool
+         create csv files as you process such that API need not be relied upon for reprocessing
+     time_zone: str
+         The timezone for the indexes in the output dataframe as a string. Must be a string recognized as a
+         time stamp by the pandas tz_localize() function https://pandas.pydata.org/docs/reference/api/pandas.Series.tz_localize.html
+         defaults to 'US/Pacific'
+
+     Returns
+     -------
+     pd.DataFrame:
+         Pandas Dataframe containing data from the API pull with column headers the same as the variable names in the data from the pull
+     """
+     # temporary solution while no date range available
+
+     try:
+         df = pd.DataFrame()
+         temp_dfs = []
+         if endTime is None:
+             endTime = datetime.utcnow()
+         if startTime is None:
+             startTime = endTime - timedelta(1)
+         time_parser = startTime
+         while time_parser < endTime:
+             time_parse_end = time_parser + timedelta(1)
+             start_time_str = time_parser.strftime('%Y-%m-%dT%H:%M:%S')
+             end_time_str = time_parse_end.strftime('%Y-%m-%dT%H:%M:%S')
+             skycentrics_token, date_str = config.get_skycentrics_token(
+                 request_str=f'GET /api/devices/{config.api_device_id}/data?b={start_time_str}&e={end_time_str}&g=1 HTTP/1.1',
+                 date_str=None)
+             response = requests.get(f'https://api.skycentrics.com/api/devices/{config.api_device_id}/data?b={start_time_str}&e={end_time_str}&g=1',
+                                     headers={'Date': date_str, 'x-sc-api-token': skycentrics_token, 'Accept': 'application/gzip'})
+             if response.status_code == 200:
+                 # Decompress the gzip response
+                 decompressed_data = gzip.decompress(response.content)
+                 # Parse JSON from decompressed data
+                 json_data = json.loads(decompressed_data)
+                 norm_data = pd.json_normalize(json_data, record_path=['sensors'], meta=['time'], meta_prefix='response_')
+                 if len(norm_data) != 0:
+                     norm_data["time_pt"] = pd.to_datetime(norm_data["response_time"], utc=True)
+                     norm_data["time_pt"] = norm_data["time_pt"].dt.tz_convert(time_zone)
+                     norm_data = pd.pivot_table(norm_data, index="time_pt", columns="id", values="data")
+                     # Iterate over the index and round up if necessary (work around for json format from sensors)
+                     for i in range(len(norm_data.index)):
+                         if norm_data.index[i].minute == 59 and norm_data.index[i].second == 59:
+                             norm_data.index.values[i] = norm_data.index[i] + pd.Timedelta(seconds=1)
+                     temp_dfs.append(norm_data)
+             else:
+                 print(f"Failed to make GET request. Status code: {response.status_code} {response.json()}")
+             time_parser = time_parse_end
+         if len(temp_dfs) > 0:
+             df = pd.concat(temp_dfs, ignore_index=False)
+             if create_csv:
+                 filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
+                 original_directory = os.getcwd()
+                 os.chdir(config.data_directory)
+                 df.to_csv(filename, index_label='time_pt')
+                 os.chdir(original_directory)
+         else:
+             print("No skycentrics data retrieved for time frame.")
+         return df
+
+     except Exception as e:
+         print(f"An error occurred: {e}")
+         raise e
+
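A hedged usage sketch for the new extractor, assuming a config.ini whose [data] section carries api_token, api_secret, and device_id (see the ConfigManager changes below; the ConfigManager constructor arguments shown are an assumption):

from datetime import datetime, timedelta
from ecopipeline import ConfigManager
from ecopipeline.extract import skycentrics_api_to_df

config = ConfigManager("full/path/to/pipeline/config.ini")  # assumed constructor usage
df = skycentrics_api_to_df(config,
                           startTime=datetime.utcnow() - timedelta(days=3),
                           endTime=datetime.utcnow(),
                           create_csv=False)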
  def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True) -> pd.DataFrame:
      """
      Function connects to the field manager api to pull data and returns a dataframe.
@@ -776,7 +863,7 @@ def pull_egauge_data(config: ConfigManager, eGauge_ids: list, eGauge_usr : str,
      os.chdir(original_directory)

  def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True, query_hours : float = 1,
-                  sensor_keys : list = [], seperate_keys : bool = False):
+                  sensor_keys : list = [], seperate_keys : bool = False, device_id_overwrite : str = None, csv_prefix : str = ""):
      """
      Function connects to the ThingsBoard API to pull data and returns a dataframe.
@@ -796,6 +883,11 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
          create csv files as you process such that API need not be relied upon for reprocessing
      query_hours : float
          number of hours to query at a time from ThingsBoard API
+
+     device_id_overwrite : str
+         Overwrites the configured device ID for the API pull
+     csv_prefix : str
+         prefix to add to the csv file name

      Returns
      -------
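The two new parameters make it possible to reuse one pipeline against several ThingsBoard devices; a hedged sketch, assuming an existing config object (the device UUID below is a placeholder):

from datetime import datetime, timedelta

end = datetime.utcnow()
start = end - timedelta(hours=6)
df = tb_api_to_df(config, startTime=start, endTime=end,
                  device_id_overwrite="00000000-0000-0000-0000-000000000000",  # placeholder UUID
                  csv_prefix="site2_")  # csv is written as site2_<startTime>.csv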
@@ -804,16 +896,17 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
          Will return with index in UTC so needs to be converted after to appropriate timezone
      """
      df = pd.DataFrame()
+     api_device_id = device_id_overwrite if device_id_overwrite is not None else config.api_device_id
      if len(sensor_keys) <= 0:
          token = config.get_thingsboard_token()
-         key_list = _get_tb_keys(config, token)
+         key_list = _get_tb_keys(token, api_device_id)
          if len(key_list) <= 0:
-             raise Exception(f"No sensors available at ThingsBoard site with id {config.api_device_id}")
-         return tb_api_to_df(config, startTime, endTime, create_csv, query_hours, key_list, seperate_keys)
+             raise Exception(f"No sensors available at ThingsBoard site with id {api_device_id}")
+         return tb_api_to_df(config, startTime, endTime, create_csv, query_hours, key_list, seperate_keys, device_id_overwrite, csv_prefix)
      if seperate_keys:
          df_list = []
          for sensor_key in sensor_keys:
-             df_list.append(tb_api_to_df(config, startTime, endTime, False, query_hours, [sensor_key], False))
+             df_list.append(tb_api_to_df(config, startTime, endTime, False, query_hours, [sensor_key], False, device_id_overwrite, csv_prefix))
          df = pd.concat(df_list)
      else:
          # not seperate_keys:
@@ -826,13 +919,13 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
          if endTime - timedelta(hours=query_hours) > startTime:
              time_diff = endTime - startTime
              midpointTime = startTime + time_diff / 2
-             df_1 = tb_api_to_df(config, startTime, midpointTime, query_hours=query_hours, sensor_keys=sensor_keys, create_csv=False)
-             df_2 = tb_api_to_df(config, midpointTime, endTime, query_hours=query_hours, sensor_keys=sensor_keys, create_csv=False)
+             df_1 = tb_api_to_df(config, startTime, midpointTime, query_hours=query_hours, sensor_keys=sensor_keys, create_csv=False, device_id_overwrite=device_id_overwrite)
+             df_2 = tb_api_to_df(config, midpointTime, endTime, query_hours=query_hours, sensor_keys=sensor_keys, create_csv=False, device_id_overwrite=device_id_overwrite)
              df = pd.concat([df_1, df_2])
              df = df.sort_index()
              df = df.groupby(df.index).mean()
          else:
-             url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{config.api_device_id}/values/timeseries'
+             url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{api_device_id}/values/timeseries'
              token = config.get_thingsboard_token()
              key_string = ','.join(sensor_keys)
              params = {
@@ -844,7 +937,6 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
                  'interval' : '0',
                  'agg' : 'NONE'
              }
-
              # Headers
              headers = {
                  'accept': 'application/json',
@@ -855,14 +947,6 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
              response = requests.get(url, headers=headers, params=params)
              if response.status_code == 200:
                  response_json = response.json()
-                 # if create_csv:
-                 #     json_filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.json"
-                 #     print(f"filename: {json_filename}, url: {url}, params: {params}")
-                 #     original_directory = os.getcwd()
-                 #     os.chdir(config.data_directory)
-                 #     with open(json_filename, 'w') as f:
-                 #         json.dump(response_json, f, indent=4) # indent=4 makes it human-readable
-                 #     os.chdir(original_directory)

                  data = {}
                  for key, records in response_json.items():
@@ -886,7 +970,7 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
                  df = pd.DataFrame()
      # save to file
      if create_csv:
-         filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
+         filename = f"{csv_prefix}{startTime.strftime('%Y%m%d%H%M%S')}.csv"
          original_directory = os.getcwd()
          os.chdir(config.data_directory)
          df.to_csv(filename, index_label='time_pt')
@@ -900,8 +984,8 @@ def _get_float_value(value):
      except (ValueError, TypeError):
          return None

- def _get_tb_keys(config: ConfigManager, token : str) -> List[str]:
-     url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{config.api_device_id}/keys/timeseries'
+ def _get_tb_keys(token : str, api_device_id : str) -> List[str]:
+     url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{api_device_id}/keys/timeseries'

      # Headers
      headers = {
@@ -946,6 +1030,62 @@ def get_sub_dirs(dir: str) -> List[str]:
          return
      return directories

+ def get_OAT_open_meteo(lat: float, long: float, start_date: datetime, end_date: datetime = None, time_zone: str = "America/Los_Angeles",
+                        use_noaa_names : bool = True) -> pd.DataFrame:
+     if end_date is None:
+         end_date = datetime.today() - timedelta(1)
+     start_date_str = start_date.date().strftime('%Y-%m-%d')
+     end_date_str = end_date.date().strftime('%Y-%m-%d')
+     print(f"Getting Open Meteo data for {start_date_str} to {end_date_str}")
+     try:
+         openmeteo = openmeteo_requests.Client()
+
+         url = "https://archive-api.open-meteo.com/v1/archive"
+         params = {
+             "latitude": lat,
+             "longitude": long,
+             "start_date": start_date_str,
+             "end_date": end_date_str,
+             "hourly": "temperature_2m",
+             "temperature_unit": "fahrenheit",
+             "timezone": time_zone,
+         }
+         responses = openmeteo.weather_api(url, params=params)
+
+         # Process first location. Add a for-loop for multiple locations or weather models
+         response = responses[0]
+
+         # Process hourly data. The order of variables needs to be the same as requested.
+         hourly = response.Hourly()
+         hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
+
+         hourly_data = {"time_pt": pd.date_range(
+             start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
+             end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
+             freq = pd.Timedelta(seconds = hourly.Interval()),
+             inclusive = "left"
+         )}
+
+         hourly_data["temperature_2m"] = hourly_temperature_2m
+         hourly_data["time_pt"] = hourly_data["time_pt"].tz_convert(time_zone).tz_localize(None)
+
+         hourly_data = pd.DataFrame(hourly_data)
+         hourly_data.set_index('time_pt', inplace = True)
+
+         if use_noaa_names:
+             hourly_data = hourly_data.rename(columns = {'temperature_2m':'airTemp_F'})
+             hourly_data['dewPoint_F'] = None
+
+         # Convert float32 to float64 for SQL database compatibility
+         for col in hourly_data.select_dtypes(include=['float32']).columns:
+             hourly_data[col] = hourly_data[col].astype('float64')
+
+         return hourly_data
+     except Exception as e:
+         print(f'Could not get OAT data: {e}')
+         return pd.DataFrame()
+
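A hedged usage sketch: hourly outside air temperature for Seattle over the last week, with use_noaa_names left on so the frame can stand in for a NOAA pull in join_to_hourly:

from datetime import datetime, timedelta
from ecopipeline.extract import get_OAT_open_meteo

oat_df = get_OAT_open_meteo(lat=47.6, long=-122.33,
                            start_date=datetime.today() - timedelta(days=7))
# oat_df is indexed by local time_pt with columns airTemp_F and dewPoint_F (all None)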

  def get_noaa_data(station_names: List[str], config : ConfigManager, station_ids : dict = {}) -> dict:
      """
@@ -963,6 +1103,7 @@ def get_noaa_data(station_names: List[str], config : ConfigManager, station_ids
      dict:
          Dictionary with key as Station Name and Value as DF of Parsed Weather Data
      """
+     # TODO swap out for this if empty: https://open-meteo.com/en/docs/historical-weather-api?start_date=2025-12-29&latitude=47.6&longitude=-122.33&temperature_unit=fahrenheit&end_date=2026-01-04
      formatted_dfs = {}
      weather_directory = config.get_weather_dir_path()
      try:
@@ -2,7 +2,7 @@ from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffi
      aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days, \
      convert_c_to_f,convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage,generate_event_log_df,convert_time_zone, shift_accumulative_columns, \
      heat_output_calc, add_relative_humidity, apply_equipment_cop_derate, create_data_statistics_df, delete_erroneous_from_time_pt,column_name_change, \
-     process_ls_signal
+     process_ls_signal, convert_temp_resistance_type, estimate_power
  from .lbnl import nclarity_filter_new, site_specific, condensate_calculations, gas_valve_diff, gather_outdoor_conditions, aqsuite_prep_time, \
      nclarity_csv_to_df, _add_date, add_local_time, aqsuite_filter_new, get_refrig_charge, elev_correction, change_ID_to_HVAC, get_hvac_state, \
      get_cop_values, get_cfm_values, replace_humidity, create_fan_curves, lbnl_temperature_conversions, lbnl_pressure_conversions, \
@@ -14,4 +14,4 @@ __all__ = ["rename_sensors", "avg_duplicate_times", "remove_outliers", "ffill_mi
      "create_fan_curves", "lbnl_temperature_conversions", "lbnl_pressure_conversions", "lbnl_sat_calculations", "get_site_cfm_info", "get_site_info", "merge_indexlike_rows", "calculate_cop_values", "aggregate_values",
      "get_energy_by_min", "verify_power_energy", "get_temp_zones120", "get_storage_gals120","convert_c_to_f","convert_l_to_g", "convert_on_off_col_to_bool", "flag_dhw_outage","generate_event_log_df","convert_time_zone",
      "shift_accumulative_columns","heat_output_calc", "add_relative_humidity","apply_equipment_cop_derate","create_data_statistics_df",
-     "delete_erroneous_from_time_pt","column_name_change","process_ls_signal"]
+     "delete_erroneous_from_time_pt","column_name_change","process_ls_signal", "convert_temp_resistance_type", "estimate_power"]
@@ -1,7 +1,7 @@
  import pandas as pd
  import numpy as np
  import datetime as dt
- import csv
+ import pickle
  import os
  from ecopipeline.utils.unit_convert import temp_c_to_f_non_noaa, volume_l_to_g, power_btuhr_to_kw, temp_f_to_c
  from ecopipeline import ConfigManager
@@ -157,20 +157,29 @@ def _rm_cols(col, bounds_df): # Helper function for remove_outliers
      """
      Function will take in a pandas series and bounds information
      stored in a dataframe, then check each element of that column and set it to nan
      if it is outside the given bounds.

      Args:
          col: pd.Series
              Pandas dataframe column from data being processed
          bounds_df: pd.DataFrame
              Pandas dataframe indexed by the names of the columns from the dataframe that col came from. There should be at least
              two columns in this dataframe, lower_bound and upper_bound, for use in removing outliers
      Returns:
          None
      """
      if (col.name in bounds_df.index):
-         c_lower = float(bounds_df.loc[col.name]["lower_bound"])
-         c_upper = float(bounds_df.loc[col.name]["upper_bound"])
+         c_lower = bounds_df.loc[col.name]["lower_bound"]
+         c_upper = bounds_df.loc[col.name]["upper_bound"]
+
+         # Skip if both bounds are NaN
+         if pd.isna(c_lower) and pd.isna(c_upper):
+             return
+
+         # Convert bounds to float, handling NaN values
+         c_lower = float(c_lower) if not pd.isna(c_lower) else -np.inf
+         c_upper = float(c_upper) if not pd.isna(c_upper) else np.inf
+
          col.mask((col > c_upper) | (col < c_lower), other=np.NaN, inplace=True)

  # TODO: remove_outliers STRETCH GOAL: Functionality for alarms being raised based on bounds needs to happen here.
@@ -305,6 +314,71 @@ def ffill_missing(original_df: pd.DataFrame, config : ConfigManager, previous_fi
      df.apply(_ffill, args=(ffill_df,previous_fill))
      return df

+ def convert_temp_resistance_type(df : pd.DataFrame, column_name : str, sensor_model = 'veris') -> pd.DataFrame:
+     """
+     Convert temperature readings recorded against a 10k Type 2 thermistor curve into corrected readings
+     by mapping temperature (F) to resistance (Ohms) with the Type 2 model, then resistance back to
+     temperature with the Type 3 model.
+
+     Parameters:
+     -----------
+     df: pd.DataFrame
+         Timestamp indexed Pandas dataframe of minute by minute values
+     column_name : str
+         Name of column with resistance conversion type 2 data
+     sensor_model : str
+         possible strings: veris, tasseron
+
+     Returns:
+     --------
+     df: pd.DataFrame
+     """
+     model_path_t_to_r = '../utils/pkls/'
+     model_path_r_to_t = '../utils/pkls/'
+     if sensor_model == 'veris':
+         model_path_t_to_r = model_path_t_to_r + 'veris_temp_to_resistance_2.pkl'
+         model_path_r_to_t = model_path_r_to_t + 'veris_resistance_to_temp_3.pkl'
+     elif sensor_model == 'tasseron':
+         model_path_t_to_r = model_path_t_to_r + 'tasseron_temp_to_resistance_2.pkl'
+         model_path_r_to_t = model_path_r_to_t + 'tasseron_resistance_to_temp_3.pkl'
+     else:
+         raise Exception("unsupported sensor model")
+
+     with open(os.path.join(os.path.dirname(__file__), model_path_t_to_r), 'rb') as f:
+         model = pickle.load(f)
+     df['resistance'] = df[column_name].apply(model)
+     with open(os.path.join(os.path.dirname(__file__), model_path_r_to_t), 'rb') as f:
+         model = pickle.load(f)
+     df[column_name] = df['resistance'].apply(model)
+     df = df.drop(columns='resistance')
+     return df
+
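A hedged usage sketch, assuming a hypothetical 'TankTemp_F' column that was logged against the wrong thermistor curve; the bundled .pkl models perform the Type 2 -> resistance -> Type 3 roundtrip:

minute_df = convert_temp_resistance_type(minute_df, 'TankTemp_F', sensor_model='tasseron')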
+ def estimate_power(df : pd.DataFrame, new_power_column : str, current_a_column : str, current_b_column : str, current_c_column : str,
+                    assumed_voltage : float = 208, power_factor : float = 1):
+     """
+     Estimate three-phase power in kW from per-phase current measurements.
+
+     Parameters
+     ----------
+     df: pd.DataFrame
+         Pandas dataframe with minute-to-minute data
+     new_power_column : str
+         The column name of the power variable for the calculation. Units of the column should be kW
+     current_a_column : str
+         The column name of the Current A variable for the calculation. Units of the column should be amps
+     current_b_column : str
+         The column name of the Current B variable for the calculation. Units of the column should be amps
+     current_c_column : str
+         The column name of the Current C variable for the calculation. Units of the column should be amps
+     assumed_voltage : float
+         The assumed voltage (default 208)
+     power_factor : float
+         The power factor (default 1)
+
+     Returns
+     -------
+     pd.DataFrame:
+         Pandas dataframe with new estimated power column of specified name.
+     """
+     # average current * 208V * PF * sqrt(3)
+     df[new_power_column] = (df[current_a_column] + df[current_b_column] + df[current_c_column]) / 3 * assumed_voltage * power_factor * np.sqrt(3) / 1000
+
+     return df
+
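A worked example of the formula: with 10 A on each phase at 208 V and unity power factor, the estimate is 10 * 208 * 1 * sqrt(3) / 1000, roughly 3.60 kW.

import pandas as pd

df = pd.DataFrame({'amps_a': [10.0], 'amps_b': [10.0], 'amps_c': [10.0]})
df = estimate_power(df, 'power_kW', 'amps_a', 'amps_b', 'amps_c')
print(round(df['power_kW'].iloc[0], 2))  # 3.6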
  def process_ls_signal(df: pd.DataFrame, hourly_df: pd.DataFrame, daily_df: pd.DataFrame, load_dict: dict = {1: "normal", 2: "loadUp", 3 : "shed"}, ls_column: str = 'ls',
                        drop_ls_from_df : bool = False):
      """
@@ -719,7 +793,7 @@ def convert_on_off_col_to_bool(df: pd.DataFrame, column_names: list) -> pd.DataF
      pd.DataFrame: Dataframe with specified columns converted from ON/OFF strings to boolean values.
      """

-     mapping = {'ON': True, 'OFF': False}
+     mapping = {'ON': True, 'OFF': False, 'On': True, 'Off': False}

      for column_name in column_names:
          df[column_name] = df[column_name].map(mapping).where(df[column_name].notna(), df[column_name])
@@ -1110,6 +1184,9 @@ def join_to_hourly(hourly_data: pd.DataFrame, noaa_data: pd.DataFrame) -> pd.Dat
      pd.DataFrame:
          A single, joined dataframe
      """
+     # skip the join when the NOAA frame carries no data (e.g. at the start of a new year)
+     if 'OAT_NOAA' in noaa_data.columns and not noaa_data['OAT_NOAA'].notnull().any():
+         return hourly_data
      out_df = hourly_data.join(noaa_data)
      return out_df

@@ -4,6 +4,9 @@ import mysql.connector
  import mysql.connector.cursor
  import requests
  from datetime import datetime
+ import base64
+ import hashlib
+ import hmac

  class ConfigManager:
      """
@@ -56,6 +59,8 @@ class ConfigManager:
          self.data_directory = data_directory
          self.api_usr = None
          self.api_pw = None
+         self.api_token = None
+         self.api_secret = None
          self.api_device_id = None
          if self.data_directory is None:
              configured_data_method = False
@@ -74,6 +79,11 @@ class ConfigManager:
              self.api_pw = configure.get('data', 'api_pw')
              self.api_device_id = configure.get('data','device_id')
              configured_data_method = True
+         elif 'api_token' in configure['data'] and 'api_secret' in configure['data']:
+             self.api_token = configure.get('data', 'api_token')
+             self.api_secret = configure.get('data', 'api_secret')
+             self.api_device_id = configure.get('data','device_id')
+             configured_data_method = True
          if not configured_data_method:
              raise Exception('data configuration section missing or incomplete in configuration file.')

@@ -261,4 +271,13 @@ class ConfigManager:
      def get_fm_device_id(self) -> str:
          if self.api_device_id is None:
              raise Exception("Field Manager device ID has not been configured.")
-         return self.api_device_id
+         return self.api_device_id
+
+     def get_skycentrics_token(self, request_str = 'GET /api/devices/ HTTP/1.', date_str : str = None) -> tuple:
+         if date_str is None:
+             date_str = datetime.utcnow().strftime('%a, %d %b %H:%M:%S GMT')
+         # HMAC-SHA1 signature over the request line, date, and an empty-body MD5, base64 encoded
+         signature = base64.b64encode(hmac.new(self.api_secret.encode(),
+                                               '{}\n{}\n{}\n{}'.format(request_str, date_str, '', hashlib.md5(''.encode()).hexdigest()).encode(),
+                                               hashlib.sha1).digest())
+         token = '{}:{}'.format(self.api_token, signature.decode())
+         return token, date_str
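A hedged sketch of how the token pairs with the Date header on a request, mirroring the call pattern in skycentrics_api_to_df above (the query window values are placeholders):

device_id = config.api_device_id  # configured via [data] device_id
request = f'GET /api/devices/{device_id}/data?b=2025-01-01T00:00:00&e=2025-01-02T00:00:00&g=1 HTTP/1.1'
token, date_str = config.get_skycentrics_token(request_str=request)
headers = {'Date': date_str, 'x-sc-api-token': token, 'Accept': 'application/gzip'}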
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ecopipeline
- Version: 0.11.4
+ Version: 1.0.3
  Summary: Contains functions for use in Ecotope Datapipelines
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
@@ -1,4 +1,5 @@
  LICENSE
+ MANIFEST.in
  README.md
  pyproject.toml
  setup.cfg
@@ -22,4 +23,9 @@ src/ecopipeline/transform/transform.py
  src/ecopipeline/utils/ConfigManager.py
  src/ecopipeline/utils/NOAADataDownloader.py
  src/ecopipeline/utils/__init__.py
- src/ecopipeline/utils/unit_convert.py
+ src/ecopipeline/utils/unit_convert.py
+ src/ecopipeline/utils/pkls/__init__.py
+ src/ecopipeline/utils/pkls/tasseron_resistance_to_temp_3.pkl
+ src/ecopipeline/utils/pkls/tasseron_temp_to_resistance_2.pkl
+ src/ecopipeline/utils/pkls/veris_resistance_to_temp_3.pkl
+ src/ecopipeline/utils/pkls/veris_temp_to_resistance_2.pkl
@@ -1,2 +0,0 @@
- from .event_tracking import *
- __all__ = ['central_alarm_df_creator','flag_boundary_alarms','power_ratio_alarm','flag_abnormal_COP']