ecopipeline 0.11.4__tar.gz → 1.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. ecopipeline-1.0.3/MANIFEST.in +2 -0
  2. {ecopipeline-0.11.4/src/ecopipeline.egg-info → ecopipeline-1.0.3}/PKG-INFO +1 -1
  3. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/setup.cfg +1 -1
  4. ecopipeline-1.0.3/src/ecopipeline/event_tracking/__init__.py +3 -0
  5. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/event_tracking/event_tracking.py +397 -25
  6. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/extract/__init__.py +3 -2
  7. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/extract/extract.py +161 -20
  8. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/transform/__init__.py +2 -2
  9. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/transform/transform.py +86 -9
  10. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/utils/ConfigManager.py +20 -1
  11. ecopipeline-1.0.3/src/ecopipeline/utils/pkls/__init__.py +0 -0
  12. ecopipeline-1.0.3/src/ecopipeline/utils/pkls/tasseron_resistance_to_temp_3.pkl +0 -0
  13. ecopipeline-1.0.3/src/ecopipeline/utils/pkls/tasseron_temp_to_resistance_2.pkl +0 -0
  14. ecopipeline-1.0.3/src/ecopipeline/utils/pkls/veris_resistance_to_temp_3.pkl +0 -0
  15. ecopipeline-1.0.3/src/ecopipeline/utils/pkls/veris_temp_to_resistance_2.pkl +0 -0
  16. {ecopipeline-0.11.4 → ecopipeline-1.0.3/src/ecopipeline.egg-info}/PKG-INFO +1 -1
  17. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline.egg-info/SOURCES.txt +7 -1
  18. ecopipeline-0.11.4/src/ecopipeline/event_tracking/__init__.py +0 -2
  19. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/LICENSE +0 -0
  20. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/README.md +0 -0
  21. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/pyproject.toml +0 -0
  22. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/setup.py +0 -0
  23. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/__init__.py +0 -0
  24. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/load/__init__.py +0 -0
  25. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/load/load.py +0 -0
  26. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/transform/bayview.py +0 -0
  27. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/transform/lbnl.py +0 -0
  28. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/utils/NOAADataDownloader.py +0 -0
  29. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/utils/__init__.py +0 -0
  30. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline/utils/unit_convert.py +0 -0
  31. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline.egg-info/dependency_links.txt +0 -0
  32. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline.egg-info/requires.txt +0 -0
  33. {ecopipeline-0.11.4 → ecopipeline-1.0.3}/src/ecopipeline.egg-info/top_level.txt +0 -0
@@ -0,0 +1,2 @@
+ include README.md
+ recursive-include src/ecopipeline/utils/pkls *.pkl
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ecopipeline
- Version: 0.11.4
+ Version: 1.0.3
  Summary: Contains functions for use in Ecotope Datapipelines
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
@@ -1,6 +1,6 @@
  [metadata]
  name = ecopipeline
- version = 0.11.4
+ version = 1.0.3
  authors = ["Carlos Bello, <bellocarlos@seattleu.edu>, Emil Fahrig <fahrigemil@seattleu.edu>, Casey Mang <cmang@seattleu.edu>, Julian Harris <harrisjulian@seattleu.edu>, Roger Tram <rtram@seattleu.edu>, Nolan Price <nolan@ecotope.com>"]
  description = Contains functions for use in Ecotope Datapipelines
  long_description = file: README.md
@@ -0,0 +1,3 @@
+ from .event_tracking import *
+ __all__ = ['central_alarm_df_creator','flag_boundary_alarms','power_ratio_alarm','flag_abnormal_COP','flag_high_swing_setpoint',
+            'flag_recirc_balance_valve','flag_hp_inlet_temp']
@@ -1,6 +1,6 @@
  import pandas as pd
  import numpy as np
- import datetime as dt
+ import datetime as datetime
  from ecopipeline import ConfigManager
  import re
  import mysql.connector.errors as mysqlerrors
@@ -13,29 +13,24 @@ def central_alarm_df_creator(df: pd.DataFrame, daily_data : pd.DataFrame, config
      day_list = daily_data.index.to_list()
      print('Checking for alarms...')
      alarm_df = _convert_silent_alarm_dict_to_df({})
-     boundary_alarm_df = flag_boundary_alarms(df, config, full_days=day_list, system=system, default_fault_time=default_boundary_fault_time)
-     pwr_alarm_df = power_ratio_alarm(daily_data, config, day_table_name=config.get_table_name(day_table_name_header), system=system, ratio_period_days=power_ratio_period_days)
-     abnormal_COP_df = flag_abnormal_COP(daily_data, config, system=system, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)
-
-     if len(boundary_alarm_df) > 0:
-         print("Boundary alarms detected. Adding them to event df...")
-         alarm_df = boundary_alarm_df
-     else:
-         print("No boundary alarms detected.")
-
-     if len(pwr_alarm_df) > 0:
-         print("Power alarms detected. Adding them to event df...")
-         alarm_df = pd.concat([alarm_df, pwr_alarm_df])
-     else:
-         print("No power alarms detected.")
-
-     if _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
-         print("Ongoing DATA_LOSS_COP detected. No further DATA_LOSS_COP events will be uploaded")
-     elif len(abnormal_COP_df) > 0:
-         print("Abnormal COPs detected. Adding them to event df...")
-         alarm_df = pd.concat([alarm_df, abnormal_COP_df])
-     else:
-         print("No abnormal COPs.")
+     dict_of_alarms = {}
+     dict_of_alarms['boundary'] = flag_boundary_alarms(df, config, full_days=day_list, system=system, default_fault_time=default_boundary_fault_time)
+     dict_of_alarms['power ratio'] = power_ratio_alarm(daily_data, config, day_table_name=config.get_table_name(day_table_name_header), system=system, ratio_period_days=power_ratio_period_days)
+     dict_of_alarms['abnormal COP'] = flag_abnormal_COP(daily_data, config, system=system, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)
+     dict_of_alarms['swing tank setpoint'] = flag_high_swing_setpoint(df, daily_data, config, system=system)
+     dict_of_alarms['recirculation loop balancing valve'] = flag_recirc_balance_valve(daily_data, config, system=system)
+     dict_of_alarms['HPWH inlet temperature'] = flag_hp_inlet_temp(df, daily_data, config, system)
+
+     ongoing_COP_exception = ['abnormal COP']
+
+     for key, value in dict_of_alarms.items():
+         if key in ongoing_COP_exception and _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
+             print("Ongoing DATA_LOSS_COP detected. No further DATA_LOSS_COP events will be uploaded")
+         elif len(value) > 0:
+             print(f"Detected {key} alarm(s). Adding to event df...")
+             alarm_df = pd.concat([alarm_df, value])
+         else:
+             print(f"No {key} alarm(s) detected.")

      return alarm_df

@@ -78,7 +73,7 @@ def flag_abnormal_COP(daily_data: pd.DataFrame, config : ConfigManager, system:
      for bound_var, bounds in bounds_df.iterrows():
          if bound_var in cop_columns:
              for day, day_values in daily_data.iterrows():
-                 if day_values[bound_var] > bounds['high_alarm'] or day_values[bound_var] < bounds['low_alarm']:
+                 if day_values[bound_var] is not None and (day_values[bound_var] > bounds['high_alarm'] or day_values[bound_var] < bounds['low_alarm']):
                      alarm_str = f"Unexpected COP Value detected: {bounds['pretty_name']} = {round(day_values[bound_var],2)}"
                      if day in alarms_dict:
                          alarms_dict[day].append([bound_var, alarm_str])
@@ -135,6 +130,9 @@ def flag_boundary_alarms(df: pd.DataFrame, config : ConfigManager, default_fault
      pd.DataFrame:
          Pandas dataframe with alarm events
      """
+     if df.empty:
+         print("cannot flag boundary alarms. Dataframe is empty")
+         return pd.DataFrame()
      variable_names_path = config.get_var_names_path()
      try:
          bounds_df = pd.read_csv(variable_names_path)
@@ -185,6 +183,377 @@ def flag_boundary_alarms(df: pd.DataFrame, config : ConfigManager, default_fault

      return _convert_silent_alarm_dict_to_df(alarms)

+ def flag_high_swing_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, default_fault_time : int = 3,
+                              system: str = "", default_setpoint : float = 130.0, default_power_indication : float = 1.0,
+                              default_power_ratio : float = 0.4) -> pd.DataFrame:
+     """
+     Function will take a pandas dataframe and location of alarm information in a csv,
+     and create a dataframe with applicable alarm events.
+
+     VarNames syntax:
+         STS_T_ID:### - Swing Tank Outlet Temperature. Alarm triggered if over number ### (or 130) for 3 minutes with power on
+         STS_SP_ID:### - Swing Tank Power. ### is lowest recorded power for Swing Tank to be considered 'on'. Defaults to 1.0
+         STS_TP_ID:### - Total System Power for ratio alarming if swing tank power is more than ### (40% default) of usage
+         STS_ST_ID:### - Swing Tank Setpoint that should not change at all from ### (default 130)
+
+     Parameters
+     ----------
+     df: pd.DataFrame
+         post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in-order minutes. If minutes
+         are out of order or have gaps, the function may return erroneous alarms.
+     daily_df: pd.DataFrame
+         post-transformed dataframe for daily data. Used for checking power ratios and determining which days to process.
+     config : ecopipeline.ConfigManager
+         The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
+         called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
+         The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
+         name of each variable in the dataframe that requires alarming and the STS alarm codes (e.g., STS_T_1:140, STS_SP_1:2.0)
+     default_fault_time : int
+         Number of consecutive minutes for T+SP alarms (default 3). T+SP alarms trigger when tank is powered and temperature exceeds
+         setpoint for this many consecutive minutes.
+     system: str
+         string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
+     default_setpoint : float
+         Default temperature setpoint in degrees for T and ST alarm codes when no custom bound is specified (default 130.0)
+     default_power_indication : float
+         Default power threshold in kW for SP alarm codes when no custom bound is specified (default 1.0)
+     default_power_ratio : float
+         Default power ratio threshold (as decimal, e.g., 0.4 for 40%) for TP alarm codes when no custom bound is specified (default 0.4)
+
+     Returns
+     -------
+     pd.DataFrame:
+         Pandas dataframe with alarm events
+     """
+     if df.empty:
+         print("cannot flag swing tank setpoint alarms. Dataframe is empty")
+         return pd.DataFrame()
+     variable_names_path = config.get_var_names_path()
+     try:
+         bounds_df = pd.read_csv(variable_names_path)
+     except FileNotFoundError:
+         print("File Not Found: ", variable_names_path)
+         return pd.DataFrame()
+
+     bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'STS',
+                                                {'T' : default_setpoint,
+                                                 'SP': default_power_indication,
+                                                 'TP': default_power_ratio,
+                                                 'ST': default_setpoint},
+                                                system)
+     if bounds_df.empty:
+         return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
+
+     # Process each unique alarm_code_id
+     alarms = {}
+     for day in daily_df.index:
+         next_day = day + pd.Timedelta(days=1)
+         filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
+         alarmed_for_day = False
+         for alarm_id in bounds_df['alarm_code_id'].unique():
+             id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
+
+             # Get T and SP alarm codes for this ID
+             t_codes = id_group[id_group['alarm_code_type'] == 'T']
+             sp_codes = id_group[id_group['alarm_code_type'] == 'SP']
+             tp_codes = id_group[id_group['alarm_code_type'] == 'TP']
+             st_codes = id_group[id_group['alarm_code_type'] == 'ST']
+
+             # Check for multiple T or SP codes with same ID
+             if len(t_codes) > 1 or len(sp_codes) > 1 or len(tp_codes) > 1 or len(st_codes) > 1:
+                 raise Exception(f"Improper alarm codes for swing tank setpoint with id {alarm_id}")
+
+             # Check if we have both T and SP
+             if len(t_codes) == 1 and len(sp_codes) == 1:
+                 t_var_name = t_codes.iloc[0]['variable_name']
+                 sp_var_name = sp_codes.iloc[0]['variable_name']
+                 sp_power_indication = sp_codes.iloc[0]['bound']
+                 t_setpoint = t_codes.iloc[0]['bound']
+                 # Check if both variables exist in df
+                 if t_var_name in filtered_df.columns and sp_var_name in filtered_df.columns:
+                     # Check for consecutive minutes where SP > default_power_indication
+                     # AND T >= default_setpoint
+                     power_mask = filtered_df[sp_var_name] >= sp_power_indication
+                     temp_mask = filtered_df[t_var_name] >= t_setpoint
+                     combined_mask = power_mask & temp_mask
+
+                     # Check for 3 consecutive minutes
+                     consecutive_condition = combined_mask.rolling(window=default_fault_time).min() == 1
+                     if consecutive_condition.any():
+                         # Get the first index where condition was met
+                         first_true_index = consecutive_condition.idxmax()
+                         # Adjust for the rolling window (first fault_time-1 minutes don't count)
+                         adjusted_time = first_true_index - pd.Timedelta(minutes=default_fault_time-1)
+                         _add_an_alarm(alarms, adjusted_time, sp_var_name, f"High swing tank setpoint: Swing tank was powered at {adjusted_time} although temperature was above {t_setpoint}.")
+                         alarmed_for_day = True
+             if not alarmed_for_day and len(st_codes) == 1:
+                 st_var_name = st_codes.iloc[0]['variable_name']
+                 st_setpoint = st_codes.iloc[0]['bound']
+                 # Check if st_var_name exists in filtered_df
+                 if st_var_name in filtered_df.columns:
+                     # Check if setpoint was altered for over 10 minutes
+                     altered_mask = filtered_df[st_var_name] != st_setpoint
+                     consecutive_condition = altered_mask.rolling(window=10).min() == 1
+                     if consecutive_condition.any():
+                         # Get the first index where condition was met
+                         first_true_index = consecutive_condition.idxmax()
+                         # Adjust for the rolling window
+                         adjusted_time = first_true_index - pd.Timedelta(minutes=9)
+                         _add_an_alarm(alarms, day, st_var_name, f"Swing tank setpoint was altered at {adjusted_time}")
+                         alarmed_for_day = True
+             if not alarmed_for_day and len(tp_codes) == 1 and len(sp_codes) == 1:
+                 tp_var_name = tp_codes.iloc[0]['variable_name']
+                 sp_var_name = sp_codes.iloc[0]['variable_name']
+                 tp_ratio = tp_codes.iloc[0]['bound']
+                 # Check if both variables exist in df
+                 if tp_var_name in daily_df.columns and sp_var_name in daily_df.columns:
+                     # Check if swing tank power ratio exceeds threshold
+                     if day in daily_df.index and daily_df.loc[day, tp_var_name] != 0:
+                         power_ratio = daily_df.loc[day, sp_var_name] / daily_df.loc[day, tp_var_name]
+                         if power_ratio > tp_ratio:
+                             _add_an_alarm(alarms, day, sp_var_name, f"High swing tank power ratio: Swing tank accounted for more than {tp_ratio * 100}% of daily power.")
+     return _convert_silent_alarm_dict_to_df(alarms)
+
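As a hedged illustration of the STS syntax above, a Variable_Names.csv wired for one swing tank might carry rows like the following (the variable names are hypothetical; thresholds follow the docstring's examples, and the call assumes timestamp-indexed minute and daily dataframes):

# Hypothetical Variable_Names.csv rows for the STS alarm codes:
#
#   variable_name,alarm_codes,pretty_name
#   SwingTank_Temp,"STS_T_1:140;STS_ST_1:130",Swing Tank Outlet Temp
#   SwingTank_Power,STS_SP_1:1.5,Swing Tank Power
#   System_Power,STS_TP_1:0.4,Total System Power

alarm_df = flag_high_swing_setpoint(minute_df, daily_df, config)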
+ def flag_recirc_balance_valve(daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_ratio : float = 0.4) -> pd.DataFrame:
+     """
+     Function will take a pandas dataframe and location of alarm information in a csv,
+     and create a dataframe with applicable alarm events.
+
+     VarNames syntax:
+         BV_ER_[OPTIONAL ID] : Indicates a power variable for an ER heater (equipment recirculation)
+         BV_OUT_[OPTIONAL ID]:### - Indicates the heating output variable the ER heating contributes to. Optional ### for the percentage
+             threshold that should not be crossed by the ER elements (default 0.4 for 40%)
+
+     Parameters
+     ----------
+     daily_df: pd.DataFrame
+         post-transformed dataframe for daily data. Used for checking recirculation balance by comparing sum of ER equipment
+         power to heating output power.
+     config : ecopipeline.ConfigManager
+         The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
+         called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
+         The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
+         name of each variable in the dataframe that requires alarming and the BV alarm codes (e.g., BV_ER_1, BV_OUT_1:0.5)
+     system: str
+         string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
+     default_power_ratio : float
+         Default power ratio threshold (as decimal, e.g., 0.4 for 40%) for OUT alarm codes when no custom bound is specified (default 0.4).
+         Alarm triggers when sum of ER equipment >= (OUT value * default_power_ratio)
+
+     Returns
+     -------
+     pd.DataFrame:
+         Pandas dataframe with alarm events
+     """
+     if daily_df.empty:
+         print("cannot flag balancing valve alarms. Dataframe is empty")
+         return pd.DataFrame()
+     variable_names_path = config.get_var_names_path()
+     try:
+         bounds_df = pd.read_csv(variable_names_path)
+     except FileNotFoundError:
+         print("File Not Found: ", variable_names_path)
+         return pd.DataFrame()
+     bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'BV',
+                                                {'OUT' : default_power_ratio},
+                                                system)
+     if bounds_df.empty:
+         return _convert_silent_alarm_dict_to_df({}) # no BV alarms to look into
+     # Process each unique alarm_code_id
+     alarms = {}
+     for alarm_id in bounds_df['alarm_code_id'].unique():
+         id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
+         out_codes = id_group[id_group['alarm_code_type'] == 'OUT']
+         er_codes = id_group[id_group['alarm_code_type'] == 'ER']
+         if len(out_codes) != 1 or len(er_codes) < 1:
+             raise Exception(f"Improper alarm codes for balancing valve with id {alarm_id}")
+         out_var_name = out_codes.iloc[0]['variable_name']
+         out_bound = out_codes.iloc[0]['bound']
+         for day in daily_df.index:
+             if out_var_name in daily_df.columns:
+                 # Get list of ER variable names
+                 er_var_names = er_codes['variable_name'].tolist()
+
+                 # Check if all ER variables exist in daily_df
+                 if all(var in daily_df.columns for var in er_var_names):
+                     # Sum all ER variables for this day
+                     er_sum = daily_df.loc[day, er_var_names].sum()
+                     out_value = daily_df.loc[day, out_var_name]
+
+                     # Check if sum of ER >= OUT value * bound
+                     if er_sum >= out_value*out_bound:
+                         _add_an_alarm(alarms, day, out_var_name, f"Recirculation imbalance: Sum of recirculation equipment ({er_sum:.2f}) exceeds or equals {(out_bound * 100):.2f}% of heating output.")
+     return _convert_silent_alarm_dict_to_df(alarms)
+
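A hedged sketch of the BV syntax, assuming two hypothetical ER heater power columns feeding one heating output column; the alarm fires on any day when the two ER sums reach 50% of HeatOut_Power:

# Hypothetical Variable_Names.csv rows for the BV alarm codes:
#
#   variable_name,alarm_codes,pretty_name
#   ER_Heater1_Power,BV_ER_1,ER Heater 1
#   ER_Heater2_Power,BV_ER_1,ER Heater 2
#   HeatOut_Power,BV_OUT_1:0.5,Heating Output

alarm_df = flag_recirc_balance_valve(daily_df, config)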
+ def flag_hp_inlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
+                        default_temp_threshold : float = 115.0, fault_time : int = 5) -> pd.DataFrame:
+     """
+     Function will take a pandas dataframe and location of alarm information in a csv,
+     and create a dataframe with applicable alarm events.
+
+     VarNames syntax:
+         HPI_POW_[OPTIONAL ID]:### - Indicates a power variable for the heat pump. ### is the power threshold (default 1.0) above which
+             the heat pump is considered 'on'
+         HPI_T_[OPTIONAL ID]:### - Indicates heat pump inlet temperature variable. ### is the temperature threshold (default 115.0)
+             that should not be exceeded while the heat pump is on
+
+     Parameters
+     ----------
+     df: pd.DataFrame
+         post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in-order minutes. If minutes
+         are out of order or have gaps, the function may return erroneous alarms.
+     daily_df: pd.DataFrame
+         post-transformed dataframe for daily data.
+     config : ecopipeline.ConfigManager
+         The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
+         called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
+         The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
+         name of each variable in the dataframe that requires alarming and the HPI alarm codes (e.g., HPI_POW_1:0.5, HPI_T_1:125.0)
+     system: str
+         string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
+     default_power_threshold : float
+         Default power threshold for POW alarm codes when no custom bound is specified (default 1.0). Heat pump is considered 'on'
+         when power exceeds this value.
+     default_temp_threshold : float
+         Default temperature threshold for T alarm codes when no custom bound is specified (default 115.0). Alarm triggers when
+         temperature exceeds this value while heat pump is on.
+     fault_time : int
+         Number of consecutive minutes that both power and temperature must exceed their thresholds before triggering an alarm (default 5).
+
+     Returns
+     -------
+     pd.DataFrame:
+         Pandas dataframe with alarm events
+     """
+     if df.empty:
+         print("cannot flag heat pump inlet temperature alarms. Dataframe is empty")
+         return pd.DataFrame()
+     variable_names_path = config.get_var_names_path()
+     try:
+         bounds_df = pd.read_csv(variable_names_path)
+     except FileNotFoundError:
+         print("File Not Found: ", variable_names_path)
+         return pd.DataFrame()
+
+     bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'HPI',
+                                                {'POW' : default_power_threshold,
+                                                 'T' : default_temp_threshold},
+                                                system)
+     if bounds_df.empty:
+         return _convert_silent_alarm_dict_to_df({}) # no alarms to look into
+
+     # Process each unique alarm_code_id
+     alarms = {}
+     for alarm_id in bounds_df['alarm_code_id'].unique():
+         for day in daily_df.index:
+             next_day = day + pd.Timedelta(days=1)
+             filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
+             id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
+             pow_codes = id_group[id_group['alarm_code_type'] == 'POW']
+             t_codes = id_group[id_group['alarm_code_type'] == 'T']
+             if len(t_codes) != 1 or len(pow_codes) != 1:
+                 raise Exception(f"Improper alarm codes for heat pump inlet temperature with id {alarm_id}")
+             pow_var_name = pow_codes.iloc[0]['variable_name']
+             pow_thresh = pow_codes.iloc[0]['bound']
+             t_var_name = t_codes.iloc[0]['variable_name']
+             t_pretty_name = t_codes.iloc[0]['pretty_name']
+             t_thresh = t_codes.iloc[0]['bound']
+             if pow_var_name in filtered_df.columns and t_var_name in filtered_df.columns:
+                 # Check for consecutive minutes where both power and temp exceed thresholds
+                 power_mask = filtered_df[pow_var_name] > pow_thresh
+                 temp_mask = filtered_df[t_var_name] > t_thresh
+                 combined_mask = power_mask & temp_mask
+
+                 # Check for fault_time consecutive minutes
+                 consecutive_condition = combined_mask.rolling(window=fault_time).min() == 1
+                 if consecutive_condition.any():
+                     first_true_index = consecutive_condition.idxmax()
+                     adjusted_time = first_true_index - pd.Timedelta(minutes=fault_time-1)
+                     _add_an_alarm(alarms, day, t_var_name, f"High heat pump inlet temperature: {t_pretty_name} was above {t_thresh:.1f} while HP was ON starting at {adjusted_time}.")

+     return _convert_silent_alarm_dict_to_df(alarms)
+
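Similarly for the HPI codes, a hedged example pairing a hypothetical heat pump power column with its inlet temperature column (thresholds taken from the docstring's own examples):

# Hypothetical Variable_Names.csv rows for the HPI alarm codes:
#
#   variable_name,alarm_codes,pretty_name
#   HP_Power,HPI_POW_1:0.5,Heat Pump Power
#   HP_InletTemp,HPI_T_1:125.0,Heat Pump Inlet Temp

alarm_df = flag_hp_inlet_temp(minute_df, daily_df, config)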
+ def _process_bounds_df_alarm_codes(bounds_df : pd.DataFrame, alarm_tag : str, type_default_dict : dict = {}, system : str = "") -> pd.DataFrame:
+     # Should only do for alarm codes of format: [TAG]_[TYPE]_[OPTIONAL_ID]:[BOUND]
+     if (system != ""):
+         if not 'system' in bounds_df.columns:
+             raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
+         bounds_df = bounds_df.loc[bounds_df['system'] == system]
+
+     required_columns = ["variable_name", "alarm_codes"]
+     for required_column in required_columns:
+         if not required_column in bounds_df.columns:
+             raise Exception(f"{required_column} is not present in Variable_Names.csv")
+     if not 'pretty_name' in bounds_df.columns:
+         bounds_df['pretty_name'] = bounds_df['variable_name']
+     else:
+         bounds_df['pretty_name'] = bounds_df['pretty_name'].fillna(bounds_df['variable_name'])
+
+     bounds_df = bounds_df.loc[:, ["variable_name", "alarm_codes", "pretty_name"]]
+     bounds_df.dropna(axis=0, thresh=2, inplace=True)
+
+     # Check if all alarm_codes are null or if dataframe is empty
+     if bounds_df.empty or bounds_df['alarm_codes'].isna().all():
+         return pd.DataFrame()
+
+     bounds_df = bounds_df[bounds_df['alarm_codes'].str.contains(alarm_tag, na=False)]
+
+     # Split alarm_codes by semicolons and create a row for each tagged code
+     expanded_rows = []
+     for idx, row in bounds_df.iterrows():
+         alarm_codes = str(row['alarm_codes']).split(';')
+         tag_codes = [code.strip() for code in alarm_codes if code.strip().startswith(alarm_tag)]
+
+         if tag_codes: # Only process if there are tagged codes
+             for tag_code in tag_codes:
+                 new_row = row.copy()
+                 if ":" in tag_code:
+                     tag_parts = tag_code.split(':')
+                     if len(tag_parts) > 2:
+                         raise Exception(f"Improperly formatted alarm code : {tag_code}")
+                     new_row['bound'] = tag_parts[1]
+                     tag_code = tag_parts[0]
+                 else:
+                     new_row['bound'] = None
+                 new_row['alarm_codes'] = tag_code
+
+                 expanded_rows.append(new_row)
+
+     if expanded_rows:
+         bounds_df = pd.DataFrame(expanded_rows)
+     else:
+         return pd.DataFrame() # no tagged alarms to look into
+
+     alarm_code_parts = []
+     for idx, row in bounds_df.iterrows():
+         parts = row['alarm_codes'].split('_')
+         if len(parts) == 2:
+             alarm_code_parts.append([parts[1], "No ID"])
+         elif len(parts) == 3:
+             alarm_code_parts.append([parts[1], parts[2]])
+         else:
+             raise Exception(f"improper {alarm_tag} alarm code format for {row['variable_name']}")
+     if alarm_code_parts:
+         bounds_df[['alarm_code_type', 'alarm_code_id']] = pd.DataFrame(alarm_code_parts, index=bounds_df.index)
+
+     # Replace None bounds with appropriate defaults based on alarm_code_type
+     for idx, row in bounds_df.iterrows():
+         if pd.isna(row['bound']) or row['bound'] is None:
+             if row['alarm_code_type'] in type_default_dict.keys():
+                 bounds_df.at[idx, 'bound'] = type_default_dict[row['alarm_code_type']]
+     # Coerce bound column to float
+     bounds_df['bound'] = pd.to_numeric(bounds_df['bound'], errors='coerce').astype(float)
+     return bounds_df
+
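To make the [TAG]_[TYPE]_[OPTIONAL_ID]:[BOUND] expansion concrete, a minimal sketch (hypothetical variable name, defaults chosen arbitrarily):

import pandas as pd

demo = pd.DataFrame({'variable_name': ['SwingTank_Temp'],
                     'alarm_codes': ['STS_T_1:140;STS_ST_1']})
out = _process_bounds_df_alarm_codes(demo, 'STS', {'T': 130.0, 'ST': 130.0})
# out holds one row per code: alarm_code_type 'T' with bound 140.0 (explicit)
# and alarm_code_type 'ST' with bound 130.0 (filled from the defaults dict),
# both carrying alarm_code_id '1'.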
+ def _add_an_alarm(alarm_dict : dict, day : datetime, var_name : str, alarm_string : str):
+     # Round down to beginning of day
+     day = pd.Timestamp(day).normalize()
+
+     if day in alarm_dict:
+         alarm_dict[day].append([var_name, alarm_string])
+     else:
+         alarm_dict[day] = [[var_name, alarm_string]]
+
  def _convert_silent_alarm_dict_to_df(alarm_dict : dict) -> pd.DataFrame:
      events = {
          'start_time_pt' : [],
@@ -293,6 +662,9 @@ def power_ratio_alarm(daily_df: pd.DataFrame, config : ConfigManager, day_table_
      for required_column in required_columns:
          if not required_column in ratios_df.columns:
              raise Exception(f"{required_column} is not present in Variable_Names.csv")
+     if ratios_df['alarm_codes'].isna().all() or ratios_df['alarm_codes'].isnull().all():
+         print("No alarm codes in ", variable_names_path)
+         return pd.DataFrame()
      if not 'pretty_name' in ratios_df.columns:
          ratios_df['pretty_name'] = ratios_df['variable_name']
      else:
@@ -1,3 +1,4 @@
1
- from .extract import get_noaa_data, json_to_df, extract_files, get_last_full_day_from_db, get_db_row_from_time, extract_new, csv_to_df, get_sub_dirs, msa_to_df, fm_api_to_df, small_planet_control_to_df, dent_csv_to_df, flow_csv_to_df, pull_egauge_data, egauge_csv_to_df, remove_char_sequence_from_csv_header, tb_api_to_df
1
+ from .extract import get_noaa_data, json_to_df, extract_files, get_last_full_day_from_db, get_db_row_from_time, extract_new, csv_to_df, get_sub_dirs, msa_to_df, fm_api_to_df, small_planet_control_to_df, dent_csv_to_df, flow_csv_to_df, pull_egauge_data, egauge_csv_to_df, remove_char_sequence_from_csv_header, tb_api_to_df, skycentrics_api_to_df,get_OAT_open_meteo
2
2
  __all__ = ["get_noaa_data", "json_to_df", "extract_files", "get_last_full_day_from_db", "get_db_row_from_time", 'extract_new', "csv_to_df", "get_sub_dirs", "msa_to_df", "fm_api_to_df",
3
- "small_planet_control_to_df","dent_csv_to_df","flow_csv_to_df","pull_egauge_data", "egauge_csv_to_df","remove_char_sequence_from_csv_header", "tb_api_to_df"]
3
+ "small_planet_control_to_df","dent_csv_to_df","flow_csv_to_df","pull_egauge_data", "egauge_csv_to_df","remove_char_sequence_from_csv_header", "tb_api_to_df", "skycentrics_api_to_df",
4
+ "get_OAT_open_meteo"]
@@ -1,5 +1,6 @@
  from typing import List
  import pandas as pd
+ import openmeteo_requests
  import re
  from ftplib import FTP
  from datetime import datetime, timedelta
@@ -15,6 +16,7 @@ import mysql.connector.errors as mysqlerrors
  import requests
  import subprocess
  import traceback
+ import time


  def get_last_full_day_from_db(config : ConfigManager, table_identifier : str = "minute") -> datetime:
@@ -661,6 +663,91 @@ def egauge_csv_to_df(csv_filenames: List[str]) -> pd.DataFrame:

      return df_diff

+ def skycentrics_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True, time_zone: str = 'US/Pacific'):
+     """
+     Function connects to the Skycentrics API to pull data and returns a dataframe.
+
+     Parameters
+     ----------
+     config : ecopipeline.ConfigManager
+         The ConfigManager object that holds configuration data for the pipeline. The config manager
+         must contain information to connect to the api, i.e. the api token and secret as well as
+         the device id for the device the data is being pulled from.
+     startTime: datetime
+         The point in time for which we want to start the data extraction from. This
+         is local time from the data's index.
+     endTime: datetime
+         The point in time for which we want to end the data extraction. This
+         is local time from the data's index.
+     create_csv : bool
+         create csv files as you process such that API need not be relied upon for reprocessing
+     time_zone: str
+         The timezone for the indexes in the output dataframe as a string. Must be a string recognized as a
+         time stamp by the pandas tz_localize() function https://pandas.pydata.org/docs/reference/api/pandas.Series.tz_localize.html
+         defaults to 'US/Pacific'
+
+     Returns
+     -------
+     pd.DataFrame:
+         Pandas Dataframe containing data from the API pull with column headers the same as the variable names in the data from the pull
+     """
+     # temporary solution while no date range available
+
+     try:
+         df = pd.DataFrame()
+         temp_dfs = []
+         if endTime is None:
+             endTime = datetime.utcnow()
+         if startTime is None:
+             startTime = endTime - timedelta(1)
+         time_parser = startTime
+         while time_parser < endTime:
+             time_parse_end = time_parser + timedelta(1)
+             start_time_str = time_parser.strftime('%Y-%m-%dT%H:%M:%S')
+             end_time_str = time_parse_end.strftime('%Y-%m-%dT%H:%M:%S')
+             skycentrics_token, date_str = config.get_skycentrics_token(
+                 request_str=f'GET /api/devices/{config.api_device_id}/data?b={start_time_str}&e={end_time_str}&g=1 HTTP/1.1',
+                 date_str=None)
+             response = requests.get(f'https://api.skycentrics.com/api/devices/{config.api_device_id}/data?b={start_time_str}&e={end_time_str}&g=1',
+                                     headers={'Date': date_str, 'x-sc-api-token': skycentrics_token, 'Accept': 'application/gzip'})
+             if response.status_code == 200:
+                 # Decompress the gzip response
+                 decompressed_data = gzip.decompress(response.content)
+                 # Parse JSON from decompressed data
+                 json_data = json.loads(decompressed_data)
+                 norm_data = pd.json_normalize(json_data, record_path=['sensors'], meta=['time'], meta_prefix='response_')
+                 if len(norm_data) != 0:
+                     norm_data["time_pt"] = pd.to_datetime(norm_data["response_time"], utc=True)
+                     norm_data["time_pt"] = norm_data["time_pt"].dt.tz_convert(time_zone)
+                     norm_data = pd.pivot_table(norm_data, index="time_pt", columns="id", values="data")
+                     # Iterate over the index and round up if necessary (work around for json format from sensors)
+                     for i in range(len(norm_data.index)):
+                         if norm_data.index[i].minute == 59 and norm_data.index[i].second == 59:
+                             norm_data.index.values[i] = norm_data.index[i] + pd.Timedelta(seconds=1)
+                     temp_dfs.append(norm_data)
+             else:
+                 print(f"Failed to make GET request. Status code: {response.status_code} {response.json()}")
+             time_parser = time_parse_end
+         if len(temp_dfs) > 0:
+             df = pd.concat(temp_dfs, ignore_index=False)
+             if create_csv:
+                 filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
+                 original_directory = os.getcwd()
+                 os.chdir(config.data_directory)
+                 df.to_csv(filename, index_label='time_pt')
+                 os.chdir(original_directory)
+         else:
+             print("No skycentrics data retrieved for time frame.")
+         return df
+
+     except Exception as e:
+         print(f"An error occurred: {e}")
+         raise e
+
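A hedged usage sketch for the new extractor, assuming a config.ini whose [data] section carries api_token, api_secret, and device_id (see the ConfigManager changes below; the ConfigManager constructor arguments shown are an assumption):

from datetime import datetime, timedelta
from ecopipeline import ConfigManager
from ecopipeline.extract import skycentrics_api_to_df

config = ConfigManager("full/path/to/pipeline/config.ini")  # assumed constructor usage
df = skycentrics_api_to_df(config,
                           startTime=datetime.utcnow() - timedelta(days=3),
                           endTime=datetime.utcnow(),
                           create_csv=False)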
  def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True) -> pd.DataFrame:
      """
      Function connects to the field manager api to pull data and returns a dataframe.
@@ -776,7 +863,7 @@ def pull_egauge_data(config: ConfigManager, eGauge_ids: list, eGauge_usr : str,
      os.chdir(original_directory)

  def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True, query_hours : float = 1,
-                  sensor_keys : list = [], seperate_keys : bool = False):
+                  sensor_keys : list = [], seperate_keys : bool = False, device_id_overwrite : str = None, csv_prefix : str = ""):
      """
      Function connects to the ThingsBoard API to pull data and returns a dataframe.
@@ -796,6 +883,11 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
          create csv files as you process such that API need not be relied upon for reprocessing
      query_hours : float
          number of hours to query at a time from ThingsBoard API
+
+     device_id_overwrite : str
+         Overwrites the configured device ID for the API pull
+     csv_prefix : str
+         prefix to add to the csv file name

      Returns
      -------
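The two new parameters make it possible to reuse one pipeline against several ThingsBoard devices; a hedged sketch, assuming an existing config object (the device UUID below is a placeholder):

from datetime import datetime, timedelta

end = datetime.utcnow()
start = end - timedelta(hours=6)
df = tb_api_to_df(config, startTime=start, endTime=end,
                  device_id_overwrite="00000000-0000-0000-0000-000000000000",  # placeholder UUID
                  csv_prefix="site2_")  # csv is written as site2_<startTime>.csv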
@@ -804,16 +896,17 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
          Will return with index in UTC so needs to be converted after to appropriate timezone
      """
      df = pd.DataFrame()
+     api_device_id = device_id_overwrite if device_id_overwrite is not None else config.api_device_id
      if len(sensor_keys) <= 0:
          token = config.get_thingsboard_token()
-         key_list = _get_tb_keys(config, token)
+         key_list = _get_tb_keys(token, api_device_id)
          if len(key_list) <= 0:
-             raise Exception(f"No sensors available at ThingsBoard site with id {config.api_device_id}")
-         return tb_api_to_df(config, startTime, endTime, create_csv, query_hours, key_list, seperate_keys)
+             raise Exception(f"No sensors available at ThingsBoard site with id {api_device_id}")
+         return tb_api_to_df(config, startTime, endTime, create_csv, query_hours, key_list, seperate_keys, device_id_overwrite, csv_prefix)
      if seperate_keys:
          df_list = []
          for sensor_key in sensor_keys:
-             df_list.append(tb_api_to_df(config, startTime, endTime, False, query_hours, [sensor_key], False))
+             df_list.append(tb_api_to_df(config, startTime, endTime, False, query_hours, [sensor_key], False, device_id_overwrite, csv_prefix))
          df = pd.concat(df_list)
      else:
          # not seperate_keys:
@@ -826,13 +919,13 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
          if endTime - timedelta(hours=query_hours) > startTime:
              time_diff = endTime - startTime
              midpointTime = startTime + time_diff / 2
-             df_1 = tb_api_to_df(config, startTime, midpointTime, query_hours=query_hours, sensor_keys=sensor_keys, create_csv=False)
-             df_2 = tb_api_to_df(config, midpointTime, endTime, query_hours=query_hours, sensor_keys=sensor_keys, create_csv=False)
+             df_1 = tb_api_to_df(config, startTime, midpointTime, query_hours=query_hours, sensor_keys=sensor_keys, create_csv=False, device_id_overwrite=device_id_overwrite)
+             df_2 = tb_api_to_df(config, midpointTime, endTime, query_hours=query_hours, sensor_keys=sensor_keys, create_csv=False, device_id_overwrite=device_id_overwrite)
              df = pd.concat([df_1, df_2])
              df = df.sort_index()
              df = df.groupby(df.index).mean()
          else:
-             url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{config.api_device_id}/values/timeseries'
+             url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{api_device_id}/values/timeseries'
              token = config.get_thingsboard_token()
              key_string = ','.join(sensor_keys)
              params = {
@@ -844,7 +937,6 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
                  'interval' : '0',
                  'agg' : 'NONE'
              }
-
              # Headers
              headers = {
                  'accept': 'application/json',
@@ -855,14 +947,6 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
              response = requests.get(url, headers=headers, params=params)
              if response.status_code == 200:
                  response_json = response.json()
-                 # if create_csv:
-                 #     json_filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.json"
-                 #     print(f"filename: {json_filename}, url: {url}, params: {params}")
-                 #     original_directory = os.getcwd()
-                 #     os.chdir(config.data_directory)
-                 #     with open(json_filename, 'w') as f:
-                 #         json.dump(response_json, f, indent=4) # indent=4 makes it human-readable
-                 #     os.chdir(original_directory)

                  data = {}
                  for key, records in response_json.items():
@@ -886,7 +970,7 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
                  df = pd.DataFrame()
      # save to file
      if create_csv:
-         filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
+         filename = f"{csv_prefix}{startTime.strftime('%Y%m%d%H%M%S')}.csv"
          original_directory = os.getcwd()
          os.chdir(config.data_directory)
          df.to_csv(filename, index_label='time_pt')
@@ -900,8 +984,8 @@ def _get_float_value(value):
      except (ValueError, TypeError):
          return None

- def _get_tb_keys(config: ConfigManager, token : str) -> List[str]:
-     url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{config.api_device_id}/keys/timeseries'
+ def _get_tb_keys(token : str, api_device_id : str) -> List[str]:
+     url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{api_device_id}/keys/timeseries'

      # Headers
      headers = {
@@ -946,6 +1030,62 @@ def get_sub_dirs(dir: str) -> List[str]:
          return
      return directories

+ def get_OAT_open_meteo(lat: float, long: float, start_date: datetime, end_date: datetime = None, time_zone: str = "America/Los_Angeles",
+                        use_noaa_names : bool = True) -> pd.DataFrame:
+     if end_date is None:
+         end_date = datetime.today() - timedelta(1)
+     start_date_str = start_date.date().strftime('%Y-%m-%d')
+     end_date_str = end_date.date().strftime('%Y-%m-%d')
+     print(f"Getting Open Meteo data for {start_date_str} to {end_date_str}")
+     try:
+         openmeteo = openmeteo_requests.Client()
+
+         url = "https://archive-api.open-meteo.com/v1/archive"
+         params = {
+             "latitude": lat,
+             "longitude": long,
+             "start_date": start_date_str,
+             "end_date": end_date_str,
+             "hourly": "temperature_2m",
+             "temperature_unit": "fahrenheit",
+             "timezone": time_zone,
+         }
+         responses = openmeteo.weather_api(url, params=params)
+
+         # Process first location. Add a for-loop for multiple locations or weather models
+         response = responses[0]
+
+         # Process hourly data. The order of variables needs to be the same as requested.
+         hourly = response.Hourly()
+         hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
+
+         hourly_data = {"time_pt": pd.date_range(
+             start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
+             end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
+             freq = pd.Timedelta(seconds = hourly.Interval()),
+             inclusive = "left"
+         )}
+
+         hourly_data["temperature_2m"] = hourly_temperature_2m
+         hourly_data["time_pt"] = hourly_data["time_pt"].tz_convert(time_zone).tz_localize(None)
+
+         hourly_data = pd.DataFrame(hourly_data)
+         hourly_data.set_index('time_pt', inplace = True)
+
+         if use_noaa_names:
+             hourly_data = hourly_data.rename(columns = {'temperature_2m':'airTemp_F'})
+             hourly_data['dewPoint_F'] = None
+
+         # Convert float32 to float64 for SQL database compatibility
+         for col in hourly_data.select_dtypes(include=['float32']).columns:
+             hourly_data[col] = hourly_data[col].astype('float64')
+
+         return hourly_data
+     except Exception as e:
+         print(f'Could not get OAT data: {e}')
+         return pd.DataFrame()
+
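A hedged usage sketch: hourly outside air temperature for Seattle over the last week, with use_noaa_names left on so the frame can stand in for a NOAA pull in join_to_hourly:

from datetime import datetime, timedelta
from ecopipeline.extract import get_OAT_open_meteo

oat_df = get_OAT_open_meteo(lat=47.6, long=-122.33,
                            start_date=datetime.today() - timedelta(days=7))
# oat_df is indexed by local time_pt with columns airTemp_F and dewPoint_F (all None)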

  def get_noaa_data(station_names: List[str], config : ConfigManager, station_ids : dict = {}) -> dict:
      """
@@ -963,6 +1103,7 @@ def get_noaa_data(station_names: List[str], config : ConfigManager, station_ids
      dict:
          Dictionary with key as Station Name and Value as DF of Parsed Weather Data
      """
+     # TODO swap out for this if empty: https://open-meteo.com/en/docs/historical-weather-api?start_date=2025-12-29&latitude=47.6&longitude=-122.33&temperature_unit=fahrenheit&end_date=2026-01-04
      formatted_dfs = {}
      weather_directory = config.get_weather_dir_path()
      try:
@@ -2,7 +2,7 @@ from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffi
      aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days, \
      convert_c_to_f,convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage,generate_event_log_df,convert_time_zone, shift_accumulative_columns, \
      heat_output_calc, add_relative_humidity, apply_equipment_cop_derate, create_data_statistics_df, delete_erroneous_from_time_pt,column_name_change, \
-     process_ls_signal
+     process_ls_signal, convert_temp_resistance_type, estimate_power
  from .lbnl import nclarity_filter_new, site_specific, condensate_calculations, gas_valve_diff, gather_outdoor_conditions, aqsuite_prep_time, \
      nclarity_csv_to_df, _add_date, add_local_time, aqsuite_filter_new, get_refrig_charge, elev_correction, change_ID_to_HVAC, get_hvac_state, \
      get_cop_values, get_cfm_values, replace_humidity, create_fan_curves, lbnl_temperature_conversions, lbnl_pressure_conversions, \
@@ -14,4 +14,4 @@ __all__ = ["rename_sensors", "avg_duplicate_times", "remove_outliers", "ffill_mi
      "create_fan_curves", "lbnl_temperature_conversions", "lbnl_pressure_conversions", "lbnl_sat_calculations", "get_site_cfm_info", "get_site_info", "merge_indexlike_rows", "calculate_cop_values", "aggregate_values",
      "get_energy_by_min", "verify_power_energy", "get_temp_zones120", "get_storage_gals120","convert_c_to_f","convert_l_to_g", "convert_on_off_col_to_bool", "flag_dhw_outage","generate_event_log_df","convert_time_zone",
      "shift_accumulative_columns","heat_output_calc", "add_relative_humidity","apply_equipment_cop_derate","create_data_statistics_df",
-     "delete_erroneous_from_time_pt","column_name_change","process_ls_signal"]
+     "delete_erroneous_from_time_pt","column_name_change","process_ls_signal", "convert_temp_resistance_type", "estimate_power"]
@@ -1,7 +1,7 @@
  import pandas as pd
  import numpy as np
  import datetime as dt
- import csv
+ import pickle
  import os
  from ecopipeline.utils.unit_convert import temp_c_to_f_non_noaa, volume_l_to_g, power_btuhr_to_kw, temp_f_to_c
  from ecopipeline import ConfigManager
@@ -157,20 +157,29 @@ def _rm_cols(col, bounds_df): # Helper function for remove_outliers
      """
      Function will take in a pandas series and bounds information
      stored in a dataframe, then check each element of that column and set it to nan
      if it is outside the given bounds.

      Args:
          col: pd.Series
              Pandas dataframe column from data being processed
          bounds_df: pd.DataFrame
              Pandas dataframe indexed by the names of the columns from the dataframe that col came from. There should be at least
              two columns in this dataframe, lower_bound and upper_bound, for use in removing outliers
      Returns:
          None
      """
      if (col.name in bounds_df.index):
-         c_lower = float(bounds_df.loc[col.name]["lower_bound"])
-         c_upper = float(bounds_df.loc[col.name]["upper_bound"])
+         c_lower = bounds_df.loc[col.name]["lower_bound"]
+         c_upper = bounds_df.loc[col.name]["upper_bound"]
+
+         # Skip if both bounds are NaN
+         if pd.isna(c_lower) and pd.isna(c_upper):
+             return
+
+         # Convert bounds to float, handling NaN values
+         c_lower = float(c_lower) if not pd.isna(c_lower) else -np.inf
+         c_upper = float(c_upper) if not pd.isna(c_upper) else np.inf
+
          col.mask((col > c_upper) | (col < c_lower), other=np.NaN, inplace=True)

  # TODO: remove_outliers STRETCH GOAL: Functionality for alarms being raised based on bounds needs to happen here.
@@ -305,6 +314,71 @@ def ffill_missing(original_df: pd.DataFrame, config : ConfigManager, previous_fi
      df.apply(_ffill, args=(ffill_df,previous_fill))
      return df

+ def convert_temp_resistance_type(df : pd.DataFrame, column_name : str, sensor_model = 'veris') -> pd.DataFrame:
+     """
+     Convert temperature readings recorded against a 10k Type 2 thermistor curve into corrected readings
+     by mapping temperature (F) to resistance (Ohms) with the Type 2 model, then resistance back to
+     temperature with the Type 3 model.
+
+     Parameters:
+     -----------
+     df: pd.DataFrame
+         Timestamp indexed Pandas dataframe of minute by minute values
+     column_name : str
+         Name of column with resistance conversion type 2 data
+     sensor_model : str
+         possible strings: veris, tasseron
+
+     Returns:
+     --------
+     df: pd.DataFrame
+     """
+     model_path_t_to_r = '../utils/pkls/'
+     model_path_r_to_t = '../utils/pkls/'
+     if sensor_model == 'veris':
+         model_path_t_to_r = model_path_t_to_r + 'veris_temp_to_resistance_2.pkl'
+         model_path_r_to_t = model_path_r_to_t + 'veris_resistance_to_temp_3.pkl'
+     elif sensor_model == 'tasseron':
+         model_path_t_to_r = model_path_t_to_r + 'tasseron_temp_to_resistance_2.pkl'
+         model_path_r_to_t = model_path_r_to_t + 'tasseron_resistance_to_temp_3.pkl'
+     else:
+         raise Exception("unsupported sensor model")
+
+     with open(os.path.join(os.path.dirname(__file__), model_path_t_to_r), 'rb') as f:
+         model = pickle.load(f)
+     df['resistance'] = df[column_name].apply(model)
+     with open(os.path.join(os.path.dirname(__file__), model_path_r_to_t), 'rb') as f:
+         model = pickle.load(f)
+     df[column_name] = df['resistance'].apply(model)
+     df = df.drop(columns='resistance')
+     return df
+
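A hedged usage sketch, assuming a hypothetical 'TankTemp_F' column that was logged against the wrong thermistor curve; the bundled .pkl models perform the Type 2 -> resistance -> Type 3 roundtrip:

minute_df = convert_temp_resistance_type(minute_df, 'TankTemp_F', sensor_model='tasseron')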
+ def estimate_power(df : pd.DataFrame, new_power_column : str, current_a_column : str, current_b_column : str, current_c_column : str,
+                    assumed_voltage : float = 208, power_factor : float = 1):
+     """
+     Estimate three-phase power in kW from per-phase current measurements.
+
+     Parameters
+     ----------
+     df: pd.DataFrame
+         Pandas dataframe with minute-to-minute data
+     new_power_column : str
+         The column name of the power variable for the calculation. Units of the column should be kW
+     current_a_column : str
+         The column name of the Current A variable for the calculation. Units of the column should be amps
+     current_b_column : str
+         The column name of the Current B variable for the calculation. Units of the column should be amps
+     current_c_column : str
+         The column name of the Current C variable for the calculation. Units of the column should be amps
+     assumed_voltage : float
+         The assumed voltage (default 208)
+     power_factor : float
+         The power factor (default 1)
+
+     Returns
+     -------
+     pd.DataFrame:
+         Pandas dataframe with new estimated power column of specified name.
+     """
+     # average current * 208V * PF * sqrt(3)
+     df[new_power_column] = (df[current_a_column] + df[current_b_column] + df[current_c_column]) / 3 * assumed_voltage * power_factor * np.sqrt(3) / 1000
+
+     return df
+
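A worked example of the formula: with 10 A on each phase at 208 V and unity power factor, the estimate is 10 * 208 * 1 * sqrt(3) / 1000, roughly 3.60 kW.

import pandas as pd

df = pd.DataFrame({'amps_a': [10.0], 'amps_b': [10.0], 'amps_c': [10.0]})
df = estimate_power(df, 'power_kW', 'amps_a', 'amps_b', 'amps_c')
print(round(df['power_kW'].iloc[0], 2))  # 3.6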
  def process_ls_signal(df: pd.DataFrame, hourly_df: pd.DataFrame, daily_df: pd.DataFrame, load_dict: dict = {1: "normal", 2: "loadUp", 3 : "shed"}, ls_column: str = 'ls',
                        drop_ls_from_df : bool = False):
      """
@@ -719,7 +793,7 @@ def convert_on_off_col_to_bool(df: pd.DataFrame, column_names: list) -> pd.DataF
      pd.DataFrame: Dataframe with specified columns converted from ON/OFF strings to boolean values.
      """

-     mapping = {'ON': True, 'OFF': False}
+     mapping = {'ON': True, 'OFF': False, 'On': True, 'Off': False}

      for column_name in column_names:
          df[column_name] = df[column_name].map(mapping).where(df[column_name].notna(), df[column_name])
@@ -1110,6 +1184,9 @@ def join_to_hourly(hourly_data: pd.DataFrame, noaa_data: pd.DataFrame) -> pd.Dat
      pd.DataFrame:
          A single, joined dataframe
      """
+     # skip the join when the NOAA frame carries no data (e.g. at the start of a new year)
+     if 'OAT_NOAA' in noaa_data.columns and not noaa_data['OAT_NOAA'].notnull().any():
+         return hourly_data
      out_df = hourly_data.join(noaa_data)
      return out_df

@@ -4,6 +4,9 @@ import mysql.connector
  import mysql.connector.cursor
  import requests
  from datetime import datetime
+ import base64
+ import hashlib
+ import hmac

  class ConfigManager:
      """
@@ -56,6 +59,8 @@ class ConfigManager:
          self.data_directory = data_directory
          self.api_usr = None
          self.api_pw = None
+         self.api_token = None
+         self.api_secret = None
          self.api_device_id = None
          if self.data_directory is None:
              configured_data_method = False
@@ -74,6 +79,11 @@ class ConfigManager:
              self.api_pw = configure.get('data', 'api_pw')
              self.api_device_id = configure.get('data','device_id')
              configured_data_method = True
+         elif 'api_token' in configure['data'] and 'api_secret' in configure['data']:
+             self.api_token = configure.get('data', 'api_token')
+             self.api_secret = configure.get('data', 'api_secret')
+             self.api_device_id = configure.get('data','device_id')
+             configured_data_method = True
          if not configured_data_method:
              raise Exception('data configuration section missing or incomplete in configuration file.')

@@ -261,4 +271,13 @@ class ConfigManager:
      def get_fm_device_id(self) -> str:
          if self.api_device_id is None:
              raise Exception("Field Manager device ID has not been configured.")
-         return self.api_device_id
+         return self.api_device_id
+
+     def get_skycentrics_token(self, request_str = 'GET /api/devices/ HTTP/1.', date_str : str = None) -> tuple:
+         if date_str is None:
+             date_str = datetime.utcnow().strftime('%a, %d %b %H:%M:%S GMT')
+         # HMAC-SHA1 signature over the request line, date, and an empty-body MD5, base64 encoded
+         signature = base64.b64encode(hmac.new(self.api_secret.encode(),
+                                               '{}\n{}\n{}\n{}'.format(request_str, date_str, '', hashlib.md5(''.encode()).hexdigest()).encode(),
+                                               hashlib.sha1).digest())
+         token = '{}:{}'.format(self.api_token, signature.decode())
+         return token, date_str
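A hedged sketch of how the token pairs with the Date header on a request, mirroring the call pattern in skycentrics_api_to_df above (the query window values are placeholders):

device_id = config.api_device_id  # configured via [data] device_id
request = f'GET /api/devices/{device_id}/data?b=2025-01-01T00:00:00&e=2025-01-02T00:00:00&g=1 HTTP/1.1'
token, date_str = config.get_skycentrics_token(request_str=request)
headers = {'Date': date_str, 'x-sc-api-token': token, 'Accept': 'application/gzip'}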
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ecopipeline
- Version: 0.11.4
+ Version: 1.0.3
  Summary: Contains functions for use in Ecotope Datapipelines
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
@@ -1,4 +1,5 @@
  LICENSE
+ MANIFEST.in
  README.md
  pyproject.toml
  setup.cfg
@@ -22,4 +23,9 @@ src/ecopipeline/transform/transform.py
  src/ecopipeline/utils/ConfigManager.py
  src/ecopipeline/utils/NOAADataDownloader.py
  src/ecopipeline/utils/__init__.py
- src/ecopipeline/utils/unit_convert.py
+ src/ecopipeline/utils/unit_convert.py
+ src/ecopipeline/utils/pkls/__init__.py
+ src/ecopipeline/utils/pkls/tasseron_resistance_to_temp_3.pkl
+ src/ecopipeline/utils/pkls/tasseron_temp_to_resistance_2.pkl
+ src/ecopipeline/utils/pkls/veris_resistance_to_temp_3.pkl
+ src/ecopipeline/utils/pkls/veris_temp_to_resistance_2.pkl
@@ -1,2 +0,0 @@
- from .event_tracking import *
- __all__ = ['central_alarm_df_creator','flag_boundary_alarms','power_ratio_alarm','flag_abnormal_COP']