ecopipeline 1.0.2__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,3 @@
1
1
  from .event_tracking import *
2
- __all__ = ['central_alarm_df_creator','flag_boundary_alarms','power_ratio_alarm','flag_abnormal_COP']
2
+ __all__ = ['central_alarm_df_creator','flag_boundary_alarms','power_ratio_alarm','flag_abnormal_COP','flag_high_swing_setpoint',
3
+ 'flag_recirc_balance_valve','flag_hp_inlet_temp']
@@ -1,6 +1,6 @@
1
1
  import pandas as pd
2
2
  import numpy as np
3
- import datetime as dt
3
+ import datetime as datetime
4
4
  from ecopipeline import ConfigManager
5
5
  import re
6
6
  import mysql.connector.errors as mysqlerrors
@@ -13,29 +13,24 @@ def central_alarm_df_creator(df: pd.DataFrame, daily_data : pd.DataFrame, config
13
13
  day_list = daily_data.index.to_list()
14
14
  print('Checking for alarms...')
15
15
  alarm_df = _convert_silent_alarm_dict_to_df({})
16
- boundary_alarm_df = flag_boundary_alarms(df, config, full_days=day_list, system=system, default_fault_time= default_boundary_fault_time)
17
- pwr_alarm_df = power_ratio_alarm(daily_data, config, day_table_name = config.get_table_name(day_table_name_header), system=system, ratio_period_days=power_ratio_period_days)
18
- abnormal_COP_df = flag_abnormal_COP(daily_data, config, system = system, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)
19
-
20
- if len(boundary_alarm_df) > 0:
21
- print("Boundary alarms detected. Adding them to event df...")
22
- alarm_df = boundary_alarm_df
23
- else:
24
- print("No boundary alarms detected.")
25
-
26
- if len(pwr_alarm_df) > 0:
27
- print("Power alarms detected. Adding them to event df...")
28
- alarm_df = pd.concat([alarm_df, pwr_alarm_df])
29
- else:
30
- print("No power alarms detected.")
31
-
32
- if _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
33
- print("Ongoing DATA_LOSS_COP detected. No further DATA_LOSS_COP events will be uploaded")
34
- elif len(abnormal_COP_df) > 0:
35
- print("Abnormal COPs detected. Adding them to event df...")
36
- alarm_df = pd.concat([alarm_df, abnormal_COP_df])
37
- else:
38
- print("No abnormal COPs.")
16
+ dict_of_alarms = {}
17
+ dict_of_alarms['boundary'] = flag_boundary_alarms(df, config, full_days=day_list, system=system, default_fault_time= default_boundary_fault_time)
18
+ dict_of_alarms['power ratio'] = power_ratio_alarm(daily_data, config, day_table_name = config.get_table_name(day_table_name_header), system=system, ratio_period_days=power_ratio_period_days)
19
+ dict_of_alarms['abnormal COP'] = flag_abnormal_COP(daily_data, config, system = system, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)
20
+ dict_of_alarms['swing tank setpoint'] = flag_high_swing_setpoint(df, daily_data, config, system=system)
21
+ dict_of_alarms['recirculation loop balancing valve'] = flag_recirc_balance_valve(daily_data, config, system=system)
22
+ dict_of_alarms['HPWH inlet temperature'] = flag_hp_inlet_temp(df, daily_data, config, system)
23
+
24
+ ongoing_COP_exception = ['abnormal COP']
25
+
26
+ for key, value in dict_of_alarms.items():
27
+ if key in ongoing_COP_exception and _check_if_during_ongoing_cop_alarm(daily_data, config, site_name):
28
+ print("Ongoing DATA_LOSS_COP detected. No further DATA_LOSS_COP events will be uploaded")
29
+ elif len(value) > 0:
30
+ print(f"Detected {key} alarm(s). Adding to event df...")
31
+ alarm_df = pd.concat([alarm_df, value])
32
+ else:
33
+ print(f"No {key} alarm(s) detected.")
39
34
 
40
35
  return alarm_df
41
36
 
@@ -188,6 +183,377 @@ def flag_boundary_alarms(df: pd.DataFrame, config : ConfigManager, default_fault
188
183
 
189
184
  return _convert_silent_alarm_dict_to_df(alarms)
190
185
 
186
def flag_high_swing_setpoint(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, default_fault_time : int = 3,
                             system: str = "", default_setpoint : float = 130.0, default_power_indication : float = 1.0,
                             default_power_ratio : float = 0.4) -> pd.DataFrame:
    """
    Function will take a pandas dataframe and location of alarm information in a csv,
    and create a dataframe with applicable alarm events.

    VarNames syntax:
        STS_T_ID:### - Swing Tank Outlet Temperature. Alarm triggered if over number ### (or 130) for 3 minutes with power on
        STS_SP_ID:### - Swing Tank Power. ### is lowest recorded power for Swing Tank to be considered 'on'. Defaults to 1.0
        STS_TP_ID:### - Total System Power for ratio alarming for alarming if swing tank power is more than ### (40% default) of usage
        STS_ST_ID:### - Swing Tank Setpoint that should not change at all from ### (default 130)

    Parameters
    ----------
    df: pd.DataFrame
        post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in order minutes. If minutes
        are out of order or have gaps, the function may return erroneous alarms.
    daily_df: pd.DataFrame
        post-transformed dataframe for daily data. Used for checking power ratios and determining which days to process.
    config : ecopipeline.ConfigManager
        The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
        called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
        The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
        name of each variable in the dataframe that requires alarming and the STS alarm codes (e.g., STS_T_1:140, STS_SP_1:2.0)
    default_fault_time : int
        Number of consecutive minutes for T+SP alarms (default 3). T+SP alarms trigger when tank is powered and temperature exceeds
        setpoint for this many consecutive minutes.
    system: str
        string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
    default_setpoint : float
        Default temperature setpoint in degrees for T and ST alarm codes when no custom bound is specified (default 130.0)
    default_power_indication : float
        Default power threshold in kW for SP alarm codes when no custom bound is specified (default 1.0)
    default_power_ratio : float
        Default power ratio threshold (as decimal, e.g., 0.4 for 40%) for TP alarm codes when no custom bound is specified (default 0.4)

    Returns
    -------
    pd.DataFrame:
        Pandas dataframe with alarm events
    """
    if df.empty:
        print("cannot flag swing tank setpoint alarms. Dataframe is empty")
        return pd.DataFrame()
    variable_names_path = config.get_var_names_path()
    try:
        bounds_df = pd.read_csv(variable_names_path)
    except FileNotFoundError:
        print("File Not Found: ", variable_names_path)
        return pd.DataFrame()

    # Expand the STS_* alarm codes into one row per (variable, type, id) with a numeric bound,
    # filling missing bounds from the per-type defaults below.
    bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'STS',
                                               {'T' : default_setpoint,
                                                'SP': default_power_indication,
                                                'TP': default_power_ratio,
                                                'ST': default_setpoint},
                                               system)
    if bounds_df.empty:
        return _convert_silent_alarm_dict_to_df({}) # no alarms to look into

    # Process each day, and within each day each unique alarm_code_id
    alarms = {}
    for day in daily_df.index:
        next_day = day + pd.Timedelta(days=1)
        # Minute data restricted to this calendar day
        filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
        # NOTE(review): alarmed_for_day is shared across ALL alarm ids for this day, so one
        # id's alarm suppresses the ST and TP checks for every other id too — confirm intended.
        alarmed_for_day = False
        for alarm_id in bounds_df['alarm_code_id'].unique():
            id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]

            # Get T, SP, TP and ST alarm codes for this ID
            t_codes = id_group[id_group['alarm_code_type'] == 'T']
            sp_codes = id_group[id_group['alarm_code_type'] == 'SP']
            tp_codes = id_group[id_group['alarm_code_type'] == 'TP']
            st_codes = id_group[id_group['alarm_code_type'] == 'ST']

            # A given id may carry at most one code of each type
            if len(t_codes) > 1 or len(sp_codes) > 1 or len(tp_codes) > 1 or len(st_codes) > 1:
                raise Exception(f"Improper alarm codes for swing tank setpoint with id {alarm_id}")

            # T+SP alarm: tank powered while outlet temperature at/above setpoint
            if len(t_codes) == 1 and len(sp_codes) == 1:
                t_var_name = t_codes.iloc[0]['variable_name']
                sp_var_name = sp_codes.iloc[0]['variable_name']
                sp_power_indication = sp_codes.iloc[0]['bound']
                t_setpoint = t_codes.iloc[0]['bound']
                # Check if both variables exist in df
                if t_var_name in filtered_df.columns and sp_var_name in filtered_df.columns:
                    # Minutes where SP >= power-on threshold AND T >= setpoint
                    power_mask = filtered_df[sp_var_name] >= sp_power_indication
                    temp_mask = filtered_df[t_var_name] >= t_setpoint
                    combined_mask = power_mask & temp_mask

                    # rolling(...).min() == 1 is True only where the condition held for
                    # default_fault_time consecutive minutes
                    consecutive_condition = combined_mask.rolling(window=default_fault_time).min() == 1
                    if consecutive_condition.any():
                        # Get the first index where condition was met
                        first_true_index = consecutive_condition.idxmax()
                        # Shift back to the start of the consecutive run (the rolling window
                        # flags its last minute, not its first)
                        adjusted_time = first_true_index - pd.Timedelta(minutes=default_fault_time-1)
                        _add_an_alarm(alarms, adjusted_time, sp_var_name, f"High swing tank setpoint: Swing tank was powered at {adjusted_time} although temperature was above {t_setpoint}.")
                        alarmed_for_day = True
            # ST alarm: setpoint variable deviated from its configured value for over 10 minutes
            if not alarmed_for_day and len(st_codes) == 1:
                st_var_name = st_codes.iloc[0]['variable_name']
                st_setpoint = st_codes.iloc[0]['bound']
                # Check if st_var_name exists in filtered_df
                if st_var_name in filtered_df.columns:
                    # Minutes where the recorded setpoint differs from the expected value
                    altered_mask = filtered_df[st_var_name] != st_setpoint
                    # 10-minute consecutive window (hard-coded, unlike default_fault_time above)
                    consecutive_condition = altered_mask.rolling(window=10).min() == 1
                    if consecutive_condition.any():
                        # Get the first index where condition was met
                        first_true_index = consecutive_condition.idxmax()
                        # Adjust for the rolling window
                        adjusted_time = first_true_index - pd.Timedelta(minutes=9)
                        _add_an_alarm(alarms, day, st_var_name, f"Swing tank setpoint was altered at {adjusted_time}")
                        alarmed_for_day = True
            # TP alarm: swing tank's share of daily total power exceeds the configured ratio
            if not alarmed_for_day and len(tp_codes) == 1 and len(sp_codes) == 1:
                tp_var_name = tp_codes.iloc[0]['variable_name']
                sp_var_name = sp_codes.iloc[0]['variable_name']
                tp_ratio = tp_codes.iloc[0]['bound']
                # Check if both variables exist in the daily dataframe
                if tp_var_name in daily_df.columns and sp_var_name in daily_df.columns:
                    # Guard against division by zero on days with no total power recorded
                    if day in daily_df.index and daily_df.loc[day, tp_var_name] != 0:
                        power_ratio = daily_df.loc[day, sp_var_name] / daily_df.loc[day, tp_var_name]
                        if power_ratio > tp_ratio:
                            _add_an_alarm(alarms, day, sp_var_name, f"High swing tank power ratio: Swing tank accounted for more than {tp_ratio * 100}% of daily power.")
    return _convert_silent_alarm_dict_to_df(alarms)
316
+
317
def flag_recirc_balance_valve(daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_ratio : float = 0.4) -> pd.DataFrame:
    """
    Function will take a pandas dataframe and location of alarm information in a csv,
    and create a dataframe with applicable alarm events.

    VarNames syntax:
        BV_ER_[OPTIONAL ID] : Indicates a power variable for an ER heater (equipment recirculation)
        BV_OUT_[OPTIONAL ID]:### - Indicates the heating output variable the ER heating contributes to. Optional ### for the percentage
            threshold that should not be crossed by the ER elements (default 0.4 for 40%)

    Parameters
    ----------
    daily_df: pd.DataFrame
        post-transformed dataframe for daily data. Used for checking recirculation balance by comparing sum of ER equipment
        power to heating output power.
    config : ecopipeline.ConfigManager
        The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
        called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
        The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
        name of each variable in the dataframe that requires alarming and the BV alarm codes (e.g., BV_ER_1, BV_OUT_1:0.5)
    system: str
        string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
    default_power_ratio : float
        Default power ratio threshold (as decimal, e.g., 0.4 for 40%) for OUT alarm codes when no custom bound is specified (default 0.4).
        Alarm triggers when sum of ER equipment >= (OUT value * ratio)

    Returns
    -------
    pd.DataFrame:
        Pandas dataframe with alarm events
    """
    if daily_df.empty:
        print("cannot flag missing balancing valve alarms. Dataframe is empty")
        return pd.DataFrame()
    variable_names_path = config.get_var_names_path()
    try:
        bounds_df = pd.read_csv(variable_names_path)
    except FileNotFoundError:
        print("File Not Found: ", variable_names_path)
        return pd.DataFrame()
    bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'BV',
                                               {'OUT' : default_power_ratio},
                                               system)
    if bounds_df.empty:
        return _convert_silent_alarm_dict_to_df({}) # no BV alarms to look into
    # Process each unique alarm_code_id
    alarms = {}
    for alarm_id in bounds_df['alarm_code_id'].unique():
        id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
        out_codes = id_group[id_group['alarm_code_type'] == 'OUT']
        er_codes = id_group[id_group['alarm_code_type'] == 'ER']
        # Validate BEFORE accessing iloc[0]: previously the access came first, so an id
        # with no OUT code raised an opaque IndexError instead of this exception. Also
        # require exactly one OUT code (was `> 1`, which let the zero case fall through).
        if len(out_codes) != 1 or len(er_codes) < 1:
            raise Exception(f"Improper alarm codes for balancing valve with id {alarm_id}")
        out_var_name = out_codes.iloc[0]['variable_name']
        out_bound = out_codes.iloc[0]['bound']
        if not out_var_name in daily_df.columns:
            continue
        # Day-invariant: the ER variable list and its presence check are hoisted out of the day loop
        er_var_names = er_codes['variable_name'].tolist()
        if not all(var in daily_df.columns for var in er_var_names):
            continue
        for day in daily_df.index:
            # Sum all ER variables for this day
            er_sum = daily_df.loc[day, er_var_names].sum()
            out_value = daily_df.loc[day, out_var_name]
            # Alarm when the ER equipment meets or exceeds the allowed fraction of heating output
            if er_sum >= out_value*out_bound:
                _add_an_alarm(alarms, day, out_var_name, f"Recirculation imbalance: Sum of recirculation equipment ({er_sum:.2f}) exceeds or equals {(out_bound * 100):.2f}% of heating output.")
    return _convert_silent_alarm_dict_to_df(alarms)
387
+
388
def flag_hp_inlet_temp(df: pd.DataFrame, daily_df: pd.DataFrame, config : ConfigManager, system: str = "", default_power_threshold : float = 1.0,
                       default_temp_threshold : float = 115.0, fault_time : int = 5) -> pd.DataFrame:
    """
    Function will take a pandas dataframe and location of alarm information in a csv,
    and create a dataframe with applicable alarm events.

    VarNames syntax:
        HPI_POW_[OPTIONAL ID]:### - Indicates a power variable for the heat pump. ### is the power threshold (default 1.0) above which
            the heat pump is considered 'on'
        HPI_T_[OPTIONAL ID]:### - Indicates heat pump inlet temperature variable. ### is the temperature threshold (default 115.0)
            that should not be exceeded while the heat pump is on

    Parameters
    ----------
    df: pd.DataFrame
        post-transformed dataframe for minute data. It should be noted that this function expects consecutive, in order minutes. If minutes
        are out of order or have gaps, the function may return erroneous alarms.
    daily_df: pd.DataFrame
        post-transformed dataframe for daily data.
    config : ecopipeline.ConfigManager
        The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
        called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
        The file must have at least two columns which must be titled "variable_name" and "alarm_codes" which should contain the
        name of each variable in the dataframe that requires alarming and the HPI alarm codes (e.g., HPI_POW_1:0.5, HPI_T_1:125.0)
    system: str
        string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
    default_power_threshold : float
        Default power threshold for POW alarm codes when no custom bound is specified (default 1.0). Heat pump is considered 'on'
        when power exceeds this value.
    default_temp_threshold : float
        Default temperature threshold for T alarm codes when no custom bound is specified (default 115.0). Alarm triggers when
        temperature exceeds this value while heat pump is on.
    fault_time : int
        Number of consecutive minutes that both power and temperature must exceed their thresholds before triggering an alarm (default 5).

    Returns
    -------
    pd.DataFrame:
        Pandas dataframe with alarm events
    """
    if df.empty:
        # Fixed copy-pasted message (previously referred to balancing valve alarms)
        print("cannot flag heat pump inlet temperature alarms. Dataframe is empty")
        return pd.DataFrame()
    variable_names_path = config.get_var_names_path()
    try:
        bounds_df = pd.read_csv(variable_names_path)
    except FileNotFoundError:
        print("File Not Found: ", variable_names_path)
        return pd.DataFrame()

    bounds_df = _process_bounds_df_alarm_codes(bounds_df, 'HPI',
                                               {'POW' : default_power_threshold,
                                                'T' : default_temp_threshold},
                                               system)
    if bounds_df.empty:
        return _convert_silent_alarm_dict_to_df({}) # no alarms to look into

    # Process each unique alarm_code_id
    alarms = {}
    for alarm_id in bounds_df['alarm_code_id'].unique():
        # Day-invariant lookups hoisted out of the day loop (previously recomputed every day)
        id_group = bounds_df[bounds_df['alarm_code_id'] == alarm_id]
        pow_codes = id_group[id_group['alarm_code_type'] == 'POW']
        t_codes = id_group[id_group['alarm_code_type'] == 'T']
        # Validate BEFORE accessing iloc[0]: previously a missing POW or T code raised an
        # opaque IndexError before this check could fire. Message also corrected (was the
        # copy-pasted "balancing valve" text).
        if len(t_codes) != 1 or len(pow_codes) != 1:
            raise Exception(f"Improper alarm codes for heat pump inlet temperature with id {alarm_id}")
        pow_var_name = pow_codes.iloc[0]['variable_name']
        pow_thresh = pow_codes.iloc[0]['bound']
        t_var_name = t_codes.iloc[0]['variable_name']
        t_pretty_name = t_codes.iloc[0]['pretty_name']
        t_thresh = t_codes.iloc[0]['bound']
        if not (pow_var_name in df.columns and t_var_name in df.columns):
            continue
        for day in daily_df.index:
            next_day = day + pd.Timedelta(days=1)
            filtered_df = df.loc[(df.index >= day) & (df.index < next_day)]
            # Minutes where both power and inlet temperature exceed their thresholds
            power_mask = filtered_df[pow_var_name] > pow_thresh
            temp_mask = filtered_df[t_var_name] > t_thresh
            combined_mask = power_mask & temp_mask

            # rolling(...).min() == 1 is True only after fault_time consecutive True minutes
            consecutive_condition = combined_mask.rolling(window=fault_time).min() == 1
            if consecutive_condition.any():
                first_true_index = consecutive_condition.idxmax()
                # Shift back to the start of the consecutive run
                adjusted_time = first_true_index - pd.Timedelta(minutes=fault_time-1)
                _add_an_alarm(alarms, day, t_var_name, f"High heat pump inlet temperature: {t_pretty_name} was above {t_thresh:.1f} while HP was ON starting at {adjusted_time}.")

    return _convert_silent_alarm_dict_to_df(alarms)
475
+
476
def _process_bounds_df_alarm_codes(bounds_df : pd.DataFrame, alarm_tag : str, type_default_dict : dict = None, system : str = "") -> pd.DataFrame:
    """
    Expand alarm codes of format [TAG]_[TYPE]_[OPTIONAL_ID]:[BOUND] into one row per code.

    Parameters
    ----------
    bounds_df : pd.DataFrame
        Contents of Variable_Names.csv. Must contain "variable_name" and "alarm_codes"
        columns (and a "system" column when `system` is non-empty).
    alarm_tag : str
        Tag prefix to select (e.g. 'STS', 'BV', 'HPI'); codes not starting with it are dropped.
    type_default_dict : dict
        Maps alarm_code_type -> default bound used when a code specifies no ":BOUND".
        Defaults to an empty dict. (Was a mutable default argument `{}`; now None-sentinel.)
    system : str
        Optional system name to filter rows by before processing.

    Returns
    -------
    pd.DataFrame
        One row per parsed code with columns variable_name, alarm_codes, pretty_name,
        bound (float), alarm_code_type, alarm_code_id; empty DataFrame when nothing matches.

    Raises
    ------
    Exception
        On missing required columns, a missing 'system' column when `system` is set,
        or malformed alarm codes.
    """
    if type_default_dict is None:
        type_default_dict = {}
    if (system != ""):
        if not 'system' in bounds_df.columns:
            raise Exception("system parameter is non null, however, system is not present in Variable_Names.csv")
        bounds_df = bounds_df.loc[bounds_df['system'] == system]
    # Work on a copy so the column assignments below don't write to a slice of the caller's frame
    bounds_df = bounds_df.copy()

    required_columns = ["variable_name", "alarm_codes"]
    for required_column in required_columns:
        if not required_column in bounds_df.columns:
            raise Exception(f"{required_column} is not present in Variable_Names.csv")
    # pretty_name falls back to variable_name where absent
    if not 'pretty_name' in bounds_df.columns:
        bounds_df['pretty_name'] = bounds_df['variable_name']
    else:
        bounds_df['pretty_name'] = bounds_df['pretty_name'].fillna(bounds_df['variable_name'])

    bounds_df = bounds_df.loc[:, ["variable_name", "alarm_codes", "pretty_name"]]
    # Keep rows with at least 2 non-null values among the 3 retained columns
    bounds_df.dropna(axis=0, thresh=2, inplace=True)

    # Check if all alarm_codes are null or if dataframe is empty
    if bounds_df.empty or bounds_df['alarm_codes'].isna().all():
        return pd.DataFrame()

    bounds_df = bounds_df[bounds_df['alarm_codes'].str.contains(alarm_tag, na=False)]

    # Split alarm_codes by semicolons and create a row for each tagged code
    expanded_rows = []
    for idx, row in bounds_df.iterrows():
        alarm_codes = str(row['alarm_codes']).split(';')
        tag_codes = [code.strip() for code in alarm_codes if code.strip().startswith(alarm_tag)]

        for tag_code in tag_codes:
            new_row = row.copy()
            if ":" in tag_code:
                tag_parts = tag_code.split(':')
                if len(tag_parts) > 2:
                    raise Exception(f"Improperly formated alarm code : {tag_code}")
                new_row['bound'] = tag_parts[1]
                tag_code = tag_parts[0]
            else:
                new_row['bound'] = None
            new_row['alarm_codes'] = tag_code

            expanded_rows.append(new_row)

    if expanded_rows:
        bounds_df = pd.DataFrame(expanded_rows)
    else:
        return pd.DataFrame()# no tagged alarms to look into

    # Split [TAG]_[TYPE]_[ID] into its type and (optional) id parts
    alarm_code_parts = []
    for idx, row in bounds_df.iterrows():
        parts = row['alarm_codes'].split('_')
        if len(parts) == 2:
            alarm_code_parts.append([parts[1], "No ID"])
        elif len(parts) == 3:
            alarm_code_parts.append([parts[1], parts[2]])
        else:
            # Report the actual tag being processed (previously hardcoded "STS" for every tag)
            raise Exception(f"improper {alarm_tag} alarm code format for {row['variable_name']}")
    if alarm_code_parts:
        bounds_df[['alarm_code_type', 'alarm_code_id']] = pd.DataFrame(alarm_code_parts, index=bounds_df.index)

    # Replace None bounds with appropriate defaults based on alarm_code_type
    for idx, row in bounds_df.iterrows():
        if pd.isna(row['bound']) or row['bound'] is None:
            if row['alarm_code_type'] in type_default_dict.keys():
                bounds_df.at[idx, 'bound'] = type_default_dict[row['alarm_code_type']]
    # Coerce bound column to float (unparseable bounds become NaN)
    bounds_df['bound'] = pd.to_numeric(bounds_df['bound'], errors='coerce').astype(float)
    return bounds_df
547
+
548
def _add_an_alarm(alarm_dict : dict, day : datetime, var_name : str, alarm_string : str):
    """Record an alarm for `var_name` in `alarm_dict`, keyed by calendar day.

    The timestamp is floored to midnight so every alarm raised on the same
    date lands under a single key; each entry is a [variable, message] pair.
    """
    day_key = pd.Timestamp(day).normalize()
    alarm_dict.setdefault(day_key, []).append([var_name, alarm_string])
556
+
191
557
  def _convert_silent_alarm_dict_to_df(alarm_dict : dict) -> pd.DataFrame:
192
558
  events = {
193
559
  'start_time_pt' : [],
@@ -1,3 +1,4 @@
1
- from .extract import get_noaa_data, json_to_df, extract_files, get_last_full_day_from_db, get_db_row_from_time, extract_new, csv_to_df, get_sub_dirs, msa_to_df, fm_api_to_df, small_planet_control_to_df, dent_csv_to_df, flow_csv_to_df, pull_egauge_data, egauge_csv_to_df, remove_char_sequence_from_csv_header, tb_api_to_df, skycentrics_api_to_df
1
+ from .extract import get_noaa_data, json_to_df, extract_files, get_last_full_day_from_db, get_db_row_from_time, extract_new, csv_to_df, get_sub_dirs, msa_to_df, fm_api_to_df, small_planet_control_to_df, dent_csv_to_df, flow_csv_to_df, pull_egauge_data, egauge_csv_to_df, remove_char_sequence_from_csv_header, tb_api_to_df, skycentrics_api_to_df,get_OAT_open_meteo
2
2
  __all__ = ["get_noaa_data", "json_to_df", "extract_files", "get_last_full_day_from_db", "get_db_row_from_time", 'extract_new', "csv_to_df", "get_sub_dirs", "msa_to_df", "fm_api_to_df",
3
- "small_planet_control_to_df","dent_csv_to_df","flow_csv_to_df","pull_egauge_data", "egauge_csv_to_df","remove_char_sequence_from_csv_header", "tb_api_to_df", "skycentrics_api_to_df"]
3
+ "small_planet_control_to_df","dent_csv_to_df","flow_csv_to_df","pull_egauge_data", "egauge_csv_to_df","remove_char_sequence_from_csv_header", "tb_api_to_df", "skycentrics_api_to_df",
4
+ "get_OAT_open_meteo"]
@@ -1,5 +1,6 @@
1
1
  from typing import List
2
2
  import pandas as pd
3
+ import openmeteo_requests
3
4
  import re
4
5
  from ftplib import FTP
5
6
  from datetime import datetime, timedelta
@@ -695,17 +696,29 @@ def skycentrics_api_to_df(config: ConfigManager, startTime: datetime = None, end
695
696
  try:
696
697
  df = pd.DataFrame()
697
698
  temp_dfs = []
699
+ ###############
700
+ if endTime is None:
701
+ endTime = datetime.utcnow()
702
+ if startTime is None:
703
+ startTime = endTime - timedelta(1)
698
704
  time_parser = startTime
699
705
  while time_parser < endTime:
700
- start_time_str = time_parser.strftime('%a, %d %b %H:%M:%S GMT')
701
- skycentrics_token, date_str = config.get_skycentrics_token(request_str=f'GET /api/devices/{config.api_device_id}/data HTTP/1.1',date_str=start_time_str)
702
- response = requests.get(f'https://api.skycentrics.com/api/devices/{config.api_device_id}/data',
703
- headers={'Date': date_str, 'x-sc-api-token': skycentrics_token, 'Accept': 'application/json'})
706
+ time_parse_end = time_parser + timedelta(1)
707
+ start_time_str = time_parser.strftime('%Y-%m-%dT%H:%M:%S')
708
+ end_time_str = time_parse_end.strftime('%Y-%m-%dT%H:%M:%S')
709
+ skycentrics_token, date_str = config.get_skycentrics_token(
710
+ request_str=f'GET /api/devices/{config.api_device_id}/data?b={start_time_str}&e={end_time_str}&g=1 HTTP/1.1',
711
+ date_str=None)
712
+ response = requests.get(f'https://api.skycentrics.com/api/devices/{config.api_device_id}/data?b={start_time_str}&e={end_time_str}&g=1',
713
+ headers={'Date': date_str, 'x-sc-api-token': skycentrics_token, 'Accept': 'application/gzip'})
704
714
  if response.status_code == 200:
705
- norm_data = pd.json_normalize(response.json(), record_path=['sensors'], meta=['time'], meta_prefix='response_')
715
+ # Decompress the gzip response
716
+ decompressed_data = gzip.decompress(response.content)
717
+ # Parse JSON from decompressed data
718
+ json_data = json.loads(decompressed_data)
719
+ norm_data = pd.json_normalize(json_data, record_path=['sensors'], meta=['time'], meta_prefix='response_')
706
720
  if len(norm_data) != 0:
707
-
708
- norm_data["time_pt"] = pd.to_datetime(norm_data["response_time"])
721
+ norm_data["time_pt"] = pd.to_datetime(norm_data["response_time"], utc=True)
709
722
 
710
723
  norm_data["time_pt"] = norm_data["time_pt"].dt.tz_convert(time_zone)
711
724
  norm_data = pd.pivot_table(norm_data, index="time_pt", columns="id", values="data")
@@ -716,8 +729,8 @@ def skycentrics_api_to_df(config: ConfigManager, startTime: datetime = None, end
716
729
  temp_dfs.append(norm_data)
717
730
  else:
718
731
  print(f"Failed to make GET request. Status code: {response.status_code} {response.json()}")
719
- time.sleep(60)
720
- time_parser = time_parser + timedelta(minutes=1)
732
+ time_parser = time_parse_end
733
+ ##############
721
734
  if len(temp_dfs) > 0:
722
735
  df = pd.concat(temp_dfs, ignore_index=False)
723
736
  if create_csv:
@@ -733,7 +746,7 @@ def skycentrics_api_to_df(config: ConfigManager, startTime: datetime = None, end
733
746
  except Exception as e:
734
747
  print(f"An error occurred: {e}")
735
748
  raise e
736
- return pd.DataFrame()
749
+ # return pd.DataFrame()
737
750
 
738
751
  def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True) -> pd.DataFrame:
739
752
  """
@@ -1017,6 +1030,62 @@ def get_sub_dirs(dir: str) -> List[str]:
1017
1030
  return
1018
1031
  return directories
1019
1032
 
1033
def get_OAT_open_meteo(lat: float, long: float, start_date: datetime, end_date: datetime = None, time_zone: str = "America/Los_Angeles",
                       use_noaa_names : bool = True) -> pd.DataFrame:
    """
    Retrieve hourly outdoor air temperature from the Open-Meteo historical weather archive API.

    Parameters
    ----------
    lat : float
        Latitude of the location to query.
    long : float
        Longitude of the location to query.
    start_date : datetime
        First date (inclusive) of the period to retrieve; only the date portion is used.
    end_date : datetime
        Last date (inclusive) of the period to retrieve. Defaults to yesterday when None.
    time_zone : str
        Time zone name passed to the API and used to localize returned timestamps
        (default "America/Los_Angeles").
    use_noaa_names : bool
        If True, rename 'temperature_2m' to 'airTemp_F' and add an empty 'dewPoint_F'
        column so the result matches the column layout of the NOAA weather extractor.

    Returns
    -------
    pd.DataFrame:
        Hourly temperatures in Fahrenheit indexed by naive local 'time_pt'; an empty
        DataFrame if the request fails for any reason.
    """
    if end_date is None:
        end_date = datetime.today() - timedelta(1)
    start_date_str = start_date.date().strftime('%Y-%m-%d')
    end_date_str = end_date.date().strftime('%Y-%m-%d')
    print(f"Getting Open Meteao data for {start_date_str} to {end_date_str}")
    try:
        openmeteo = openmeteo_requests.Client()

        url = "https://archive-api.open-meteo.com/v1/archive"
        params = {
            "latitude": lat,
            "longitude": long,
            "start_date": start_date_str,
            "end_date": end_date_str,
            "hourly": "temperature_2m",
            "temperature_unit": "fahrenheit",
            "timezone": time_zone,
        }
        responses = openmeteo.weather_api(url, params=params)

        # Process first location. Add a for-loop for multiple locations or weather models
        response = responses[0]

        # Process hourly data. The order of variables needs to be the same as requested.
        hourly = response.Hourly()
        hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()

        # Reconstruct the hourly timestamp axis from the response's start/end/interval
        hourly_data = {"time_pt": pd.date_range(
            start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
            end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
            freq = pd.Timedelta(seconds = hourly.Interval()),
            inclusive = "left"
        )}

        hourly_data["temperature_2m"] = hourly_temperature_2m
        # Convert from UTC to the requested zone, then drop tz info to match pipeline convention
        hourly_data["time_pt"] = hourly_data["time_pt"].tz_convert(time_zone).tz_localize(None)

        hourly_data = pd.DataFrame(hourly_data)
        hourly_data.set_index('time_pt', inplace = True)

        if use_noaa_names:
            hourly_data = hourly_data.rename(columns = {'temperature_2m':'airTemp_F'})
            # dewPoint_F is not available from this endpoint; filled with None for schema parity
            hourly_data['dewPoint_F'] = None

        # Convert float32 to float64 for SQL database compatibility
        for col in hourly_data.select_dtypes(include=['float32']).columns:
            hourly_data[col] = hourly_data[col].astype('float64')

        return hourly_data
    except Exception as e:
        # Best-effort: any API/parse failure yields an empty frame rather than aborting the pipeline
        print(f'Could not get OAT data: {e}')
        return pd.DataFrame()
1088
+
1020
1089
 
1021
1090
  def get_noaa_data(station_names: List[str], config : ConfigManager, station_ids : dict = {}) -> dict:
1022
1091
  """
@@ -1034,6 +1103,7 @@ def get_noaa_data(station_names: List[str], config : ConfigManager, station_ids
1034
1103
  dict:
1035
1104
  Dictionary with key as Station Name and Value as DF of Parsed Weather Data
1036
1105
  """
1106
+ #TODO swap out for this if empty: https://open-meteo.com/en/docs/historical-weather-api?start_date=2025-12-29&latitude=47.6&longitude=-122.33&temperature_unit=fahrenheit&end_date=2026-01-04
1037
1107
  formatted_dfs = {}
1038
1108
  weather_directory = config.get_weather_dir_path()
1039
1109
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ecopipeline
3
- Version: 1.0.2
3
+ Version: 1.0.3
4
4
  Summary: Contains functions for use in Ecotope Datapipelines
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
@@ -1,8 +1,8 @@
1
1
  ecopipeline/__init__.py,sha256=pjC00JWsjVAhS0jUKHD-wyi4UIpTsWbIg9JaxLS1mlc,275
2
- ecopipeline/event_tracking/__init__.py,sha256=SV2kkvJgptjeyLQlqHWcDRpQO6-JC433_dRZ3H9-ZNU,131
3
- ecopipeline/event_tracking/event_tracking.py,sha256=NxrGJylD5m5t_OI4VwItbUUbtLW19LrDLy3sjUeQL80,29981
4
- ecopipeline/extract/__init__.py,sha256=EHJ5lHWuLOwEOOzk5GmgAeZKCLQHDDaiWzLLs-nH7v8,723
5
- ecopipeline/extract/extract.py,sha256=xK1fg7i0mCN04qCx3AM5VDRpM6ZQ7_2ai0BbQF2WYPk,55306
2
+ ecopipeline/event_tracking/__init__.py,sha256=1saCNVWbcp7bwz1kXfKa2d8aUoGWNWvWSj7IJY4fzc8,219
3
+ ecopipeline/event_tracking/event_tracking.py,sha256=LASkal4MgGLN7UzrAjbKw3eaM9JwKwt3YpkIraRSgiE,51172
4
+ ecopipeline/extract/__init__.py,sha256=j_8-q_yrPRySwYyloMv5v2XQeYYyYfX0N-MW2ZDA4rg,775
5
+ ecopipeline/extract/extract.py,sha256=MykzAchL_0LY0NG9TOAadpm5MSgjn7lPRI8AvSIMUBk,58530
6
6
  ecopipeline/load/__init__.py,sha256=NLa_efQJZ8aP-J0Y5xx9DP7mtfRH9jY6Jz1ZMZN_BAA,292
7
7
  ecopipeline/load/load.py,sha256=PaSGWOZI0Xg44_SWN7htn2DPIAU_s8mOtCGibXq25tM,24614
8
8
  ecopipeline/transform/__init__.py,sha256=FjGcNpYNEYPdYQhogNRrQlKH2hGNNLv55jig1KaGaHY,2686
@@ -18,8 +18,8 @@ ecopipeline/utils/pkls/tasseron_resistance_to_temp_3.pkl,sha256=9UpCZ3rSu0mU4LoT
18
18
  ecopipeline/utils/pkls/tasseron_temp_to_resistance_2.pkl,sha256=Uq6I2dl5GcR5wb5QxurgDP4A2D4-N3neDL3BKtx53A4,2047
19
19
  ecopipeline/utils/pkls/veris_resistance_to_temp_3.pkl,sha256=CVbUWJvOQXg2nZ-0GP9FWtU-ffMGcwg3ok5q669bmf8,1472
20
20
  ecopipeline/utils/pkls/veris_temp_to_resistance_2.pkl,sha256=JiEp4SxR9eq-olKd8TKAG37iHMscJE_2SSHizGqBdno,1472
21
- ecopipeline-1.0.2.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- ecopipeline-1.0.2.dist-info/METADATA,sha256=g9VTZ2Gi045tlTwMThfSRt7v56XIubzch7Aasbj32gk,2329
23
- ecopipeline-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
- ecopipeline-1.0.2.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
25
- ecopipeline-1.0.2.dist-info/RECORD,,
21
+ ecopipeline-1.0.3.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ ecopipeline-1.0.3.dist-info/METADATA,sha256=zHiCg-0X-XgTFWJYCH_NkIIp-E3izV8-Y5ae0c480_0,2329
23
+ ecopipeline-1.0.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
24
+ ecopipeline-1.0.3.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
25
+ ecopipeline-1.0.3.dist-info/RECORD,,