PyPI - ecopipeline - Versions diffs - 0.4.16__py3-none-any.whl → 0.4.18__py3-none-any.whl - Mend

ecopipeline 0.4.16__py3-none-any.whl → 0.4.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

ecopipeline/extract/extract.py CHANGED Viewed

@@ -16,7 +16,7 @@ import requests
 import subprocess
-def get_last_full_day_from_db(config : ConfigManager) -> datetime:
+def get_last_full_day_from_db(config : ConfigManager, table_identifier : str = "minute") -> datetime:
     """
     Function retrieves the last line from the database with the most recent datetime
     in local time.
@@ -25,6 +25,8 @@ def get_last_full_day_from_db(config : ConfigManager) -> datetime:
     ----------
     config : ecopipeline.ConfigManager
         The ConfigManager object that holds configuration data for the pipeline
+    table_identifier : str
+        Table identifier in config.ini with minute data. Default: "minute"
     Returns
     -------
@@ -32,14 +34,14 @@ def get_last_full_day_from_db(config : ConfigManager) -> datetime:
         end of last full day populated in database or default past time if no data found
     """
     # config_dict = get_login_info(["minute"], config)
-    table_config_dict = config.get_db_table_info(["minute"])
+    table_config_dict = config.get_db_table_info([table_identifier])
     # db_connection, db_cursor = connect_db(config_info=config_dict['database'])
     db_connection, db_cursor = config.connect_db()
     return_time = datetime(year=2000, month=1, day=9, hour=23, minute=59, second=0).astimezone(timezone('US/Pacific')) # arbitrary default time
     try:
         db_cursor.execute(
-            f"select * from {table_config_dict['minute']['table_name']} order by time_pt DESC LIMIT 1")
+            f"select * from {table_config_dict[table_identifier]['table_name']} order by time_pt DESC LIMIT 1")
         last_row_data = pd.DataFrame(db_cursor.fetchall())
         if len(last_row_data.index) > 0:
@@ -98,18 +100,20 @@ def get_db_row_from_time(time: datetime, config : ConfigManager) -> pd.DataFrame
     return row_data
-def extract_new(startTime: datetime, filenames: List[str], decihex = False, timeZone: str = None, endTime: datetime = None, dateStringStartIdx : int = -17) -> List[str]:
+def extract_new(startTime: datetime, filenames: List[str], decihex = False, timeZone: str = None, endTime: datetime = None, dateStringStartIdx : int = -17,
+                dateStringEndIdx : int = -3, dateFormat : str = "%Y%m%d%H%M%S", epochFormat : bool = False) -> List[str]:
     """
     Function filters the filenames to only those equal to or newer than the date specified startTime.
     If filenames are in deciheximal, the function can still handle it. Note that for some projects,
     files are dropped at irregular intervals so data cannot be filtered by exact date.
-    Currently, this function expects file names to be in one of two formats:
+    Currently, this function expects file names to be in one of three formats:
-    1. normal (set decihex = False) format assumes file names are in format such that characters [-17,-3] in the file names string
-        are the files date in the form "%Y%m%d%H%M%S"
+    1. default (set decihex = False) format assumes file names are in format such that characters [-17,-3] in the file names string
+        are the files date in the form "%Y%m%d%H%M%S"
     2. deciheximal (set decihex = True) format assumes file names are in format such there is a deciheximal value between a '.' and '_' character in each filename string
         that has a deciheximal value equal to the number of seconds since January 1, 1970 to represent the timestamp of the data in the file.
+    3. custom format is the same as default format but uses a custom date format with the dateFormat parameter and expects the date to be characters [dateStringStartIdx,dateStringEndIdx]
     Parameters
     ----------
@@ -125,7 +129,9 @@ def extract_new(startTime: datetime, filenames: List[str], decihex = False, time
         time stamp by the pandas tz_localize() function https://pandas.pydata.org/docs/reference/api/pandas.Series.tz_localize.html
         defaults to None
     dateStringStartIdx: int
-        The character index in each file where the date in format "%Y%m%d%H%M%S" starts. Default is -17 (meaning 17 characters from the end of the filename string)
+        The character index in each file name where the date string starts. Default is -17 (meaning 17 characters from the end of the filename string)
+    dateStringEndIdx: int
+        The character index in each file name where the date string ends (exclusive). Default is -3 (meaning 3 characters from the end of the filename string)
     Returns
     -------
@@ -145,8 +151,11 @@ def extract_new(startTime: datetime, filenames: List[str], decihex = False, time
     else:
-        startTime_int = int(startTime.strftime("%Y%m%d%H%M%S"))
-        return_list = list(filter(lambda filename: int(filename[dateStringStartIdx:dateStringStartIdx+14]) >= startTime_int and (endTime is None or int(filename[dateStringStartIdx:dateStringStartIdx+14]) < int(endTime.strftime("%Y%m%d%H%M%S"))), filenames))
+        if epochFormat:
+            startTime_int = int(startTime.timestamp())
+        else:
+            startTime_int = int(startTime.strftime(dateFormat))
+        return_list = list(filter(lambda filename: int(filename[dateStringStartIdx:dateStringEndIdx]) >= startTime_int and (endTime is None or int(filename[dateStringStartIdx:dateStringStartIdx+14]) < int(endTime.strftime("%Y%m%d%H%M%S"))), filenames))
     return return_list
 def extract_files(extension: str, config: ConfigManager, data_sub_dir : str = "", file_prefix : str = "") -> List[str]:
@@ -791,8 +800,8 @@ def get_noaa_data(station_names: List[str], config : ConfigManager, station_ids
         noaa_dfs = _convert_to_df(station_ids, noaa_filenames, weather_directory)
         formatted_dfs = _format_df(station_ids, noaa_dfs)
     except:
-        # temporary solution for NOAA ftp not including 2024
-        noaa_df = pd.DataFrame(index=pd.date_range(start='2024-01-01', periods=10, freq='H'))
+        # temporary solution for NOAA ftp not including 2025
+        noaa_df = pd.DataFrame(index=pd.date_range(start='2025-01-01', periods=10, freq='H'))
         noaa_df['conditions'] = None
         noaa_df['airTemp_F'] = None
         noaa_df['dewPoint_F'] = None
@@ -931,7 +940,7 @@ def _download_noaa_data(stations: dict, weather_directory : str) -> List[str]:
         print("FTP ERROR")
         return
     # Download files for each station from 2010 till present year
-    for year in range(2010, year_end + 1):
+    for year in range(2010, year_end):
         # Set FTP credentials and connect
         wd = f"/pub/data/noaa/isd-lite/{year}/"
         ftp_server.cwd(wd)

ecopipeline/transform/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days,convert_c_to_f,convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage,generate_event_log_df,convert_time_zone, shift_accumulative_columns,heat_output_calc
+from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days,convert_c_to_f,convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage,generate_event_log_df,convert_time_zone, shift_accumulative_columns,heat_output_calc, add_relative_humidity
 from .lbnl import nclarity_filter_new, site_specific, condensate_calculations, gas_valve_diff, gather_outdoor_conditions, aqsuite_prep_time, nclarity_csv_to_df, _add_date, add_local_time, aqsuite_filter_new, get_refrig_charge, elev_correction, change_ID_to_HVAC, get_hvac_state, get_cop_values, get_cfm_values, replace_humidity, create_fan_curves, lbnl_temperature_conversions, lbnl_pressure_conversions, lbnl_sat_calculations, get_site_cfm_info, get_site_info, merge_indexlike_rows
 from .bayview import calculate_cop_values, aggregate_values, get_energy_by_min, verify_power_energy, get_temp_zones120, get_storage_gals120
 __all__ = ["rename_sensors", "avg_duplicate_times", "remove_outliers", "ffill_missing", "nullify_erroneous", "sensor_adjustment", "round_time", "aggregate_df", "join_to_hourly", "concat_last_row", "join_to_daily",
@@ -6,4 +6,4 @@ __all__ = ["rename_sensors", "avg_duplicate_times", "remove_outliers", "ffill_mi
            "nclarity_csv_to_df", "_add_date", "add_local_time", "aqsuite_filter_new", "get_refrig_charge", "elev_correction", "change_ID_to_HVAC", "get_hvac_state", "get_cop_values", "get_cfm_values", "replace_humidity",
            "create_fan_curves", "lbnl_temperature_conversions", "lbnl_pressure_conversions", "lbnl_sat_calculations", "get_site_cfm_info", "get_site_info", "merge_indexlike_rows", "calculate_cop_values", "aggregate_values",
            "get_energy_by_min", "verify_power_energy", "get_temp_zones120", "get_storage_gals120","convert_c_to_f","convert_l_to_g", "convert_on_off_col_to_bool", "flag_dhw_outage","generate_event_log_df","convert_time_zone",
-           "shift_accumulative_columns","heat_output_calc"]
+           "shift_accumulative_columns","heat_output_calc", "add_relative_humidity"]

ecopipeline/transform/transform.py CHANGED Viewed

@@ -3,7 +3,7 @@ import numpy as np
 import datetime as dt
 import csv
 import os
-from ecopipeline.utils.unit_convert import temp_c_to_f_non_noaa, volume_l_to_g, power_btuhr_to_kw
+from ecopipeline.utils.unit_convert import temp_c_to_f_non_noaa, volume_l_to_g, power_btuhr_to_kw, temp_f_to_c
 from ecopipeline import ConfigManager
 pd.set_option('display.max_columns', None)
@@ -425,6 +425,55 @@ def sensor_adjustment(df: pd.DataFrame, config : ConfigManager) -> pd.DataFrame:
     return df
+def add_relative_humidity(df : pd.DataFrame, temp_col : str ='airTemp_F', dew_point_col : str ='dewPoint_F', degree_f : bool = True):
+    """
+    Add a 'relative_humidity' column (in percent) computed from air temperature and dew point.
+    Vapor pressures are estimated with the approximation e = 6.11 * 10^(7.5 * T / (T + 237.3)),
+    with T in °C. Relative humidity is the ratio of the vapor pressure at the dew point to the
+    vapor pressure at the air temperature. The input DataFrame is modified in place and returned.
+    Parameters
+    ----------
+    df : pd.DataFrame
+        DataFrame containing air temperature and dew point temperature.
+    temp_col : str
+        Column name for air temperature. Default: 'airTemp_F'
+    dew_point_col : str
+        Column name for dew point temperature. Default: 'dewPoint_F'
+    degree_f : bool
+        True if temperature columns are in °F, false if in °C. Default: True
+    Returns
+    -------
+    pd.DataFrame:
+        The same DataFrame with an added 'relative_humidity' column clipped to [0.0, 100.0].
+        No other columns are added or changed.
+    """
+    # Constants of the vapor pressure approximation (temperatures in °C)
+    A = 6.11
+    B = 7.5
+    C = 237.3
+    # Work on local Series so that no temporary Celsius columns are written to df.
+    # (Previously temporary "<col>_C" columns were added and the drop() result was discarded,
+    # so they leaked into the returned DataFrame.)
+    if degree_f:
+        temp_c = df[temp_col].apply(temp_f_to_c)
+        dew_point_c = df[dew_point_col].apply(temp_f_to_c)
+    else:
+        temp_c = df[temp_col]
+        dew_point_c = df[dew_point_col]
+    # Calculate saturation vapor pressure (e_s) and actual vapor pressure (e)
+    e_s = A * 10 ** ((B * temp_c) / (temp_c + C))
+    e = A * 10 ** ((B * dew_point_c) / (dew_point_c + C))
+    # Relative humidity in percent; clip because a dew point slightly above the air temperature
+    # (e.g. due to rounding) would otherwise produce values over 100%
+    df['relative_humidity'] = np.clip((e / e_s) * 100.0, 0.0, 100.0)
+    return df
 def cop_method_1(df: pd.DataFrame, recircLosses, heatout_primary_column : str = 'HeatOut_Primary', total_input_power_column : str = 'PowerIn_Total') -> pd.DataFrame:
     """
     Performs COP calculation method 1 (original AWS method).

ecopipeline/utils/unit_convert.py CHANGED Viewed

@@ -14,6 +14,10 @@ def temp_c_to_f_non_noaa(temp_c : float):
     temp_f = 32 + (temp_c * 1.8)
     return temp_f
+def temp_f_to_c(temp_f : float):
+    """
+    Convert a temperature from degrees Fahrenheit to degrees Celsius.
+    Parameters
+    ----------
+    temp_f : float
+        Temperature in °F
+    Returns
+    -------
+    float:
+        Temperature in °C, computed as (temp_f - 32) * 5 / 9
+    """
+    return (temp_f - 32) * 5.0 / 9.0
 def power_btuhr_to_kw(power_btuhr : float):
     """
     Convert a power value from BTU/hr to kW.
     Parameters
     ----------
     power_btuhr : float
         Power in BTU/hr
     Returns
     -------
     float:
         Power in kW, computed as power_btuhr / 3412.0
     """
     return power_btuhr / 3412.0

{ecopipeline-0.4.16.dist-info → ecopipeline-0.4.18.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ecopipeline
-Version: 0.4.16
+Version: 0.4.18
 Summary: Contains functions for use in Ecotope Datapipelines
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: GNU General Public License (GPL)

{ecopipeline-0.4.16.dist-info → ecopipeline-0.4.18.dist-info}/RECORD RENAMED Viewed

@@ -1,17 +1,17 @@
 ecopipeline/__init__.py,sha256=vCRzwd781ciCSXMP1ycM_BXAqxj3KVaNKIjsLOPcbwc,171
 ecopipeline/extract/__init__.py,sha256=3u_CUMdCguVewU3kN8x6xhVNyo1-p-gwTrhjOh7Psqg,645
-ecopipeline/extract/extract.py,sha256=w0bbzhy42gDnkMW320yGV46Wpq642KBKIfGAtc85CVg,42673
+ecopipeline/extract/extract.py,sha256=Ldlc3dUEyW8SfIU7KI7rmCYaIXofHm3-vfxB2igpIRE,43365
 ecopipeline/load/__init__.py,sha256=7ipv7GJfZ5382lcrdNm4MyM-WiCEVuRWTqxyzDSZhqg,197
 ecopipeline/load/load.py,sha256=RbGZSsigkChZpX1SZzYvZuS6-DS0k-d2IYUsJvZDvmk,17485
-ecopipeline/transform/__init__.py,sha256=BjrmFNeRLMdB8HS7ckV86VE79dGXkY8JbeYidLpOfUc,2232
+ecopipeline/transform/__init__.py,sha256=DcIJfkRs4OmZzDeEfW_OiOIXNqN6CUl1_lW0SS7-eN8,2280
 ecopipeline/transform/bayview.py,sha256=TP24dnTsUD95X-f6732egPZKjepFLJgDm9ImGr-fppY,17899
 ecopipeline/transform/lbnl.py,sha256=EQ54G4rJXaZ7pwVusKcdK2KBehSdCsNo2ybphtMGs7o,33400
-ecopipeline/transform/transform.py,sha256=OVGV-Js0VAuwjjPmhZCZV_AKHsqslZnuBEuWEbOyOCY,41594
+ecopipeline/transform/transform.py,sha256=9jVIFPGa_xVqcIg--2Wj2Fu8WGp3_C6tWGDvBYmt_Jc,43181
 ecopipeline/utils/ConfigManager.py,sha256=t4sfTjGO0g5P50XBQqGVFWaXfAlW1GMDh1DLoBuFGks,9826
 ecopipeline/utils/__init__.py,sha256=ccWUR0m7gD9DfcgsxBCLOfi4lho6RdYuB2Ugy_g6ZdQ,28
-ecopipeline/utils/unit_convert.py,sha256=6wTIpwmM9vQt6WmbCfa5ABITD_-yzcCl2ZKCnl0IFag,2973
-ecopipeline-0.4.16.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ecopipeline-0.4.16.dist-info/METADATA,sha256=ESNjAVFFwIos64TutzHqJe3zOW0FIsTv64dK3d2Hoc0,2308
-ecopipeline-0.4.16.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-ecopipeline-0.4.16.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
-ecopipeline-0.4.16.dist-info/RECORD,,
+ecopipeline/utils/unit_convert.py,sha256=VFh1we2Y8KV3u21BeWb-U3TlZJXo83q5vdxxkpgcuME,3064
+ecopipeline-0.4.18.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ecopipeline-0.4.18.dist-info/METADATA,sha256=8lh7Wpk6OSMqe-I9v6EXr9E2giiqsskXUTK9xtmjH1Q,2308
+ecopipeline-0.4.18.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+ecopipeline-0.4.18.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
+ecopipeline-0.4.18.dist-info/RECORD,,

{ecopipeline-0.4.16.dist-info → ecopipeline-0.4.18.dist-info}/LICENSE RENAMED Viewed

File without changes

{ecopipeline-0.4.16.dist-info → ecopipeline-0.4.18.dist-info}/WHEEL RENAMED Viewed

File without changes

{ecopipeline-0.4.16.dist-info → ecopipeline-0.4.18.dist-info}/top_level.txt RENAMED Viewed

File without changes

ecopipeline 0.4.16__py3-none-any.whl → 0.4.18__py3-none-any.whl

ecopipeline 0.4.16__py3-none-any.whl → 0.4.18__py3-none-any.whl