ecopipeline 0.4.16__py3-none-any.whl → 0.4.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,7 +16,7 @@ import requests
16
16
  import subprocess
17
17
 
18
18
 
19
- def get_last_full_day_from_db(config : ConfigManager) -> datetime:
19
+ def get_last_full_day_from_db(config : ConfigManager, table_identifier : str = "minute") -> datetime:
20
20
  """
21
21
  Function retrieves the last line from the database with the most recent datetime
22
22
  in local time.
@@ -25,6 +25,8 @@ def get_last_full_day_from_db(config : ConfigManager) -> datetime:
25
25
  ----------
26
26
  config : ecopipeline.ConfigManager
27
27
  The ConfigManager object that holds configuration data for the pipeline
28
+ table_identifier : str
29
+ Table identifier in config.ini with minute data. Default: "minute"
28
30
 
29
31
  Returns
30
32
  -------
@@ -32,14 +34,14 @@ def get_last_full_day_from_db(config : ConfigManager) -> datetime:
32
34
  end of last full day populated in database or default past time if no data found
33
35
  """
34
36
  # config_dict = get_login_info(["minute"], config)
35
- table_config_dict = config.get_db_table_info(["minute"])
37
+ table_config_dict = config.get_db_table_info([table_identifier])
36
38
  # db_connection, db_cursor = connect_db(config_info=config_dict['database'])
37
39
  db_connection, db_cursor = config.connect_db()
38
40
  return_time = datetime(year=2000, month=1, day=9, hour=23, minute=59, second=0).astimezone(timezone('US/Pacific')) # arbitrary default time
39
41
 
40
42
  try:
41
43
  db_cursor.execute(
42
- f"select * from {table_config_dict['minute']['table_name']} order by time_pt DESC LIMIT 1")
44
+ f"select * from {table_config_dict[table_identifier]['table_name']} order by time_pt DESC LIMIT 1")
43
45
 
44
46
  last_row_data = pd.DataFrame(db_cursor.fetchall())
45
47
  if len(last_row_data.index) > 0:
@@ -98,18 +100,20 @@ def get_db_row_from_time(time: datetime, config : ConfigManager) -> pd.DataFrame
98
100
 
99
101
  return row_data
100
102
 
101
- def extract_new(startTime: datetime, filenames: List[str], decihex = False, timeZone: str = None, endTime: datetime = None, dateStringStartIdx : int = -17) -> List[str]:
103
+ def extract_new(startTime: datetime, filenames: List[str], decihex = False, timeZone: str = None, endTime: datetime = None, dateStringStartIdx : int = -17,
104
+ dateStringEndIdx : int = -3, dateFormat : str = "%Y%m%d%H%M%S", epochFormat : bool = False) -> List[str]:
102
105
  """
103
106
  Function filters the filenames to only those equal to or newer than the date specified startTime.
104
107
  If filenames are in deciheximal, the function can still handle it. Note that for some projects,
105
108
  files are dropped at irregular intervals so data cannot be filtered by exact date.
106
109
 
107
- Currently, this function expects file names to be in one of two formats:
110
+ Currently, this function expects file names to be in one of three formats:
108
111
 
109
- 1. normal (set decihex = False) format assumes file names are in format such that characters [-17,-3] in the file names string
110
- are the files date in the form "%Y%m%d%H%M%S"
112
+ 1. default (set decihex = False) format assumes file names are in format such that characters [-17,-3] in the file names string
113
+ are the files date in the form "%Y%m%d%H%M%S"
111
114
  2. deciheximal (set decihex = True) format assumes file names are in format such there is a deciheximal value between a '.' and '_' character in each filename string
112
115
  that has a deciheximal value equal to the number of seconds since January 1, 1970 to represent the timestamp of the data in the file.
116
+ 3. custom (set decihex = False) format is the same as the default format but parses the date with the user-supplied dateFormat parameter and expects the date to occupy characters [dateStringStartIdx, dateStringEndIdx] of the filename
113
117
 
114
118
  Parameters
115
119
  ----------
@@ -125,7 +129,9 @@ def extract_new(startTime: datetime, filenames: List[str], decihex = False, time
125
129
  time stamp by the pandas tz_localize() function https://pandas.pydata.org/docs/reference/api/pandas.Series.tz_localize.html
126
130
  defaults to None
127
131
  dateStringStartIdx: int
128
- The character index in each file where the date in format "%Y%m%d%H%M%S" starts. Default is -17 (meaning 17 characters from the end of the filename string)
132
+ The character index in each filename where the date string starts. Default is -17 (meaning 17 characters from the end of the filename string)
133
+ dateStringEndIdx: int
134
+ The character index in each filename where the date string ends. Default is -3 (meaning 3 characters from the end of the filename string)
129
135
 
130
136
  Returns
131
137
  -------
@@ -145,8 +151,11 @@ def extract_new(startTime: datetime, filenames: List[str], decihex = False, time
145
151
 
146
152
 
147
153
  else:
148
- startTime_int = int(startTime.strftime("%Y%m%d%H%M%S"))
149
- return_list = list(filter(lambda filename: int(filename[dateStringStartIdx:dateStringStartIdx+14]) >= startTime_int and (endTime is None or int(filename[dateStringStartIdx:dateStringStartIdx+14]) < int(endTime.strftime("%Y%m%d%H%M%S"))), filenames))
154
+ if epochFormat:
155
+ startTime_int = int(startTime.timestamp())
156
+ else:
157
+ startTime_int = int(startTime.strftime(dateFormat))
158
+ return_list = list(filter(lambda filename: int(filename[dateStringStartIdx:dateStringEndIdx]) >= startTime_int and (endTime is None or int(filename[dateStringStartIdx:dateStringStartIdx+14]) < int(endTime.strftime("%Y%m%d%H%M%S"))), filenames))
150
159
  return return_list
151
160
 
152
161
  def extract_files(extension: str, config: ConfigManager, data_sub_dir : str = "", file_prefix : str = "") -> List[str]:
@@ -791,8 +800,8 @@ def get_noaa_data(station_names: List[str], config : ConfigManager, station_ids
791
800
  noaa_dfs = _convert_to_df(station_ids, noaa_filenames, weather_directory)
792
801
  formatted_dfs = _format_df(station_ids, noaa_dfs)
793
802
  except:
794
- # temporary solution for NOAA ftp not including 2024
795
- noaa_df = pd.DataFrame(index=pd.date_range(start='2024-01-01', periods=10, freq='H'))
803
+ # temporary solution for NOAA ftp not including 2025
804
+ noaa_df = pd.DataFrame(index=pd.date_range(start='2025-01-01', periods=10, freq='H'))
796
805
  noaa_df['conditions'] = None
797
806
  noaa_df['airTemp_F'] = None
798
807
  noaa_df['dewPoint_F'] = None
@@ -931,7 +940,7 @@ def _download_noaa_data(stations: dict, weather_directory : str) -> List[str]:
931
940
  print("FTP ERROR")
932
941
  return
933
942
  # Download files for each station from 2010 till present year
934
- for year in range(2010, year_end + 1):
943
+ for year in range(2010, year_end):
935
944
  # Set FTP credentials and connect
936
945
  wd = f"/pub/data/noaa/isd-lite/{year}/"
937
946
  ftp_server.cwd(wd)
@@ -1,4 +1,4 @@
1
- from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days,convert_c_to_f,convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage,generate_event_log_df,convert_time_zone, shift_accumulative_columns,heat_output_calc
1
+ from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days,convert_c_to_f,convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage,generate_event_log_df,convert_time_zone, shift_accumulative_columns,heat_output_calc, add_relative_humidity
2
2
  from .lbnl import nclarity_filter_new, site_specific, condensate_calculations, gas_valve_diff, gather_outdoor_conditions, aqsuite_prep_time, nclarity_csv_to_df, _add_date, add_local_time, aqsuite_filter_new, get_refrig_charge, elev_correction, change_ID_to_HVAC, get_hvac_state, get_cop_values, get_cfm_values, replace_humidity, create_fan_curves, lbnl_temperature_conversions, lbnl_pressure_conversions, lbnl_sat_calculations, get_site_cfm_info, get_site_info, merge_indexlike_rows
3
3
  from .bayview import calculate_cop_values, aggregate_values, get_energy_by_min, verify_power_energy, get_temp_zones120, get_storage_gals120
4
4
  __all__ = ["rename_sensors", "avg_duplicate_times", "remove_outliers", "ffill_missing", "nullify_erroneous", "sensor_adjustment", "round_time", "aggregate_df", "join_to_hourly", "concat_last_row", "join_to_daily",
@@ -6,4 +6,4 @@ __all__ = ["rename_sensors", "avg_duplicate_times", "remove_outliers", "ffill_mi
6
6
  "nclarity_csv_to_df", "_add_date", "add_local_time", "aqsuite_filter_new", "get_refrig_charge", "elev_correction", "change_ID_to_HVAC", "get_hvac_state", "get_cop_values", "get_cfm_values", "replace_humidity",
7
7
  "create_fan_curves", "lbnl_temperature_conversions", "lbnl_pressure_conversions", "lbnl_sat_calculations", "get_site_cfm_info", "get_site_info", "merge_indexlike_rows", "calculate_cop_values", "aggregate_values",
8
8
  "get_energy_by_min", "verify_power_energy", "get_temp_zones120", "get_storage_gals120","convert_c_to_f","convert_l_to_g", "convert_on_off_col_to_bool", "flag_dhw_outage","generate_event_log_df","convert_time_zone",
9
- "shift_accumulative_columns","heat_output_calc"]
9
+ "shift_accumulative_columns","heat_output_calc", "add_relative_humidity"]
@@ -3,7 +3,7 @@ import numpy as np
3
3
  import datetime as dt
4
4
  import csv
5
5
  import os
6
- from ecopipeline.utils.unit_convert import temp_c_to_f_non_noaa, volume_l_to_g, power_btuhr_to_kw
6
+ from ecopipeline.utils.unit_convert import temp_c_to_f_non_noaa, volume_l_to_g, power_btuhr_to_kw, temp_f_to_c
7
7
  from ecopipeline import ConfigManager
8
8
 
9
9
  pd.set_option('display.max_columns', None)
@@ -425,6 +425,55 @@ def sensor_adjustment(df: pd.DataFrame, config : ConfigManager) -> pd.DataFrame:
425
425
 
426
426
  return df
427
427
 
428
+ def add_relative_humidity(df : pd.DataFrame, temp_col : str ='airTemp_F', dew_point_col : str ='dewPoint_F', degree_f : bool = True):
429
+ """
430
+ Add a column for relative humidity to the DataFrame.
431
+
432
+ Parameters
433
+ ----------
434
+ df : pd.DataFrame
435
+ DataFrame containing air temperature and dew point temperature.
436
+ temp_col : str
437
+ Column name for air temperature.
438
+ dew_point_col : str
439
+ Column name for dew point temperature.
440
+ degree_f : bool
441
+ True if temperature columns are in °F, false if in °C
442
+
443
+ Returns
444
+ -------
445
+ pd.DataFrame:
446
+ DataFrame with an added column for relative humidity.
447
+ """
448
+ # Define constants
449
+ A = 6.11
450
+ B = 7.5
451
+ C = 237.3
452
+
453
+ if degree_f:
454
+ df[f"{temp_col}_C"] = df[temp_col].apply(temp_f_to_c)
455
+ df[f"{dew_point_col}_C"] = df[dew_point_col].apply(temp_f_to_c)
456
+ temp_col_c = f"{temp_col}_C"
457
+ dew_point_col_c = f"{dew_point_col}_C"
458
+ else:
459
+ temp_col_c = temp_col
460
+ dew_point_col_c = dew_point_col
461
+
462
+ # Calculate saturation vapor pressure (e_s) and actual vapor pressure (e)
463
+ e_s = A * 10 ** ((B * df[temp_col_c]) / (df[temp_col_c] + C))
464
+ e = A * 10 ** ((B * df[dew_point_col_c]) / (df[dew_point_col_c] + C))
465
+
466
+ # Calculate relative humidity
467
+ df['relative_humidity'] = (e / e_s) * 100.0
468
+
469
+ # Handle cases where relative humidity exceeds 100% due to rounding
470
+ df['relative_humidity'] = np.clip(df['relative_humidity'], 0.0, 100.0)
471
+
472
+ if degree_f:
473
+ df.drop(columns=[temp_col_c, dew_point_col_c])
474
+
475
+ return df
476
+
428
477
  def cop_method_1(df: pd.DataFrame, recircLosses, heatout_primary_column : str = 'HeatOut_Primary', total_input_power_column : str = 'PowerIn_Total') -> pd.DataFrame:
429
478
  """
430
479
  Performs COP calculation method 1 (original AWS method).
@@ -14,6 +14,10 @@ def temp_c_to_f_non_noaa(temp_c : float):
14
14
  temp_f = 32 + (temp_c * 1.8)
15
15
  return temp_f
16
16
 
17
+ def temp_f_to_c(temp_f : float):
18
+ temp_c = (temp_f - 32) * 5.0 / 9.0
19
+ return temp_c
20
+
17
21
  def power_btuhr_to_kw(power_btuhr : float):
18
22
  power_kw = power_btuhr / 3412.0
19
23
  return power_kw
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ecopipeline
3
- Version: 0.4.16
3
+ Version: 0.4.18
4
4
  Summary: Contains functions for use in Ecotope Datapipelines
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
@@ -1,17 +1,17 @@
1
1
  ecopipeline/__init__.py,sha256=vCRzwd781ciCSXMP1ycM_BXAqxj3KVaNKIjsLOPcbwc,171
2
2
  ecopipeline/extract/__init__.py,sha256=3u_CUMdCguVewU3kN8x6xhVNyo1-p-gwTrhjOh7Psqg,645
3
- ecopipeline/extract/extract.py,sha256=w0bbzhy42gDnkMW320yGV46Wpq642KBKIfGAtc85CVg,42673
3
+ ecopipeline/extract/extract.py,sha256=Ldlc3dUEyW8SfIU7KI7rmCYaIXofHm3-vfxB2igpIRE,43365
4
4
  ecopipeline/load/__init__.py,sha256=7ipv7GJfZ5382lcrdNm4MyM-WiCEVuRWTqxyzDSZhqg,197
5
5
  ecopipeline/load/load.py,sha256=RbGZSsigkChZpX1SZzYvZuS6-DS0k-d2IYUsJvZDvmk,17485
6
- ecopipeline/transform/__init__.py,sha256=BjrmFNeRLMdB8HS7ckV86VE79dGXkY8JbeYidLpOfUc,2232
6
+ ecopipeline/transform/__init__.py,sha256=DcIJfkRs4OmZzDeEfW_OiOIXNqN6CUl1_lW0SS7-eN8,2280
7
7
  ecopipeline/transform/bayview.py,sha256=TP24dnTsUD95X-f6732egPZKjepFLJgDm9ImGr-fppY,17899
8
8
  ecopipeline/transform/lbnl.py,sha256=EQ54G4rJXaZ7pwVusKcdK2KBehSdCsNo2ybphtMGs7o,33400
9
- ecopipeline/transform/transform.py,sha256=OVGV-Js0VAuwjjPmhZCZV_AKHsqslZnuBEuWEbOyOCY,41594
9
+ ecopipeline/transform/transform.py,sha256=9jVIFPGa_xVqcIg--2Wj2Fu8WGp3_C6tWGDvBYmt_Jc,43181
10
10
  ecopipeline/utils/ConfigManager.py,sha256=t4sfTjGO0g5P50XBQqGVFWaXfAlW1GMDh1DLoBuFGks,9826
11
11
  ecopipeline/utils/__init__.py,sha256=ccWUR0m7gD9DfcgsxBCLOfi4lho6RdYuB2Ugy_g6ZdQ,28
12
- ecopipeline/utils/unit_convert.py,sha256=6wTIpwmM9vQt6WmbCfa5ABITD_-yzcCl2ZKCnl0IFag,2973
13
- ecopipeline-0.4.16.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- ecopipeline-0.4.16.dist-info/METADATA,sha256=ESNjAVFFwIos64TutzHqJe3zOW0FIsTv64dK3d2Hoc0,2308
15
- ecopipeline-0.4.16.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
16
- ecopipeline-0.4.16.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
17
- ecopipeline-0.4.16.dist-info/RECORD,,
12
+ ecopipeline/utils/unit_convert.py,sha256=VFh1we2Y8KV3u21BeWb-U3TlZJXo83q5vdxxkpgcuME,3064
13
+ ecopipeline-0.4.18.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ ecopipeline-0.4.18.dist-info/METADATA,sha256=8lh7Wpk6OSMqe-I9v6EXr9E2giiqsskXUTK9xtmjH1Q,2308
15
+ ecopipeline-0.4.18.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
16
+ ecopipeline-0.4.18.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
17
+ ecopipeline-0.4.18.dist-info/RECORD,,