ecopipeline 0.4.16__py3-none-any.whl → 0.4.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ecopipeline/extract/extract.py +22 -13
- ecopipeline/transform/__init__.py +2 -2
- ecopipeline/transform/transform.py +50 -1
- ecopipeline/utils/unit_convert.py +4 -0
- {ecopipeline-0.4.16.dist-info → ecopipeline-0.4.18.dist-info}/METADATA +1 -1
- {ecopipeline-0.4.16.dist-info → ecopipeline-0.4.18.dist-info}/RECORD +9 -9
- {ecopipeline-0.4.16.dist-info → ecopipeline-0.4.18.dist-info}/LICENSE +0 -0
- {ecopipeline-0.4.16.dist-info → ecopipeline-0.4.18.dist-info}/WHEEL +0 -0
- {ecopipeline-0.4.16.dist-info → ecopipeline-0.4.18.dist-info}/top_level.txt +0 -0
ecopipeline/extract/extract.py
CHANGED
|
@@ -16,7 +16,7 @@ import requests
|
|
|
16
16
|
import subprocess
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
def get_last_full_day_from_db(config : ConfigManager) -> datetime:
|
|
19
|
+
def get_last_full_day_from_db(config : ConfigManager, table_identifier : str = "minute") -> datetime:
|
|
20
20
|
"""
|
|
21
21
|
Function retrieves the last line from the database with the most recent datetime
|
|
22
22
|
in local time.
|
|
@@ -25,6 +25,8 @@ def get_last_full_day_from_db(config : ConfigManager) -> datetime:
|
|
|
25
25
|
----------
|
|
26
26
|
config : ecopipeline.ConfigManager
|
|
27
27
|
The ConfigManager object that holds configuration data for the pipeline
|
|
28
|
+
table_identifier : str
|
|
29
|
+
Table identifier in config.ini with minute data. Default: "minute"
|
|
28
30
|
|
|
29
31
|
Returns
|
|
30
32
|
-------
|
|
@@ -32,14 +34,14 @@ def get_last_full_day_from_db(config : ConfigManager) -> datetime:
|
|
|
32
34
|
end of last full day populated in database or default past time if no data found
|
|
33
35
|
"""
|
|
34
36
|
# config_dict = get_login_info(["minute"], config)
|
|
35
|
-
table_config_dict = config.get_db_table_info([
|
|
37
|
+
table_config_dict = config.get_db_table_info([table_identifier])
|
|
36
38
|
# db_connection, db_cursor = connect_db(config_info=config_dict['database'])
|
|
37
39
|
db_connection, db_cursor = config.connect_db()
|
|
38
40
|
return_time = datetime(year=2000, month=1, day=9, hour=23, minute=59, second=0).astimezone(timezone('US/Pacific')) # arbitrary default time
|
|
39
41
|
|
|
40
42
|
try:
|
|
41
43
|
db_cursor.execute(
|
|
42
|
-
f"select * from {table_config_dict[
|
|
44
|
+
f"select * from {table_config_dict[table_identifier]['table_name']} order by time_pt DESC LIMIT 1")
|
|
43
45
|
|
|
44
46
|
last_row_data = pd.DataFrame(db_cursor.fetchall())
|
|
45
47
|
if len(last_row_data.index) > 0:
|
|
@@ -98,18 +100,20 @@ def get_db_row_from_time(time: datetime, config : ConfigManager) -> pd.DataFrame
|
|
|
98
100
|
|
|
99
101
|
return row_data
|
|
100
102
|
|
|
101
|
-
def extract_new(startTime: datetime, filenames: List[str], decihex = False, timeZone: str = None, endTime: datetime = None, dateStringStartIdx : int = -17
|
|
103
|
+
def extract_new(startTime: datetime, filenames: List[str], decihex = False, timeZone: str = None, endTime: datetime = None, dateStringStartIdx : int = -17,
|
|
104
|
+
dateStringEndIdx : int = -3, dateFormat : str = "%Y%m%d%H%M%S", epochFormat : bool = False) -> List[str]:
|
|
102
105
|
"""
|
|
103
106
|
Function filters the filenames to only those equal to or newer than the date specified startTime.
|
|
104
107
|
If filenames are in deciheximal, The function can still handel it. Note that for some projects,
|
|
105
108
|
files are dropped at irregular intervals so data cannot be filtered by exact date.
|
|
106
109
|
|
|
107
|
-
Currently, this function expects file names to be in one of
|
|
110
|
+
Currently, this function expects file names to be in one of three formats:
|
|
108
111
|
|
|
109
|
-
1.
|
|
110
|
-
are the files date in the form "%Y%m%d%H%M%S"
|
|
112
|
+
1. default (set decihex = False) format assumes file names are in format such that characters [-17,-3] in the file names string
|
|
113
|
+
are the files date in the form "%Y%m%d%H%M%S"
|
|
111
114
|
2. deciheximal (set decihex = True) format assumes file names are in format such there is a deciheximal value between a '.' and '_' character in each filename string
|
|
112
115
|
that has a deciheximal value equal to the number of seconds since January 1, 1970 to represent the timestamp of the data in the file.
|
|
116
|
+
3. custom format is the same as default format but uses a custom date format with the dateFormat parameter and expects the date to be characters [dateStringStartIdx,dateStringEndIdx]
|
|
113
117
|
|
|
114
118
|
Parameters
|
|
115
119
|
----------
|
|
@@ -125,7 +129,9 @@ def extract_new(startTime: datetime, filenames: List[str], decihex = False, time
|
|
|
125
129
|
time stamp by the pandas tz_localize() function https://pandas.pydata.org/docs/reference/api/pandas.Series.tz_localize.html
|
|
126
130
|
defaults to None
|
|
127
131
|
dateStringStartIdx: int
|
|
128
|
-
The character index in each file where the date in format
|
|
132
|
+
The character index in each file where the date in format starts. Default is -17 (meaning 17 characters from the end of the filename string)
|
|
133
|
+
dateStringEndIdx: int
|
|
134
|
+
The character index in each file where the date in format ends. Default is -3 (meaning 3 characters from the end of the filename string)
|
|
129
135
|
|
|
130
136
|
Returns
|
|
131
137
|
-------
|
|
@@ -145,8 +151,11 @@ def extract_new(startTime: datetime, filenames: List[str], decihex = False, time
|
|
|
145
151
|
|
|
146
152
|
|
|
147
153
|
else:
|
|
148
|
-
|
|
149
|
-
|
|
154
|
+
if epochFormat:
|
|
155
|
+
startTime_int = int(startTime.timestamp())
|
|
156
|
+
else:
|
|
157
|
+
startTime_int = int(startTime.strftime(dateFormat))
|
|
158
|
+
return_list = list(filter(lambda filename: int(filename[dateStringStartIdx:dateStringEndIdx]) >= startTime_int and (endTime is None or int(filename[dateStringStartIdx:dateStringStartIdx+14]) < int(endTime.strftime("%Y%m%d%H%M%S"))), filenames))
|
|
150
159
|
return return_list
|
|
151
160
|
|
|
152
161
|
def extract_files(extension: str, config: ConfigManager, data_sub_dir : str = "", file_prefix : str = "") -> List[str]:
|
|
@@ -791,8 +800,8 @@ def get_noaa_data(station_names: List[str], config : ConfigManager, station_ids
|
|
|
791
800
|
noaa_dfs = _convert_to_df(station_ids, noaa_filenames, weather_directory)
|
|
792
801
|
formatted_dfs = _format_df(station_ids, noaa_dfs)
|
|
793
802
|
except:
|
|
794
|
-
# temporary solution for NOAA ftp not including
|
|
795
|
-
noaa_df = pd.DataFrame(index=pd.date_range(start='
|
|
803
|
+
# temporary solution for NOAA ftp not including 2025
|
|
804
|
+
noaa_df = pd.DataFrame(index=pd.date_range(start='2025-01-01', periods=10, freq='H'))
|
|
796
805
|
noaa_df['conditions'] = None
|
|
797
806
|
noaa_df['airTemp_F'] = None
|
|
798
807
|
noaa_df['dewPoint_F'] = None
|
|
@@ -931,7 +940,7 @@ def _download_noaa_data(stations: dict, weather_directory : str) -> List[str]:
|
|
|
931
940
|
print("FTP ERROR")
|
|
932
941
|
return
|
|
933
942
|
# Download files for each station from 2010 till present year
|
|
934
|
-
for year in range(2010, year_end
|
|
943
|
+
for year in range(2010, year_end):
|
|
935
944
|
# Set FTP credentials and connect
|
|
936
945
|
wd = f"/pub/data/noaa/isd-lite/{year}/"
|
|
937
946
|
ftp_server.cwd(wd)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days,convert_c_to_f,convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage,generate_event_log_df,convert_time_zone, shift_accumulative_columns,heat_output_calc
|
|
1
|
+
from .transform import rename_sensors, avg_duplicate_times, remove_outliers, ffill_missing, nullify_erroneous, sensor_adjustment, round_time, aggregate_df, join_to_hourly, concat_last_row, join_to_daily, cop_method_1, cop_method_2, create_summary_tables, remove_partial_days,convert_c_to_f,convert_l_to_g, convert_on_off_col_to_bool, flag_dhw_outage,generate_event_log_df,convert_time_zone, shift_accumulative_columns,heat_output_calc, add_relative_humidity
|
|
2
2
|
from .lbnl import nclarity_filter_new, site_specific, condensate_calculations, gas_valve_diff, gather_outdoor_conditions, aqsuite_prep_time, nclarity_csv_to_df, _add_date, add_local_time, aqsuite_filter_new, get_refrig_charge, elev_correction, change_ID_to_HVAC, get_hvac_state, get_cop_values, get_cfm_values, replace_humidity, create_fan_curves, lbnl_temperature_conversions, lbnl_pressure_conversions, lbnl_sat_calculations, get_site_cfm_info, get_site_info, merge_indexlike_rows
|
|
3
3
|
from .bayview import calculate_cop_values, aggregate_values, get_energy_by_min, verify_power_energy, get_temp_zones120, get_storage_gals120
|
|
4
4
|
__all__ = ["rename_sensors", "avg_duplicate_times", "remove_outliers", "ffill_missing", "nullify_erroneous", "sensor_adjustment", "round_time", "aggregate_df", "join_to_hourly", "concat_last_row", "join_to_daily",
|
|
@@ -6,4 +6,4 @@ __all__ = ["rename_sensors", "avg_duplicate_times", "remove_outliers", "ffill_mi
|
|
|
6
6
|
"nclarity_csv_to_df", "_add_date", "add_local_time", "aqsuite_filter_new", "get_refrig_charge", "elev_correction", "change_ID_to_HVAC", "get_hvac_state", "get_cop_values", "get_cfm_values", "replace_humidity",
|
|
7
7
|
"create_fan_curves", "lbnl_temperature_conversions", "lbnl_pressure_conversions", "lbnl_sat_calculations", "get_site_cfm_info", "get_site_info", "merge_indexlike_rows", "calculate_cop_values", "aggregate_values",
|
|
8
8
|
"get_energy_by_min", "verify_power_energy", "get_temp_zones120", "get_storage_gals120","convert_c_to_f","convert_l_to_g", "convert_on_off_col_to_bool", "flag_dhw_outage","generate_event_log_df","convert_time_zone",
|
|
9
|
-
"shift_accumulative_columns","heat_output_calc"]
|
|
9
|
+
"shift_accumulative_columns","heat_output_calc", "add_relative_humidity"]
|
|
@@ -3,7 +3,7 @@ import numpy as np
|
|
|
3
3
|
import datetime as dt
|
|
4
4
|
import csv
|
|
5
5
|
import os
|
|
6
|
-
from ecopipeline.utils.unit_convert import temp_c_to_f_non_noaa, volume_l_to_g, power_btuhr_to_kw
|
|
6
|
+
from ecopipeline.utils.unit_convert import temp_c_to_f_non_noaa, volume_l_to_g, power_btuhr_to_kw, temp_f_to_c
|
|
7
7
|
from ecopipeline import ConfigManager
|
|
8
8
|
|
|
9
9
|
pd.set_option('display.max_columns', None)
|
|
@@ -425,6 +425,55 @@ def sensor_adjustment(df: pd.DataFrame, config : ConfigManager) -> pd.DataFrame:
|
|
|
425
425
|
|
|
426
426
|
return df
|
|
427
427
|
|
|
428
|
+
def add_relative_humidity(df : pd.DataFrame, temp_col : str ='airTemp_F', dew_point_col : str ='dewPoint_F', degree_f : bool = True) -> pd.DataFrame:
    """
    Add a 'relative_humidity' column (in percent) to the DataFrame, computed from
    air temperature and dew point temperature.

    The input DataFrame is modified in place (only the 'relative_humidity' column is
    added or overwritten) and the same object is returned. No temporary conversion
    columns are left behind in the DataFrame.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame containing air temperature and dew point temperature.
    temp_col : str
        Column name for air temperature.
    dew_point_col : str
        Column name for dew point temperature.
    degree_f : bool
        True if temperature columns are in °F, false if in °C

    Returns
    -------
    pd.DataFrame:
        DataFrame with an added column for relative humidity. Rows whose temperature
        or dew point is missing or non-numeric get NaN.

    Raises
    ------
    KeyError
        If temp_col or dew_point_col is not a column of df.
    """
    # Empirical constants of the vapor pressure approximation
    # e(T) = A * 10 ** (B * T / (T + C)), with T in °C.
    A = 6.11
    B = 7.5
    C = 237.3

    # Coerce to float so that placeholder columns filled with None (or other
    # non-numeric values) produce NaN humidity instead of raising a TypeError.
    temp_c = pd.to_numeric(df[temp_col], errors='coerce')
    dew_point_c = pd.to_numeric(df[dew_point_col], errors='coerce')

    if degree_f:
        # Vectorised °F -> °C conversion, kept in local Series so the caller's
        # DataFrame never receives helper columns that would need dropping.
        temp_c = (temp_c - 32) * 5.0 / 9.0
        dew_point_c = (dew_point_c - 32) * 5.0 / 9.0

    # Saturation vapor pressure (e_s) at air temperature and actual vapor
    # pressure (e) at dew point temperature.
    e_s = A * 10 ** ((B * temp_c) / (temp_c + C))
    e = A * 10 ** ((B * dew_point_c) / (dew_point_c + C))

    # Relative humidity in percent, limited to the range [0, 100]
    # (e.g. when the reported dew point is above the air temperature).
    df['relative_humidity'] = ((e / e_s) * 100.0).clip(lower=0.0, upper=100.0)

    return df
|
|
476
|
+
|
|
428
477
|
def cop_method_1(df: pd.DataFrame, recircLosses, heatout_primary_column : str = 'HeatOut_Primary', total_input_power_column : str = 'PowerIn_Total') -> pd.DataFrame:
|
|
429
478
|
"""
|
|
430
479
|
Performs COP calculation method 1 (original AWS method).
|
|
@@ -14,6 +14,10 @@ def temp_c_to_f_non_noaa(temp_c : float):
|
|
|
14
14
|
temp_f = 32 + (temp_c * 1.8)
|
|
15
15
|
return temp_f
|
|
16
16
|
|
|
17
|
+
def temp_f_to_c(temp_f : float) -> float:
    """
    Convert a temperature from degrees Fahrenheit to degrees Celsius.

    Parameters
    ----------
    temp_f : float
        Temperature in degrees Fahrenheit.

    Returns
    -------
    float
        Temperature in degrees Celsius.
    """
    temp_c = (temp_f - 32) * 5.0 / 9.0
    return temp_c
|
|
20
|
+
|
|
17
21
|
def power_btuhr_to_kw(power_btuhr : float):
|
|
18
22
|
power_kw = power_btuhr / 3412.0
|
|
19
23
|
return power_kw
|
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
ecopipeline/__init__.py,sha256=vCRzwd781ciCSXMP1ycM_BXAqxj3KVaNKIjsLOPcbwc,171
|
|
2
2
|
ecopipeline/extract/__init__.py,sha256=3u_CUMdCguVewU3kN8x6xhVNyo1-p-gwTrhjOh7Psqg,645
|
|
3
|
-
ecopipeline/extract/extract.py,sha256=
|
|
3
|
+
ecopipeline/extract/extract.py,sha256=Ldlc3dUEyW8SfIU7KI7rmCYaIXofHm3-vfxB2igpIRE,43365
|
|
4
4
|
ecopipeline/load/__init__.py,sha256=7ipv7GJfZ5382lcrdNm4MyM-WiCEVuRWTqxyzDSZhqg,197
|
|
5
5
|
ecopipeline/load/load.py,sha256=RbGZSsigkChZpX1SZzYvZuS6-DS0k-d2IYUsJvZDvmk,17485
|
|
6
|
-
ecopipeline/transform/__init__.py,sha256=
|
|
6
|
+
ecopipeline/transform/__init__.py,sha256=DcIJfkRs4OmZzDeEfW_OiOIXNqN6CUl1_lW0SS7-eN8,2280
|
|
7
7
|
ecopipeline/transform/bayview.py,sha256=TP24dnTsUD95X-f6732egPZKjepFLJgDm9ImGr-fppY,17899
|
|
8
8
|
ecopipeline/transform/lbnl.py,sha256=EQ54G4rJXaZ7pwVusKcdK2KBehSdCsNo2ybphtMGs7o,33400
|
|
9
|
-
ecopipeline/transform/transform.py,sha256=
|
|
9
|
+
ecopipeline/transform/transform.py,sha256=9jVIFPGa_xVqcIg--2Wj2Fu8WGp3_C6tWGDvBYmt_Jc,43181
|
|
10
10
|
ecopipeline/utils/ConfigManager.py,sha256=t4sfTjGO0g5P50XBQqGVFWaXfAlW1GMDh1DLoBuFGks,9826
|
|
11
11
|
ecopipeline/utils/__init__.py,sha256=ccWUR0m7gD9DfcgsxBCLOfi4lho6RdYuB2Ugy_g6ZdQ,28
|
|
12
|
-
ecopipeline/utils/unit_convert.py,sha256=
|
|
13
|
-
ecopipeline-0.4.
|
|
14
|
-
ecopipeline-0.4.
|
|
15
|
-
ecopipeline-0.4.
|
|
16
|
-
ecopipeline-0.4.
|
|
17
|
-
ecopipeline-0.4.
|
|
12
|
+
ecopipeline/utils/unit_convert.py,sha256=VFh1we2Y8KV3u21BeWb-U3TlZJXo83q5vdxxkpgcuME,3064
|
|
13
|
+
ecopipeline-0.4.18.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
+
ecopipeline-0.4.18.dist-info/METADATA,sha256=8lh7Wpk6OSMqe-I9v6EXr9E2giiqsskXUTK9xtmjH1Q,2308
|
|
15
|
+
ecopipeline-0.4.18.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
16
|
+
ecopipeline-0.4.18.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
|
|
17
|
+
ecopipeline-0.4.18.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|