ecopipeline 0.7.7__tar.gz → 0.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ecopipeline-0.7.7/src/ecopipeline.egg-info → ecopipeline-0.8.1}/PKG-INFO +1 -1
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/setup.cfg +1 -1
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline/__init__.py +2 -1
- ecopipeline-0.8.1/src/ecopipeline/event_tracking/__init__.py +2 -0
- ecopipeline-0.8.1/src/ecopipeline/event_tracking/event_tracking.py +253 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline/load/load.py +15 -6
- {ecopipeline-0.7.7 → ecopipeline-0.8.1/src/ecopipeline.egg-info}/PKG-INFO +1 -1
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline.egg-info/SOURCES.txt +2 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/LICENSE +0 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/README.md +0 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/pyproject.toml +0 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/setup.py +0 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline/extract/__init__.py +0 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline/extract/extract.py +0 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline/load/__init__.py +0 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline/transform/__init__.py +0 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline/transform/bayview.py +0 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline/transform/lbnl.py +0 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline/transform/transform.py +0 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline/utils/ConfigManager.py +0 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline/utils/__init__.py +0 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline/utils/unit_convert.py +0 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline.egg-info/dependency_links.txt +0 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline.egg-info/requires.txt +0 -0
- {ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline.egg-info/top_level.txt +0 -0
{ecopipeline-0.7.7 → ecopipeline-0.8.1}/setup.cfg

@@ -1,6 +1,6 @@
 [metadata]
 name = ecopipeline
-version = 0.7.7
+version = 0.8.1
 authors = ["Carlos Bello, <bellocarlos@seattleu.edu>, Emil Fahrig <fahrigemil@seattleu.edu>, Casey Mang <cmang@seattleu.edu>, Julian Harris <harrisjulian@seattleu.edu>, Roger Tram <rtram@seattleu.edu>, Nolan Price <nolan@ecotope.com>"]
 description = Contains functions for use in Ecotope Datapipelines
 long_description = file: README.md
{ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline/__init__.py

@@ -1,5 +1,6 @@
 from .utils.ConfigManager import ConfigManager
 from . import extract
 from . import transform
+from . import event_tracking
 from . import load
-__all__ = ['extract', 'transform', 'load', 'ConfigManager']
+__all__ = ['extract', 'transform', 'event_tracking', 'load', 'ConfigManager']
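With `event_tracking` imported in the package root and added to `__all__`, the new module rides along with the existing subpackages. A minimal sketch of the 0.8.1 import surface; the dotted-module import on the second line targets the new file directly, since the contents of the two-line `event_tracking/__init__.py` are not shown in this diff:

```python
import ecopipeline
from ecopipeline.event_tracking.event_tracking import flag_boundary_alarms

# The package root now exposes event_tracking alongside extract, transform and load.
print(ecopipeline.__all__)
# ['extract', 'transform', 'event_tracking', 'load', 'ConfigManager']
```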
ecopipeline-0.8.1/src/ecopipeline/event_tracking/event_tracking.py (new file)

@@ -0,0 +1,253 @@
+import pandas as pd
+import numpy as np
+import datetime as dt
+from ecopipeline import ConfigManager
+
+def flag_boundary_alarms(df: pd.DataFrame, daily_df : pd.DataFrame, config : ConfigManager, default_fault_time : int = 15, site: str = "") -> pd.DataFrame:
+    """
+    Function will take a pandas dataframe and location of alarm information in a csv,
+    and create an dataframe with applicable alarm events
+
+    Parameters
+    ----------
+    df: pd.DataFrame
+        post-transformed dataframe for minute data
+    daily_df: pd.DataFrame
+        post-transformed dataframe for daily data
+    config : ecopipeline.ConfigManager
+        The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
+        called Varriable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
+        The file must have at least three columns which must be titled "variable_name", "low_alarm", and "high_alarm" which should contain the
+        name of each variable in the dataframe that requires the alarming, the lower bound for acceptable data, and the upper bound for
+        acceptable data respectively
+    default_fault_time : int
+        Number of consecutive minutes that a sensor must be out of bounds for to trigger an alarm. Can be customized for each variable with
+        the fault_time column in Varriable_Names.csv
+    site: str
+        string of site name if processing a particular site in a Variable_Names.csv file with multiple sites. Leave as an empty string if not aplicable.
+
+    Returns
+    -------
+    pd.DataFrame:
+        Pandas dataframe with alarm events
+    """
+    variable_names_path = config.get_var_names_path()
+    try:
+        bounds_df = pd.read_csv(variable_names_path)
+    except FileNotFoundError:
+        print("File Not Found: ", variable_names_path)
+        return pd.DataFrame()
+
+    if (site != ""):
+        if not 'site' in bounds_df.columns:
+            raise Exception("site parameter is non null, however, site is not present in Variable_Names.csv")
+        bounds_df = bounds_df.loc[bounds_df['site'] == site]
+
+    required_columns = ["variable_name", "high_alarm", "low_alarm"]
+    for required_column in required_columns:
+        if not required_column in bounds_df.columns:
+            raise Exception(f"{required_column} is not present in Variable_Names.csv")
+    if not 'pretty_name' in bounds_df.columns:
+        bounds_df['pretty_name'] = bounds_df['variable_name']
+    if not 'fault_time' in bounds_df.columns:
+        bounds_df['fault_time'] = default_fault_time
+    bounds_df = bounds_df.loc[:, ["variable_name", "high_alarm", "low_alarm", "fault_time", "pretty_name"]]
+    bounds_df.dropna(axis=0, thresh=2, inplace=True)
+    bounds_df.set_index(['variable_name'], inplace=True)
+    # ensure that lower and upper bounds are numbers
+    bounds_df['high_alarm'] = pd.to_numeric(bounds_df['high_alarm'], errors='coerce').astype(float)
+    bounds_df['low_alarm'] = pd.to_numeric(bounds_df['low_alarm'], errors='coerce').astype(float)
+    bounds_df['fault_time'] = pd.to_numeric(bounds_df['fault_time'], errors='coerce').astype('Int64')
+    bounds_df = bounds_df[bounds_df.index.notnull()]
+    alarms = {}
+    for bound_var, bounds in bounds_df.iterrows():
+        if bound_var in df.columns:
+            lower_mask = df[bound_var] < bounds["low_alarm"]
+            upper_mask = df[bound_var] > bounds["high_alarm"]
+            if pd.isna(bounds['fault_time']):
+                bounds['fault_time'] = default_fault_time
+            for day in daily_df.index:
+                next_day = day + pd.Timedelta(days=1)
+                # low alert
+                low_filtered_df = lower_mask.loc[(lower_mask.index >= day) & (lower_mask.index < next_day)]
+                low_consecutive_condition = low_filtered_df.rolling(window=bounds["fault_time"]).min() == 1
+                if low_consecutive_condition.any():
+                    first_true_index = low_consecutive_condition.idxmax()
+                    adjusted_time = first_true_index - pd.Timedelta(minutes=bounds["fault_time"]-1)
+                    alarm_string = f"Lower bound alarm for {bounds['pretty_name']} (first one at {adjusted_time.strftime('%H:%M')})."
+                    if day in alarms:
+                        alarms[day].append([bound_var, alarm_string])
+                    else:
+                        alarms[day] = [[bound_var, alarm_string]]
+                # high alert
+                up_filtered_df = upper_mask.loc[(upper_mask.index >= day) & (upper_mask.index < next_day)]
+                up_consecutive_condition = up_filtered_df.rolling(window=bounds["fault_time"]).min() == 1
+                if up_consecutive_condition.any():
+                    first_true_index = up_consecutive_condition.idxmax()
+                    adjusted_time = first_true_index - pd.Timedelta(minutes=bounds["fault_time"]-1)
+                    alarm_string = f"Upper bound alarm for {bounds['pretty_name']} (first one at {adjusted_time.strftime('%H:%M')})."
+                    if day in alarms:
+                        alarms[day].append([bound_var, alarm_string])
+                    else:
+                        alarms[day] = [[bound_var, alarm_string]]
+    events = {
+        'start_time_pt' : [],
+        'end_time_pt' : [],
+        'event_type' : [],
+        'event_detail' : [],
+        'variable_name' : []
+    }
+    for key, value_list in alarms.items():
+        for value in value_list:
+            events['start_time_pt'].append(key)
+            events['end_time_pt'].append(key)
+            events['event_type'].append('SILENT_ALARM')
+            events['event_detail'].append(value[1])
+            events['variable_name'].append(value[0])
+
+    event_df = pd.DataFrame(events)
+    event_df.set_index('start_time_pt', inplace=True)
+    return event_df
+
+# def flag_dhw_outage(df: pd.DataFrame, daily_df : pd.DataFrame, dhw_outlet_column : str, supply_temp : int = 110, consecutive_minutes : int = 15) -> pd.DataFrame:
+#     """
+#     Parameters
+#     ----------
+#     df : pd.DataFrame
+#         Single pandas dataframe of sensor data on minute intervals.
+#     daily_df : pd.DataFrame
+#         Single pandas dataframe of sensor data on daily intervals.
+#     dhw_outlet_column : str
+#         Name of the column in df and daily_df that contains temperature of DHW supplied to building occupants
+#     supply_temp : int
+#         the minimum DHW temperature acceptable to supply to building occupants
+#     consecutive_minutes : int
+#         the number of minutes in a row that DHW is not delivered to tenants to qualify as a DHW Outage
+
+#     Returns
+#     -------
+#     event_df : pd.DataFrame
+#         Dataframe with 'ALARM' events on the days in which there was a DHW Outage.
+#     """
+#     # TODO edge case for outage that spans over a day
+#     events = {
+#         'start_time_pt' : [],
+#         'end_time_pt' : [],
+#         'event_type' : [],
+#         'event_detail' : [],
+#     }
+#     mask = df[dhw_outlet_column] < supply_temp
+#     for day in daily_df.index:
+#         next_day = day + pd.Timedelta(days=1)
+#         filtered_df = mask.loc[(mask.index >= day) & (mask.index < next_day)]
+
+#         consecutive_condition = filtered_df.rolling(window=consecutive_minutes).min() == 1
+#         if consecutive_condition.any():
+#             # first_true_index = consecutive_condition['supply_temp'].idxmax()
+#             first_true_index = consecutive_condition.idxmax()
+#             adjusted_time = first_true_index - pd.Timedelta(minutes=consecutive_minutes-1)
+#             events['start_time_pt'].append(day)
+#             events['end_time_pt'].append(next_day - pd.Timedelta(minutes=1))
+#             events['event_type'].append("ALARM")
+#             events['event_detail'].append(f"Hot Water Outage Occured (first one starting at {adjusted_time.strftime('%H:%M')})")
+#     event_df = pd.DataFrame(events)
+#     event_df.set_index('start_time_pt', inplace=True)
+#     return event_df
+
+# def generate_event_log_df(config : ConfigManager):
+#     """
+#     Creates an event log df based on user submitted events in an event log csv
+#     Parameters
+#     ----------
+#     config : ecopipeline.ConfigManager
+#         The ConfigManager object that holds configuration data for the pipeline.
+
+#     Returns
+#     -------
+#     event_df : pd.DataFrame
+#         Dataframe formatted from events in Event_log.csv for pipeline.
+#     """
+#     event_filename = config.get_event_log_path()
+#     try:
+#         event_df = pd.read_csv(event_filename)
+#         event_df['start_time_pt'] = pd.to_datetime(event_df['start_time_pt'])
+#         event_df['end_time_pt'] = pd.to_datetime(event_df['end_time_pt'])
+#         event_df.set_index('start_time_pt', inplace=True)
+#         return event_df
+#     except Exception as e:
+#         print(f"Error processing file {event_filename}: {e}")
+#         return pd.DataFrame({
+#             'start_time_pt' : [],
+#             'end_time_pt' : [],
+#             'event_type' : [],
+#             'event_detail' : [],
+#         })
+
+
+
+# def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
+#     """
+#     Function must be called on the raw minute data df after the rename_varriables() and before the ffill_missing() function has been called.
+#     The function returns a dataframe indexed by day. Each column will expanded to 3 columns, appended with '_missing_mins', '_avg_gap', and
+#     '_max_gap' respectively. the columns will carry the following statisctics:
+#     _missing_mins -> the number of minutes in the day that have no reported data value for the column
+#     _avg_gap -> the average gap (in minutes) between collected data values that day
+#     _max_gap -> the maximum gap (in minutes) between collected data values that day
+
+#     Parameters
+#     ----------
+#     df : pd.DataFrame
+#         minute data df after the rename_varriables() and before the ffill_missing() function has been called
+
+#     Returns
+#     -------
+#     daily_data_stats : pd.DataFrame
+#         new dataframe with the columns descriped in the function's description
+#     """
+#     min_time = df.index.min()
+#     start_day = min_time.floor('D')
+
+#     # If min_time is not exactly at the start of the day, move to the next day
+#     if min_time != start_day:
+#         start_day = start_day + pd.tseries.offsets.Day(1)
+
+#     # Build a complete minutely timestamp index over the full date range
+#     full_index = pd.date_range(start=start_day,
+#                                end=df.index.max().floor('D') - pd.Timedelta(minutes=1),
+#                                freq='T')
+
+#     # Reindex to include any completely missing minutes
+#     df_full = df.reindex(full_index)
+
+#     # Resample daily to count missing values per column
+#     total_missing = df_full.isna().resample('D').sum().astype(int)
+
+#     # Function to calculate max consecutive missing values
+#     def max_consecutive_nans(x):
+#         is_na = x.isna()
+#         groups = (is_na != is_na.shift()).cumsum()
+#         return is_na.groupby(groups).sum().max() or 0
+
+#     # Function to calculate average consecutive missing values
+#     def avg_consecutive_nans(x):
+#         is_na = x.isna()
+#         groups = (is_na != is_na.shift()).cumsum()
+#         gap_lengths = is_na.groupby(groups).sum()
+#         gap_lengths = gap_lengths[gap_lengths > 0]
+#         if len(gap_lengths) == 0:
+#             return 0
+#         return gap_lengths.mean()
+
+#     # Apply daily, per column
+#     max_consec_missing = df_full.resample('D').apply(lambda day: day.apply(max_consecutive_nans))
+#     avg_consec_missing = df_full.resample('D').apply(lambda day: day.apply(avg_consecutive_nans))
+
+#     # Rename columns to include a suffix
+#     total_missing = total_missing.add_suffix('_missing_mins')
+#     max_consec_missing = max_consec_missing.add_suffix('_max_gap')
+#     avg_consec_missing = avg_consec_missing.add_suffix('_avg_gap')
+
+#     # Concatenate along columns (axis=1)
+#     combined_df = pd.concat([total_missing, max_consec_missing, avg_consec_missing], axis=1)
+
+#     return combined_df
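For context, here is a hedged sketch of how the new function might be wired into a pipeline run. Only the `flag_boundary_alarms` signature and the Variable_Names.csv column requirements come from the diff; the ConfigManager construction, the sensor name `Temp_CityWater`, and the sample data below are illustrative assumptions.

```python
import pandas as pd
from ecopipeline import ConfigManager
from ecopipeline.event_tracking.event_tracking import flag_boundary_alarms

# Hypothetical setup: the ConfigManager constructor arguments are not shown in this diff,
# so treat this as a placeholder for however the pipeline already builds its config object.
config = ConfigManager("full/path/to/pipeline/config.ini")  # assumed argument

# Minute-interval sensor data indexed by timestamp (illustrative values).
idx = pd.date_range("2024-01-01", periods=2 * 24 * 60, freq="min")
df = pd.DataFrame({"Temp_CityWater": 45.0}, index=idx)
# 31 consecutive minutes below a plausible low_alarm bound of 33:
df.loc["2024-01-01 08:00":"2024-01-01 08:30", "Temp_CityWater"] = 20.0

# The daily dataframe only needs a daily index for the function's per-day loop.
daily_df = pd.DataFrame(index=pd.date_range("2024-01-01", periods=2, freq="D"))

# Variable_Names.csv (pointed to by config.get_var_names_path()) would need at least:
# variable_name,low_alarm,high_alarm
# Temp_CityWater,33,90

events = flag_boundary_alarms(df, daily_df, config, default_fault_time=15)
# Expected: one SILENT_ALARM row per day and variable that stayed out of bounds
# for at least fault_time consecutive minutes, indexed by start_time_pt.
print(events[["event_type", "event_detail", "variable_name"]])
```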
{ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline/load/load.py

@@ -319,8 +319,8 @@ def load_event_table(config : ConfigManager, event_df: pd.DataFrame, site_name :
     site_name = config.get_site_name()
     column_names = f"start_time_pt,site_name"
     column_types = ["datetime","varchar(25)","datetime",
-                    "ENUM('MISC_EVENT','DATA_LOSS','DATA_LOSS_COP','SITE_VISIT','SYSTEM_MAINTENANCE','EQUIPMENT_MALFUNCTION','PARTIAL_OCCUPANCY','INSTALLATION_ERROR','ALARM','MV_COMMISSIONED','PLANT_COMMISSIONED','INSTALLATION_ERROR_COP','SOO_PERIOD','SOO_PERIOD_COP','SYSTEM_TESTING')",
-                    "varchar(
+                    "ENUM('MISC_EVENT','DATA_LOSS','DATA_LOSS_COP','SITE_VISIT','SYSTEM_MAINTENANCE','EQUIPMENT_MALFUNCTION','PARTIAL_OCCUPANCY','INSTALLATION_ERROR','ALARM','SILENT_ALARM','MV_COMMISSIONED','PLANT_COMMISSIONED','INSTALLATION_ERROR_COP','SOO_PERIOD','SOO_PERIOD_COP','SYSTEM_TESTING')",
+                    "varchar(800)"]
     column_list = ['end_time_pt','event_type', 'event_detail']
     if not set(column_list).issubset(event_df.columns):
         raise Exception(f"event_df should contain a dataframe with columns start_time_pt, end_time_pt, event_type, and event_detail. Instead, found dataframe with columns {event_df.columns}")
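This schema tweak is what lets the new events land in the database: the event_type ENUM gains a 'SILENT_ALARM' member and the event_detail column grows to varchar(800). A small sketch of an event row the widened schema would now accept (the sensor name and detail text are illustrative; the variable_name column is introduced by the load.py hunks that follow):

```python
import pandas as pd

# One event row shaped the way load_event_table expects it: indexed by start_time_pt,
# using the ENUM member added in 0.8.1 and a detail string of the kind event_detail
# now stores as varchar(800).
row = pd.DataFrame(
    {
        "end_time_pt": [pd.Timestamp("2024-01-01")],
        "event_type": ["SILENT_ALARM"],  # newly accepted by the ENUM
        "event_detail": ["Lower bound alarm for City Water Temp (first one at 08:00)."],
        "variable_name": ["Temp_CityWater"],  # optional column handled by the hunks below
    },
    index=pd.DatetimeIndex([pd.Timestamp("2024-01-01")], name="start_time_pt"),
)
```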
@@ -329,21 +329,26 @@ def load_event_table(config : ConfigManager, event_df: pd.DataFrame, site_name :
         column_names += "," + column
 
     # create SQL statement
-    insert_str = "INSERT INTO " + table_name + " (" + column_names + ", last_modified_date, last_modified_by) VALUES (%s,%s,%s,%s,%s,'"+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+"','automatic_upload')"
+    insert_str = "INSERT INTO " + table_name + " (" + column_names + ", variable_name, last_modified_date, last_modified_by) VALUES (%s,%s,%s,%s,%s,%s,'"+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+"','automatic_upload')"
 
+    if not 'variable_name' in event_df.columns:
+        event_df['variable_name'] = None
     # add aditional columns for db creation
     full_column_names = column_names.split(",")[1:]
     full_column_names.append('last_modified_date')
     full_column_names.append('last_modified_by')
+    full_column_names.append('variable_name')
     full_column_types = column_types[1:]
     full_column_types.append('datetime')
     full_column_types.append('varchar(60)')
+    full_column_types.append('varchar(70)')
 
 
     existing_rows = pd.DataFrame({
         'start_time_pt' : [],
         'end_time_pt' : [],
         'event_type' : [],
+        'variable_name' : [],
         'last_modified_by' : []
     })
 
@@ -358,9 +363,9 @@ def load_event_table(config : ConfigManager, event_df: pd.DataFrame, site_name :
     try:
         # find existing times in database for upsert statement
         cursor.execute(
-            f"SELECT id, start_time_pt, end_time_pt, event_type, last_modified_by FROM {table_name} WHERE start_time_pt >= '{event_df.index.min()}' AND site_name = '{site_name}'")
+            f"SELECT id, start_time_pt, end_time_pt, event_detail, event_type, variable_name, last_modified_by FROM {table_name} WHERE start_time_pt >= '{event_df.index.min()}' AND site_name = '{site_name}'")
         # Fetch the results into a DataFrame
-        existing_rows = pd.DataFrame(cursor.fetchall(), columns=['id','start_time_pt', 'end_time_pt', 'event_type', 'last_modified_by'])
+        existing_rows = pd.DataFrame(cursor.fetchall(), columns=['id','start_time_pt', 'end_time_pt', 'event_detail', 'event_type', 'variable_name', 'last_modified_by'])
         existing_rows['start_time_pt'] = pd.to_datetime(existing_rows['start_time_pt'])
         existing_rows['end_time_pt'] = pd.to_datetime(existing_rows['end_time_pt'])
 
@@ -371,7 +376,7 @@ def load_event_table(config : ConfigManager, event_df: pd.DataFrame, site_name :
     ignoredRows = 0
     try:
         for index, row in event_df.iterrows():
-            time_data = [index,site_name,row['end_time_pt'],row['event_type'],row['event_detail']]
+            time_data = [index,site_name,row['end_time_pt'],row['event_type'],row['event_detail'],row['variable_name']]
             #remove nans and infinites
             time_data = [None if (x is None or pd.isna(x)) else x for x in time_data]
             time_data = [None if (x == float('inf') or x == float('-inf')) else x for x in time_data]
@@ -379,6 +384,10 @@ def load_event_table(config : ConfigManager, event_df: pd.DataFrame, site_name :
                 (existing_rows['start_time_pt'] == index) &
                 (existing_rows['event_type'] == row['event_type'])
             ]
+            if not time_data[-1] is None and not filtered_existing_rows.empty:
+                filtered_existing_rows = filtered_existing_rows[(filtered_existing_rows['variable_name'] == row['variable_name']) &
+                                                                (filtered_existing_rows['event_detail'] == row['event_detail'])]
+
             if not filtered_existing_rows.empty:
                 first_matching_row = filtered_existing_rows.iloc[0] # Retrieves the first row
                 statement, values = _generate_mysql_update_event_table(row, first_matching_row['id'])
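Taken together, the load.py changes thread the new variable_name column through the INSERT statement, the table definition, and the duplicate check, so alarm events produced by `flag_boundary_alarms` can be upserted per variable. A hedged sketch of the hand-off, reusing `df`, `daily_df`, and `config` from the earlier sketch; whether `load_event_table`'s `site_name` parameter has a default is not visible in this diff, though the function reassigns it from config at line 319 regardless:

```python
from ecopipeline.event_tracking.event_tracking import flag_boundary_alarms
from ecopipeline.load.load import load_event_table

# Build the SILENT_ALARM event dataframe: indexed by start_time_pt with
# end_time_pt, event_type, event_detail and the new variable_name columns.
event_df = flag_boundary_alarms(df, daily_df, config)

if not event_df.empty:
    # Upsert into the events table. Per the hunks above: a missing variable_name
    # column would be filled with None, and rows that do carry a variable_name are
    # matched against existing rows on variable_name + event_detail before the
    # function decides between an UPDATE and an INSERT.
    load_event_table(config, event_df)
```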
{ecopipeline-0.7.7 → ecopipeline-0.8.1}/src/ecopipeline.egg-info/SOURCES.txt

@@ -9,6 +9,8 @@ src/ecopipeline.egg-info/SOURCES.txt
 src/ecopipeline.egg-info/dependency_links.txt
 src/ecopipeline.egg-info/requires.txt
 src/ecopipeline.egg-info/top_level.txt
+src/ecopipeline/event_tracking/__init__.py
+src/ecopipeline/event_tracking/event_tracking.py
 src/ecopipeline/extract/__init__.py
 src/ecopipeline/extract/extract.py
 src/ecopipeline/load/__init__.py
The remaining files listed above with +0 -0 are unchanged between 0.7.7 and 0.8.1.