ecopipeline 0.10.2__py3-none-any.whl → 0.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ecopipeline/__init__.py +1 -0
- ecopipeline/event_tracking/event_tracking.py +136 -18
- ecopipeline/utils/NOAADataDownloader.py +498 -0
- ecopipeline/utils/__init__.py +2 -1
- {ecopipeline-0.10.2.dist-info → ecopipeline-0.11.1.dist-info}/METADATA +1 -1
- {ecopipeline-0.10.2.dist-info → ecopipeline-0.11.1.dist-info}/RECORD +9 -8
- {ecopipeline-0.10.2.dist-info → ecopipeline-0.11.1.dist-info}/WHEEL +0 -0
- {ecopipeline-0.10.2.dist-info → ecopipeline-0.11.1.dist-info}/licenses/LICENSE +0 -0
- {ecopipeline-0.10.2.dist-info → ecopipeline-0.11.1.dist-info}/top_level.txt +0 -0
ecopipeline/__init__.py
CHANGED
ecopipeline/event_tracking/event_tracking.py
CHANGED
@@ -4,15 +4,17 @@ import datetime as dt
 from ecopipeline import ConfigManager
 import re
 import mysql.connector.errors as mysqlerrors
+from datetime import timedelta

 def central_alarm_df_creator(df: pd.DataFrame, daily_data : pd.DataFrame, config : ConfigManager, system: str = "",
                              default_cop_high_bound : float = 4.5, default_cop_low_bound : float = 0,
-                             default_boundary_fault_time : int = 15, site_name : str = None) -> pd.DataFrame:
+                             default_boundary_fault_time : int = 15, site_name : str = None, day_table_name_header : str = "day",
+                             power_ratio_period_days : int = 7) -> pd.DataFrame:
     day_list = daily_data.index.to_list()
     print('Checking for alarms...')
     alarm_df = _convert_silent_alarm_dict_to_df({})
     boundary_alarm_df = flag_boundary_alarms(df, config, full_days=day_list, system=system, default_fault_time= default_boundary_fault_time)
-    pwr_alarm_df = power_ratio_alarm(daily_data, config, system=system)
+    pwr_alarm_df = power_ratio_alarm(daily_data, config, day_table_name = config.get_table_name(day_table_name_header), system=system, ratio_period_days=power_ratio_period_days)
     abnormal_COP_df = flag_abnormal_COP(daily_data, config, system = system, default_high_bound=default_cop_high_bound, default_low_bound=default_cop_low_bound)

     if len(boundary_alarm_df) > 0:
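The two new keyword arguments feed the reworked power ratio check: day_table_name_header picks which configured table name config.get_table_name resolves, and power_ratio_period_days sets the block length used by power_ratio_alarm. A minimal call sketch (the dataframes and ConfigManager instance are assumed to come from an existing pipeline run, and the import path assumes event_tracking re-exports the function):

    from ecopipeline.event_tracking import central_alarm_df_creator  # import path assumed

    alarm_df = central_alarm_df_creator(
        minute_df, daily_df, config,
        day_table_name_header="day",    # resolved to a table name via config.get_table_name
        power_ratio_period_days=7,      # evaluate power ratios over 7-day blocks
    )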
@@ -251,7 +253,31 @@ def _check_and_add_alarm(df : pd.DataFrame, mask : pd.Series, alarms_dict, day,
     else:
         alarms_dict[day] = [[var_name, alarm_string]]

-def power_ratio_alarm(daily_df: pd.DataFrame, config : ConfigManager, system: str = "", verbose : bool = False) -> pd.DataFrame:
+def power_ratio_alarm(daily_df: pd.DataFrame, config : ConfigManager, day_table_name : str, system: str = "", verbose : bool = False, ratio_period_days : int = 7) -> pd.DataFrame:
+    """
+    Function will take a pandas dataframe of daily data and the location of alarm information in a csv,
+    and create a dataframe with applicable alarm events
+
+    Parameters
+    ----------
+    daily_df: pd.DataFrame
+        post-transformed dataframe for daily data. It should be noted that this function expects consecutive, in-order days. If days
+        are out of order or have gaps, the function may return erroneous alarms.
+    config : ecopipeline.ConfigManager
+        The ConfigManager object that holds configuration data for the pipeline. Among other things, this object will point to a file
+        called Variable_Names.csv in the input folder of the pipeline (e.g. "full/path/to/pipeline/input/Variable_Names.csv").
+        The file must have at least two columns titled "variable_name" and "alarm_codes", which should contain the
+        name of each variable in the dataframe that requires alarming and the ratio alarm code in the form "PR_{Power Ratio Name}:{low percentage}-{high percentage}"
+    system: str
+        string of system name if processing a particular system in a Variable_Names.csv file with multiple systems. Leave as an empty string if not applicable.
+    verbose : bool
+        add print statements in power ratio
+
+    Returns
+    -------
+    pd.DataFrame:
+        Pandas dataframe with alarm events, empty if no alarms triggered
+    """
     daily_df_copy = daily_df.copy()
     variable_names_path = config.get_var_names_path()
     try:
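For concreteness, a hypothetical Variable_Names.csv fragment using the "PR_{Power Ratio Name}:{low percentage}-{high percentage}" code format: the two rows below group both heat pumps under a single PR_HPWH ratio and assert that each should account for 40-60% of the group's summed energy use.

    variable_name,alarm_codes,pretty_name
    PowerIn_HPWH1,PR_HPWH:40-60,Heat Pump 1
    PowerIn_HPWH2,PR_HPWH:40-60,Heat Pump 2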
@@ -274,8 +300,15 @@ def power_ratio_alarm(daily_df: pd.DataFrame, config : ConfigManager, system: st
     ratios_df = ratios_df.loc[:, ["variable_name", "alarm_codes", "pretty_name"]]
     ratios_df = ratios_df[ratios_df['alarm_codes'].str.contains('PR', na=False)]
     ratios_df.dropna(axis=0, thresh=2, inplace=True)
-
+    if ratio_period_days > 1:
+        if verbose:
+            print(f"adding last {ratio_period_days} days to daily_df")
+        daily_df_copy = _append_previous_days_to_df(daily_df_copy, config, ratio_period_days, day_table_name)
+    elif ratio_period_days < 1:
+        print("power ratio alarm period, ratio_period_days, must be at least 1")
+        return pd.DataFrame()

+    ratios_df.set_index(['variable_name'], inplace=True)
     ratio_dict = {}
     for ratios_var, ratios in ratios_df.iterrows():
         if not ratios_var in daily_df_copy.columns:
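When ratio_period_days is greater than 1, the earliest blocks need rows from before daily_df's first day, which is why _append_previous_days_to_df (added in the next hunk) pulls the preceding days from the day table. A small sketch of the look-back window arithmetic (dates hypothetical):

    from datetime import timedelta
    import pandas as pd

    idx = pd.date_range("2024-03-08", periods=5, freq="D")  # daily_df index
    period_start = idx.min() - timedelta(7)                 # 2024-03-01
    # rows with period_start <= time_pt < idx.min() are fetched and prepended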
@@ -297,26 +330,111 @@ def power_ratio_alarm(daily_df: pd.DataFrame, config : ConfigManager, system: st
             ratio_dict[pr_id] = [[ratios_var],[float(low_high[0])],[float(low_high[1])],[ratios['pretty_name']]]
     if verbose:
         print("ratio_dict keys:", ratio_dict.keys())
+    # Create blocks of ratio_period_days
+    blocks_df = _create_period_blocks(daily_df_copy, ratio_period_days, verbose)
+
+    if blocks_df.empty:
+        print("No complete blocks available for analysis")
+        return pd.DataFrame()
+
     alarms = {}
     for key, value_list in ratio_dict.items():
-        daily_df_copy[key] = daily_df_copy[value_list[0]].sum(axis=1)
+        # Calculate total for each block
+        blocks_df[key] = blocks_df[value_list[0]].sum(axis=1)
         for i in range(len(value_list[0])):
             column_name = value_list[0][i]
-            daily_df_copy[f'{column_name}_{key}'] = (daily_df_copy[column_name]/daily_df_copy[key]) * 100
+            # Calculate ratio for each block
+            blocks_df[f'{column_name}_{key}'] = (blocks_df[column_name]/blocks_df[key]) * 100
             if verbose:
-                print(f"Ratios for {column_name}_{key}",daily_df_copy[f'{column_name}_{key}'])
-            _check_and_add_ratio_alarm(daily_df_copy, key, column_name, value_list[3][i], alarms, value_list[2][i], value_list[1][i])
-    return _convert_silent_alarm_dict_to_df(alarms)
-
-def _check_and_add_ratio_alarm(daily_df: pd.DataFrame, alarm_key : str, column_name : str, pretty_name : str, alarms_dict : dict, high_bound : float, low_bound : float):
-    alarm_daily_df = daily_df.loc[(daily_df[f"{column_name}_{alarm_key}"] < low_bound) | (daily_df[f"{column_name}_{alarm_key}"] > high_bound)]
-    if not alarm_daily_df.empty:
-        for day, values in alarm_daily_df.iterrows():
-            alarm_str = f"Power ratio alarm: {pretty_name} accounted for {round(values[f'{column_name}_{alarm_key}'], 2)}% of {alarm_key} energy use. {round(low_bound, 2)}-{round(high_bound, 2)}% of {alarm_key} energy use expected."
-            if day in alarms_dict:
-                alarms_dict[day].append([column_name, alarm_str])
+                print(f"Block ratios for {column_name}_{key}:", blocks_df[f'{column_name}_{key}'])
+            _check_and_add_ratio_alarm_blocks(blocks_df, key, column_name, value_list[3][i], alarms, value_list[2][i], value_list[1][i], ratio_period_days)
+    return _convert_silent_alarm_dict_to_df(alarms)
+    # alarms = {}
+    # for key, value_list in ratio_dict.items():
+    #     daily_df_copy[key] = daily_df_copy[value_list[0]].sum(axis=1)
+    #     for i in range(len(value_list[0])):
+    #         column_name = value_list[0][i]
+    #         daily_df_copy[f'{column_name}_{key}'] = (daily_df_copy[column_name]/daily_df_copy[key]) * 100
+    #         if verbose:
+    #             print(f"Ratios for {column_name}_{key}",daily_df_copy[f'{column_name}_{key}'])
+    #         _check_and_add_ratio_alarm(daily_df_copy, key, column_name, value_list[3][i], alarms, value_list[2][i], value_list[1][i])
+    # return _convert_silent_alarm_dict_to_df(alarms)
+
+# def _check_and_add_ratio_alarm(daily_df: pd.DataFrame, alarm_key : str, column_name : str, pretty_name : str, alarms_dict : dict, high_bound : float, low_bound : float):
+#     alarm_daily_df = daily_df.loc[(daily_df[f"{column_name}_{alarm_key}"] < low_bound) | (daily_df[f"{column_name}_{alarm_key}"] > high_bound)]
+#     if not alarm_daily_df.empty:
+#         for day, values in alarm_daily_df.iterrows():
+#             alarm_str = f"Power ratio alarm: {pretty_name} accounted for {round(values[f'{column_name}_{alarm_key}'], 2)}% of {alarm_key} energy use. {round(low_bound, 2)}-{round(high_bound, 2)}% of {alarm_key} energy use expected."
+#             if day in alarms_dict:
+#                 alarms_dict[day].append([column_name, alarm_str])
+#             else:
+#                 alarms_dict[day] = [[column_name, alarm_str]]
+def _check_and_add_ratio_alarm_blocks(blocks_df: pd.DataFrame, alarm_key: str, column_name: str, pretty_name: str, alarms_dict: dict, high_bound: float, low_bound: float, ratio_period_days: int):
+    """
+    Check for alarms in block-based ratios and add to alarms dictionary.
+    """
+    alarm_blocks_df = blocks_df.loc[(blocks_df[f"{column_name}_{alarm_key}"] < low_bound) | (blocks_df[f"{column_name}_{alarm_key}"] > high_bound)]
+    if not alarm_blocks_df.empty:
+        for block_end_date, values in alarm_blocks_df.iterrows():
+            alarm_str = f"Power ratio alarm ({ratio_period_days}-day block ending {block_end_date.strftime('%Y-%m-%d')}): {pretty_name} accounted for {round(values[f'{column_name}_{alarm_key}'], 2)}% of {alarm_key} energy use. {round(low_bound, 2)}-{round(high_bound, 2)}% of {alarm_key} energy use expected."
+            if block_end_date in alarms_dict:
+                alarms_dict[block_end_date].append([column_name, alarm_str])
             else:
-                alarms_dict[day] = [[column_name, alarm_str]]
+                alarms_dict[block_end_date] = [[column_name, alarm_str]]
+
+def _create_period_blocks(daily_df: pd.DataFrame, ratio_period_days: int, verbose: bool = False) -> pd.DataFrame:
+    """
+    Create blocks of ratio_period_days by summing values within each block.
+    Each block will be represented by its end date.
+    """
+    if len(daily_df) < ratio_period_days:
+        if verbose:
+            print(f"Not enough data for {ratio_period_days}-day blocks. Need at least {ratio_period_days} days, have {len(daily_df)}")
+        return pd.DataFrame()
+
+    blocks = []
+    block_dates = []
+
+    # Create blocks by summing consecutive groups of ratio_period_days
+    for i in range(ratio_period_days - 1, len(daily_df)):
+        start_idx = i - ratio_period_days + 1
+        end_idx = i + 1
+
+        block_data = daily_df.iloc[start_idx:end_idx].sum()
+        blocks.append(block_data)
+        # Use the end date of the block as the identifier
+        block_dates.append(daily_df.index[i])
+
+    if not blocks:
+        return pd.DataFrame()
+
+    blocks_df = pd.DataFrame(blocks, index=block_dates)
+
+    if verbose:
+        print(f"Created {len(blocks_df)} blocks of {ratio_period_days} days each")
+        print(f"Block date range: {blocks_df.index.min()} to {blocks_df.index.max()}")
+
+    return blocks_df
+
+def _append_previous_days_to_df(daily_df: pd.DataFrame, config : ConfigManager, ratio_period_days : int, day_table_name : str, primary_key : str = "time_pt") -> pd.DataFrame:
+    db_connection, cursor = config.connect_db()
+    period_start = daily_df.index.min() - timedelta(ratio_period_days)
+    try:
+        # pull the previous days of daily data from the database
+        cursor.execute(
+            f"SELECT * FROM {day_table_name} WHERE {primary_key} < '{daily_df.index.min()}' AND {primary_key} >= '{period_start}'")
+        result = cursor.fetchall()
+        column_names = [desc[0] for desc in cursor.description]
+        old_days_df = pd.DataFrame(result, columns=column_names)
+        old_days_df = old_days_df.set_index(primary_key)
+        daily_df = pd.concat([daily_df, old_days_df])
+        daily_df = daily_df.sort_index(ascending=True)
+    except mysqlerrors.Error:
+        print(f"Table {day_table_name} has no data.")

+    db_connection.close()
+    cursor.close()
+    return daily_df

 # def flag_dhw_outage(df: pd.DataFrame, daily_df : pd.DataFrame, dhw_outlet_column : str, supply_temp : int = 110, consecutive_minutes : int = 15) -> pd.DataFrame:
 # """
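Because each block sums the trailing ratio_period_days rows and is keyed by the block's end date, _create_period_blocks is effectively a trailing rolling sum; a small equivalence sketch on toy data (not from the package):

    import pandas as pd

    daily = pd.DataFrame({"PowerIn_HPWH1": range(10)},
                         index=pd.date_range("2024-01-01", periods=10, freq="D"))
    blocks = daily.rolling(window=7).sum().dropna()  # 7-day sums labeled by window end date
    print(blocks)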
ecopipeline/utils/NOAADataDownloader.py
ADDED
@@ -0,0 +1,498 @@
+import requests
+import pandas as pd
+# from datetime import datetime, timedelta
+# import os
+# import gzip
+# import urllib.request
+from io import StringIO
+
+class NOAADataDownloader:
+    def __init__(self, station_code, api_token=None):
+        """
+        Initialize downloader for a specific weather station
+
+        Args:
+            station_code (str): Airport code (e.g., 'KLAX', 'LAX', 'JFK', 'ORD')
+            api_token (str, optional): NOAA API token for daily data access
+        """
+        self.station_code = station_code.upper().strip()
+        self.api_token = api_token
+        self.base_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/"
+
+        # Clean airport code - add K if not present for US airports
+        if len(self.station_code) == 3 and not self.station_code.startswith('K'):
+            self.station_code = 'K' + self.station_code
+
+        # Find station information
+        self.station_info = self._find_station_info()
+
+        if not self.station_info:
+            raise ValueError(f"Could not find weather station for {station_code}")
+
+        print(f"Initialized downloader for: {self.station_info['name']}")
+        if self.station_info.get('usaf') and self.station_info.get('wban'):
+            print(f"ISD Station ID: {self.station_info['usaf']}-{self.station_info['wban']}")
+        if self.station_info.get('ghcn_id'):
+            print(f"GHCN-D Station ID: {self.station_info['ghcn_id']}")
+
+    def _find_station_info(self):
+        """Find station information for the given airport code"""
+
+        # First try common stations mapping
+        common_stations = self._get_common_stations()
+        if self.station_code in common_stations:
+            return common_stations[self.station_code]
+
+        # Try searching ISD station history
+        isd_station = self._search_isd_stations()
+        if isd_station:
+            return isd_station
+
+        # Try API search if token available
+        if self.api_token:
+            api_station = self._search_api_stations()
+            if api_station:
+                return api_station
+
+        return None
+
+    def _get_common_stations(self):
+        """Return mapping of common airport codes to station information"""
+        return {
+            'KLAX': {
+                'name': 'LOS ANGELES INTERNATIONAL AIRPORT',
+                'usaf': '722950',
+                'wban': '23174',
+                'ghcn_id': 'GHCND:USW00023174',
+                'latitude': 33.938,
+                'longitude': -118.389,
+                'elevation': 32.0
+            },
+            'KJFK': {
+                'name': 'JOHN F KENNEDY INTERNATIONAL AIRPORT',
+                'usaf': '744860',
+                'wban': '94789',
+                'ghcn_id': 'GHCND:USW00094789',
+                'latitude': 40.640,
+                'longitude': -73.779,
+                'elevation': 3.4
+            },
+            'KORD': {
+                'name': 'CHICAGO OHARE INTERNATIONAL AIRPORT',
+                'usaf': '725300',
+                'wban': '94846',
+                'ghcn_id': 'GHCND:USW00094846',
+                'latitude': 41.995,
+                'longitude': -87.934,
+                'elevation': 201.5
+            },
+            'KDEN': {
+                'name': 'DENVER INTERNATIONAL AIRPORT',
+                'usaf': '725650',
+                'wban': '03017',
+                'ghcn_id': 'GHCND:USW00003017',
+                'latitude': 39.833,
+                'longitude': -104.65,
+                'elevation': 1640.0
+            },
+            'KATL': {
+                'name': 'HARTSFIELD JACKSON ATLANTA INTERNATIONAL AIRPORT',
+                'usaf': '722190',
+                'wban': '13874',
+                'ghcn_id': 'GHCND:USW00013874',
+                'latitude': 33.640,
+                'longitude': -84.427,
+                'elevation': 308.5
+            },
+            'KMIA': {
+                'name': 'MIAMI INTERNATIONAL AIRPORT',
+                'usaf': '722020',
+                'wban': '12839',
+                'ghcn_id': 'GHCND:USW00012839',
+                'latitude': 25.793,
+                'longitude': -80.290,
+                'elevation': 11.0
+            },
+            'KSEA': {
+                'name': 'SEATTLE TACOMA INTERNATIONAL AIRPORT',
+                'usaf': '727930',
+                'wban': '24233',
+                'ghcn_id': 'GHCND:USW00024233',
+                'latitude': 47.449,
+                'longitude': -122.309,
+                'elevation': 131.1
+            },
+            'KBOS': {
+                'name': 'BOSTON LOGAN INTERNATIONAL AIRPORT',
+                'usaf': '725090',
+                'wban': '14739',
+                'ghcn_id': 'GHCND:USW00014739',
+                'latitude': 42.361,
+                'longitude': -71.020,
+                'elevation': 6.1
+            },
+            'KPHX': {
+                'name': 'PHOENIX SKY HARBOR INTERNATIONAL AIRPORT',
+                'usaf': '722780',
+                'wban': '23183',
+                'ghcn_id': 'GHCND:USW00023183',
+                'latitude': 33.434,
+                'longitude': -112.008,
+                'elevation': 337.1
+            },
+            'KLAS': {
+                'name': 'LAS VEGAS MCCARRAN INTERNATIONAL AIRPORT',
+                'usaf': '723860',
+                'wban': '23169',
+                'ghcn_id': 'GHCND:USW00023169',
+                'latitude': 36.080,
+                'longitude': -115.152,
+                'elevation': 664.1
+            }
+        }
+
+    def _search_isd_stations(self):
+        """Search ISD station history for the airport"""
+        try:
+            url = "https://www.ncei.noaa.gov/data/global-hourly/doc/isd-history.csv"
+            response = requests.get(url, timeout=10)
+            response.raise_for_status()
+
+            df = pd.read_csv(StringIO(response.text))
+
+            # Search for airport code in station name
+            search_terms = [
+                self.station_code.replace('K', ''), # LAX from KLAX
+                self.station_code, # KLAX
+                self.station_code + ' ', # Exact match with space
+            ]
+
+            for term in search_terms:
+                mask = df['STATION NAME'].str.contains(term, case=False, na=False)
+                matches = df[mask]
+
+                if not matches.empty:
+                    # Take the first match with recent data
+                    best_match = matches.iloc[0]
+
+                    return {
+                        'name': best_match['STATION NAME'],
+                        'usaf': str(best_match['USAF']).zfill(6),
+                        'wban': str(best_match['WBAN']).zfill(5),
+                        'country': best_match['CTRY'],
+                        'state': best_match.get('STATE', ''),
+                        'latitude': best_match['LAT'],
+                        'longitude': best_match['LON'],
+                        'elevation': best_match['ELEV(M)'],
+                        'begin_date': str(best_match['BEGIN']),
+                        'end_date': str(best_match['END'])
+                    }
+
+            return None
+
+        except Exception as e:
+            print(f"ISD search failed: {e}")
+            return None
+
+    def _search_api_stations(self):
+        """Search for stations using NOAA API"""
+        if not self.api_token:
+            return None
+
+        try:
+            url = f"{self.base_url}stations"
+            params = {'limit': 100, 'format': 'json'}
+            headers = {"token": self.api_token}
+
+            response = requests.get(url, params=params, headers=headers, timeout=10)
+            response.raise_for_status()
+
+            data = response.json()
+            if 'results' in data:
+                search_terms = [self.station_code.replace('K', ''), self.station_code]
+
+                for station in data['results']:
+                    name = station.get('name', '').upper()
+                    for term in search_terms:
+                        if term in name:
+                            return {
+                                'name': station.get('name'),
+                                'ghcn_id': station.get('id'),
+                                'latitude': station.get('latitude'),
+                                'longitude': station.get('longitude'),
+                                'elevation': station.get('elevation'),
+                                'mindate': station.get('mindate'),
+                                'maxdate': station.get('maxdate')
+                            }
+
+            return None
+
+        except Exception as e:
+            print(f"API search failed: {e}")
+            return None
+
+    def get_station_info(self):
+        """Return station information"""
+        return self.station_info.copy()
+
+    # def download_hourly_data(self, start_date, end_date, data_types=None):
+    #     """
+    #     Download hourly weather data using NOAA's data access API
+
+    #     Args:
+    #         start_date (str or pd.Timestamp): Start date in YYYY-MM-DD format or pandas Timestamp or datetime
+    #         end_date (str or pd.Timestamp): End date in YYYY-MM-DD format or pandas Timestamp or datetime
+    #         data_types (list, optional): List of data types to download
+
+    #     Returns:
+    #         pandas.DataFrame: Hourly weather data
+    #     """
+    #     if not (self.station_info.get('usaf') and self.station_info.get('wban')):
+    #         raise ValueError("Station does not have ISD identifiers for hourly data")
+
+    #     # Convert pd.Timestamp to string format if needed
+    #     if isinstance(start_date, pd.Timestamp):
+    #         start_date = start_date.strftime('%Y-%m-%d')
+    #     elif hasattr(start_date, 'strftime'): # datetime.datetime or similar
+    #         start_date = start_date.strftime('%Y-%m-%d')
+
+    #     if isinstance(end_date, pd.Timestamp):
+    #         end_date = end_date.strftime('%Y-%m-%d')
+    #     elif hasattr(end_date, 'strftime'): # datetime.datetime or similar
+    #         end_date = end_date.strftime('%Y-%m-%d')
+
+    #     # Create station ID in format expected by the API
+    #     station_id = f"{self.station_info['usaf']}{self.station_info['wban']}"
+    #     # station_id = "USW00023174"#"USC00457180"
+    #     # print("station_id is ",station_id)
+    #     # Default data types for hourly weather data
+    #     if not data_types:
+    #         data_types = [
+    #             'TMP', # Temperature
+    #             'DEW', # Dew point
+    #             'SLP', # Sea level pressure
+    #             'WND', # Wind direction and speed
+    #             'VIS', # Visibility
+    #             'AA1'  # Precipitation (if available)
+    #         ]
+
+    #     # NOAA's data access API endpoint
+    #     base_url = "https://www.ncei.noaa.gov/access/services/data/v1"
+
+    #     params = {
+    #         'dataset': 'global-hourly',
+    #         # 'dataTypes': 'TMP',#','.join(data_types),
+    #         'stations': station_id,
+    #         'startDate': start_date,
+    #         'endDate': end_date,
+    #         'format': 'json',
+    #         'includeAttributes': 'true',
+    #         'includeStationName': 'true',
+    #         'includeStationLocation': 'true'
+    #     }
+
+    #     try:
+    #         print(f"Downloading hourly data from {start_date} to {end_date}...")
+    #         print(f"Station: {station_id} ({self.station_info.get('name', 'Unknown')})")
+    #         full_url = requests.Request('GET', base_url, params=params).prepare().url
+    #         print(f"API Request URL:")
+    #         print(f"{full_url}")
+    #         print()
+    #         # https://www.ncei.noaa.gov/access/services/data/v1?dataset=global-hourly
+    #         # &dataTypes=TMP%2CDEW%2CSLP%2CWND%2CVIS%2CAA1&stations=USW00023174&startDate=2025-08-26&endDate=2025-09-18&format=json
+    #         # &includeAttributes=true&includeStationName=true&includeStationLocation=true
+
+    #         # https://www.ncei.noaa.gov/access/services/data/v1?dataset=global-summary-of-the-year
+    #         # &dataTypes=DP01,DP05,DP10,DSND,DSNW,DT00,DT32,DX32,DX70,DX90,SNOW,PRCP&stations=ASN00084027&startDate=1952-01-01&endDate=1970-12-31&includeAttributes=true&format=pdf
+
+    #         response = requests.get(base_url, params=params, timeout=60)
+    #         response.raise_for_status()
+
+    #         # Parse JSON response
+    #         data = response.json()
+
+    #         if not data:
+    #             print("No data returned from API")
+    #             return pd.DataFrame()
+
+    #         # Convert to DataFrame
+    #         df = pd.DataFrame(data)
+
+    #         if df.empty:
+    #             print("No hourly data found for the specified parameters")
+    #             return pd.DataFrame()
+
+    #         # Process the data
+    #         df = self._process_hourly_data(df)
+
+    #         print(f"Successfully downloaded {len(df)} hourly records")
+    #         return df
+
+    #     except requests.exceptions.RequestException as e:
+    #         print(f"API request failed: {e}")
+    #         if hasattr(e, 'response') and e.response is not None:
+    #             print(f"Response status: {e.response.status_code}")
+    #             print(f"Response text: {e.response.text[:500]}...")
+    #         return pd.DataFrame()
+    #     except Exception as e:
+    #         print(f"Failed to download hourly data: {e}")
+    #         return pd.DataFrame()
+
+    # def _process_hourly_data(self, df):
+    #     """Process and clean hourly data from NOAA API"""
+    #     try:
+    #         # Convert DATE to datetime
+    #         if 'DATE' in df.columns:
+    #             df['datetime'] = pd.to_datetime(df['DATE'], errors='coerce')
+    #             df = df.dropna(subset=['datetime'])
+    #             df = df.sort_values('datetime')
+
+    #         # Process temperature data (convert tenths of degrees C to C)
+    #         if 'TMP' in df.columns:
+    #             df['temperature_c'] = pd.to_numeric(df['TMP'], errors='coerce') / 10
+    #             df['temperature_f'] = df['temperature_c'] * 9/5 + 32
+
+    #         # Process dew point data
+    #         if 'DEW' in df.columns:
+    #             df['dewpoint_c'] = pd.to_numeric(df['DEW'], errors='coerce') / 10
+    #             df['dewpoint_f'] = df['dewpoint_c'] * 9/5 + 32
+
+    #         # Process sea level pressure (convert tenths of hPa to hPa)
+    #         if 'SLP' in df.columns:
+    #             df['pressure_hpa'] = pd.to_numeric(df['SLP'], errors='coerce') / 10
+
+    #         # Process wind data - format is typically "999,9" (direction,speed)
+    #         if 'WND' in df.columns:
+    #             wind_data = df['WND'].astype(str)
+
+    #             # Extract wind direction and speed
+    #             wind_direction = []
+    #             wind_speed = []
+
+    #             for wind_str in wind_data:
+    #                 try:
+    #                     if ',' in wind_str:
+    #                         dir_str, speed_str = wind_str.split(',')[:2]
+
+    #                         # Wind direction (degrees)
+    #                         direction = int(dir_str) if dir_str != '999' else None
+    #                         wind_direction.append(direction)
+
+    #                         # Wind speed (tenths of m/s to m/s)
+    #                         speed = float(speed_str) / 10 if speed_str != '9999' else None
+    #                         wind_speed.append(speed)
+    #                     else:
+    #                         wind_direction.append(None)
+    #                         wind_speed.append(None)
+    #                 except (ValueError, IndexError):
+    #                     wind_direction.append(None)
+    #                     wind_speed.append(None)
+
+    #             df['wind_direction'] = wind_direction
+    #             df['wind_speed_mps'] = wind_speed
+    #             df['wind_speed_kmh'] = pd.Series(wind_speed) * 3.6
+    #             df['wind_speed_mph'] = pd.Series(wind_speed) * 2.237
+
+    #         # Process visibility (meters)
+    #         if 'VIS' in df.columns:
+    #             df['visibility_m'] = pd.to_numeric(df['VIS'], errors='coerce')
+    #             df['visibility_km'] = df['visibility_m'] / 1000
+    #             df['visibility_mi'] = df['visibility_m'] / 1609.34
+
+    #         # Add station information columns
+    #         if 'STATION' in df.columns:
+    #             df['station_id'] = df['STATION']
+
+    #         if 'NAME' in df.columns:
+    #             df['station_name'] = df['NAME']
+
+    #         if 'LATITUDE' in df.columns:
+    #             df['latitude'] = pd.to_numeric(df['LATITUDE'], errors='coerce')
+
+    #         if 'LONGITUDE' in df.columns:
+    #             df['longitude'] = pd.to_numeric(df['LONGITUDE'], errors='coerce')
+
+    #         if 'ELEVATION' in df.columns:
+    #             df['elevation_m'] = pd.to_numeric(df['ELEVATION'], errors='coerce')
+
+    #         return df
+
+    #     except Exception as e:
+    #         print(f"Error processing hourly data: {e}")
+    #         return df
+
+
+    def download_daily_TAVG_data(self, start_date, end_date, convert_to_fahrenheit = True):
+        """
+        Download daily Average Temperature data using NOAA API
+
+        Args:
+            start_date (str or pd.Timestamp): Start date in YYYY-MM-DD format or pandas Timestamp or datetime
+            end_date (str or pd.Timestamp): End date in YYYY-MM-DD format or pandas Timestamp or datetime
+            convert_to_fahrenheit (bool): converts temperature values to Fahrenheit. Otherwise values will be Celsius*10
+
+        Returns:
+            pandas.DataFrame: Daily weather data
+        """
+        if not self.api_token:
+            raise ValueError("API token required for daily data. Get one from https://www.ncdc.noaa.gov/cdo-web/token")
+
+        if not self.station_info.get('ghcn_id'):
+            raise ValueError("Station does not have GHCN-D identifier for daily data")
+
+        # Convert pd.Timestamp to string format if needed
+        if isinstance(start_date, pd.Timestamp):
+            start_date = start_date.strftime('%Y-%m-%d')
+        elif hasattr(start_date, 'strftime'): # datetime.datetime or similar
+            start_date = start_date.strftime('%Y-%m-%d')
+
+        if isinstance(end_date, pd.Timestamp):
+            end_date = end_date.strftime('%Y-%m-%d')
+        elif hasattr(end_date, 'strftime'): # datetime.datetime or similar
+            end_date = end_date.strftime('%Y-%m-%d')
+
+        # if not datatypes:
+        #     datatypes = ['TAVG']
+
+        url = f"{self.base_url}data"
+        params = {
+            'datasetid': 'GHCND',
+            'stationid': self.station_info['ghcn_id'],
+            'startdate': start_date,
+            'enddate': end_date,
+            'datatypeid': 'TAVG',
+            'limit': 1000,
+            'format': 'json'
+        }
+
+        try:
+            print(f"Downloading daily data from {start_date} to {end_date}...")
+
+            headers = {"token": self.api_token}
+            response = requests.get(url, params=params, headers=headers)
+            response.raise_for_status()
+
+            data = response.json()
+            if 'results' in data:
+                df = pd.DataFrame(data['results'])
+
+                if not df.empty:
+                    df['date'] = pd.to_datetime(df['date'])
+                    df = df.sort_values('date')
+                    # Convert value from tenths of Celsius to Fahrenheit
+                    df['value'] = (df['value'] / 10) * 9/5 + 32
+                    df = df.set_index('date')
+                    df = df[['value']].rename(columns={'value': 'OAT_NOAA'})
+
+                    print(f"Successfully downloaded {len(df)} daily records")
+                    return df
+                else:
+                    print("No daily data found for the specified parameters")
+                    return pd.DataFrame()
+            else:
+                print("No daily data found")
+                return pd.DataFrame()
+
+        except requests.exceptions.RequestException as e:
+            print(f"Daily data download failed: {e}")
+            return pd.DataFrame()
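A minimal usage sketch for the new downloader (the token value is a placeholder; TAVG coverage varies by station, and a three-letter code like "SEA" is normalized to "KSEA"):

    from ecopipeline.utils.NOAADataDownloader import NOAADataDownloader

    downloader = NOAADataDownloader("SEA", api_token="YOUR_NOAA_TOKEN")
    oat_df = downloader.download_daily_TAVG_data("2024-01-01", "2024-01-31")
    print(oat_df.head())  # single 'OAT_NOAA' column in deg F, indexed by date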
ecopipeline/utils/__init__.py
CHANGED
@@ -1 +1,2 @@
-from .ConfigManager import *
+from .ConfigManager import *
+from .NOAADataDownloader import *
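With this star re-export in place, the class should also be importable from the package namespace (assuming NOAADataDownloader.py defines no restrictive __all__):

    from ecopipeline.utils import NOAADataDownloader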
{ecopipeline-0.10.2.dist-info → ecopipeline-0.11.1.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
-ecopipeline/__init__.py,sha256=
+ecopipeline/__init__.py,sha256=pjC00JWsjVAhS0jUKHD-wyi4UIpTsWbIg9JaxLS1mlc,275
 ecopipeline/event_tracking/__init__.py,sha256=SV2kkvJgptjeyLQlqHWcDRpQO6-JC433_dRZ3H9-ZNU,131
-ecopipeline/event_tracking/event_tracking.py,sha256=
+ecopipeline/event_tracking/event_tracking.py,sha256=HffWAIAkNJ8INdG3_86RnDgw2bpHwv9hhkZ5oiiugZY,29653
 ecopipeline/extract/__init__.py,sha256=gQ3sak6NJ63Gpo-hZXrtZfeKOTHLRyAVXfTgxxRpqPo,675
 ecopipeline/extract/extract.py,sha256=y32feIIzgABwrwfduNQM1hICmkVOU4PYu6-M07zCLpU,51422
 ecopipeline/load/__init__.py,sha256=NLa_efQJZ8aP-J0Y5xx9DP7mtfRH9jY6Jz1ZMZN_BAA,292
@@ -10,10 +10,11 @@ ecopipeline/transform/bayview.py,sha256=TP24dnTsUD95X-f6732egPZKjepFLJgDm9ImGr-f
 ecopipeline/transform/lbnl.py,sha256=EQ54G4rJXaZ7pwVusKcdK2KBehSdCsNo2ybphtMGs7o,33400
 ecopipeline/transform/transform.py,sha256=wL4B00XBwLWVlf7goOLSHKgLFmIsXprQNepGLLO_wTk,50028
 ecopipeline/utils/ConfigManager.py,sha256=-g1wtExdvhYO5Y6Q3cRbywa__DxRMFruLrB4YanwaPY,12168
-ecopipeline/utils/
+ecopipeline/utils/NOAADataDownloader.py,sha256=iC2nl_O4PS1KFrchcPXRZxshwZwUMSqXy6BQBUwnOUU,20927
+ecopipeline/utils/__init__.py,sha256=7dT3tP6SMK4uBW6NBmQ8i6LaNTTuV6fpAZToBBlJ904,62
 ecopipeline/utils/unit_convert.py,sha256=VFh1we2Y8KV3u21BeWb-U3TlZJXo83q5vdxxkpgcuME,3064
-ecopipeline-0.
-ecopipeline-0.
-ecopipeline-0.
-ecopipeline-0.
-ecopipeline-0.
+ecopipeline-0.11.1.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ecopipeline-0.11.1.dist-info/METADATA,sha256=_-HP7vfIrz6JBltdDkX4obF-AUJGrbxZfnFtrUBQ49k,2330
+ecopipeline-0.11.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ecopipeline-0.11.1.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
+ecopipeline-0.11.1.dist-info/RECORD,,
{ecopipeline-0.10.2.dist-info → ecopipeline-0.11.1.dist-info}/WHEEL
File without changes
{ecopipeline-0.10.2.dist-info → ecopipeline-0.11.1.dist-info}/licenses/LICENSE
File without changes
{ecopipeline-0.10.2.dist-info → ecopipeline-0.11.1.dist-info}/top_level.txt
File without changes