ecopipeline 0.4.18__tar.gz → 0.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ecopipeline-0.4.18/src/ecopipeline.egg-info → ecopipeline-0.5.1}/PKG-INFO +2 -2
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/setup.cfg +1 -1
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/extract/extract.py +1 -1
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/load/__init__.py +2 -2
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/load/load.py +63 -0
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/transform/transform.py +22 -18
- {ecopipeline-0.4.18 → ecopipeline-0.5.1/src/ecopipeline.egg-info}/PKG-INFO +2 -2
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/LICENSE +0 -0
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/README.md +0 -0
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/pyproject.toml +0 -0
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/setup.py +0 -0
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/__init__.py +0 -0
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/extract/__init__.py +0 -0
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/transform/__init__.py +0 -0
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/transform/bayview.py +0 -0
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/transform/lbnl.py +0 -0
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/utils/ConfigManager.py +0 -0
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/utils/__init__.py +0 -0
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/utils/unit_convert.py +0 -0
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline.egg-info/SOURCES.txt +0 -0
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline.egg-info/dependency_links.txt +0 -0
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline.egg-info/requires.txt +0 -0
- {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: ecopipeline
|
|
3
|
-
Version: 0.4.18
|
|
3
|
+
Version: 0.5.1
|
|
4
4
|
Summary: Contains functions for use in Ecotope Datapipelines
|
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
|
6
6
|
Classifier: License :: OSI Approved :: GNU General Public License (GPL)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[metadata]
|
|
2
2
|
name = ecopipeline
|
|
3
|
-
version = 0.4.18
|
|
3
|
+
version = 0.5.1
|
|
4
4
|
authors = ["Carlos Bello, <bellocarlos@seattleu.edu>, Emil Fahrig <fahrigemil@seattleu.edu>, Casey Mang <cmang@seattleu.edu>, Julian Harris <harrisjulian@seattleu.edu>, Roger Tram <rtram@seattleu.edu>, Nolan Price <nolan@ecotope.com>"]
|
|
5
5
|
description = Contains functions for use in Ecotope Datapipelines
|
|
6
6
|
long_description = file: README.md
|
|
@@ -940,7 +940,7 @@ def _download_noaa_data(stations: dict, weather_directory : str) -> List[str]:
|
|
|
940
940
|
print("FTP ERROR")
|
|
941
941
|
return
|
|
942
942
|
# Download files for each station from 2010 till present year
|
|
943
|
-
for year in range(2010, year_end):
|
|
943
|
+
for year in range(2010, year_end + 1):
|
|
944
944
|
# Set FTP credentials and connect
|
|
945
945
|
wd = f"/pub/data/noaa/isd-lite/{year}/"
|
|
946
946
|
ftp_server.cwd(wd)
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
from .load import check_table_exists, create_new_table, load_overwrite_database, load_event_table
|
|
2
|
-
__all__ = ["check_table_exists", "create_new_table", "load_overwrite_database", "load_event_table"]
|
|
1
|
+
from .load import check_table_exists, create_new_table, load_overwrite_database, load_event_table, report_data_loss
|
|
2
|
+
__all__ = ["check_table_exists", "create_new_table", "load_overwrite_database", "load_event_table", "report_data_loss"]
|
|
@@ -368,6 +368,69 @@ def load_event_table(config : ConfigManager, event_df: pd.DataFrame, site_name :
|
|
|
368
368
|
cursor.close()
|
|
369
369
|
return True
|
|
370
370
|
|
|
371
|
+
def report_data_loss(config : ConfigManager, site_name : str = None):
|
|
372
|
+
"""
|
|
373
|
+
Logs data loss event in event database (assumes one exists)
|
|
374
|
+
|
|
375
|
+
Parameters
|
|
376
|
+
----------
|
|
377
|
+
config : ecopipeline.ConfigManager
|
|
378
|
+
The ConfigManager object that holds configuration data for the pipeline.
|
|
379
|
+
site_name : str
|
|
380
|
+
the name of the site to correspond the events with. If left blank will default to minute table name
|
|
381
|
+
|
|
382
|
+
Returns
|
|
383
|
+
-------
|
|
384
|
+
bool:
|
|
385
|
+
A boolean value indicating if the data was successfully written to the database.
|
|
386
|
+
"""
|
|
387
|
+
# Drop empty columns
|
|
388
|
+
|
|
389
|
+
dbname = config.get_db_name()
|
|
390
|
+
table_name = "site_events"
|
|
391
|
+
if site_name is None:
|
|
392
|
+
site_name = config.get_site_name()
|
|
393
|
+
error_string = "Error proccessing data. Please check logs to resolve."
|
|
394
|
+
|
|
395
|
+
print(f"logging DATA_LOSS into {table_name}")
|
|
396
|
+
|
|
397
|
+
# create SQL statement
|
|
398
|
+
insert_str = "INSERT INTO " + table_name + " (start_time_pt, site_name, event_detail, event_type, last_modified_date, last_modified_by) VALUES "
|
|
399
|
+
insert_str += f"('{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}','{site_name}','{error_string}','DATA_LOSS','{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}','automatic_upload')"
|
|
400
|
+
|
|
401
|
+
existing_rows = pd.DataFrame({
|
|
402
|
+
'id' : []
|
|
403
|
+
})
|
|
404
|
+
|
|
405
|
+
connection, cursor = config.connect_db()
|
|
406
|
+
|
|
407
|
+
# create db table if it does not exist, otherwise add missing columns to existing table
|
|
408
|
+
if not check_table_exists(cursor, table_name, dbname):
|
|
409
|
+
print(f"Cannot log data loss. {table_name} does not exist in database {dbname}")
|
|
410
|
+
return False
|
|
411
|
+
else:
|
|
412
|
+
try:
|
|
413
|
+
# find existing times in database for upsert statement
|
|
414
|
+
cursor.execute(
|
|
415
|
+
f"SELECT id FROM {table_name} WHERE end_time_pt IS NULL AND site_name = '{site_name}' AND event_type = 'DATA_LOSS' and event_detail = '{error_string}'")
|
|
416
|
+
# Fetch the results into a DataFrame
|
|
417
|
+
existing_rows = pd.DataFrame(cursor.fetchall(), columns=['id'])
|
|
418
|
+
|
|
419
|
+
except mysqlerrors.Error as e:
|
|
420
|
+
print(f"Retrieving data from {table_name} caused exception: {e}")
|
|
421
|
+
try:
|
|
422
|
+
|
|
423
|
+
if existing_rows.empty:
|
|
424
|
+
cursor.execute(insert_str)
|
|
425
|
+
connection.commit()
|
|
426
|
+
print("Successfully logged data loss.")
|
|
427
|
+
except Exception as e:
|
|
428
|
+
# Print the exception message
|
|
429
|
+
print(f"Caught an exception when uploading to site_events table: {e}")
|
|
430
|
+
connection.close()
|
|
431
|
+
cursor.close()
|
|
432
|
+
return True
|
|
433
|
+
|
|
371
434
|
def _generate_mysql_update_event_table(row, id):
|
|
372
435
|
statement = f"UPDATE site_events SET "
|
|
373
436
|
statment_elems = []
|
|
@@ -449,28 +449,32 @@ def add_relative_humidity(df : pd.DataFrame, temp_col : str ='airTemp_F', dew_po
|
|
|
449
449
|
A = 6.11
|
|
450
450
|
B = 7.5
|
|
451
451
|
C = 237.3
|
|
452
|
+
try:
|
|
453
|
+
if degree_f:
|
|
454
|
+
df[f"{temp_col}_C"] = df[temp_col].apply(temp_f_to_c)
|
|
455
|
+
df[f"{dew_point_col}_C"] = df[dew_point_col].apply(temp_f_to_c)
|
|
456
|
+
temp_col_c = f"{temp_col}_C"
|
|
457
|
+
dew_point_col_c = f"{dew_point_col}_C"
|
|
458
|
+
else:
|
|
459
|
+
temp_col_c = temp_col
|
|
460
|
+
dew_point_col_c = dew_point_col
|
|
452
461
|
|
|
453
|
-
|
|
454
|
-
df[
|
|
455
|
-
df[
|
|
456
|
-
temp_col_c = f"{temp_col}_C"
|
|
457
|
-
dew_point_col_c = f"{dew_point_col}_C"
|
|
458
|
-
else:
|
|
459
|
-
temp_col_c = temp_col
|
|
460
|
-
dew_point_col_c = dew_point_col
|
|
461
|
-
|
|
462
|
-
# Calculate saturation vapor pressure (e_s) and actual vapor pressure (e)
|
|
463
|
-
e_s = A * 10 ** ((B * df[temp_col_c]) / (df[temp_col_c] + C))
|
|
464
|
-
e = A * 10 ** ((B * df[dew_point_col_c]) / (df[dew_point_col_c] + C))
|
|
462
|
+
# Calculate saturation vapor pressure (e_s) and actual vapor pressure (e)
|
|
463
|
+
e_s = A * 10 ** ((B * df[temp_col_c]) / (df[temp_col_c] + C))
|
|
464
|
+
e = A * 10 ** ((B * df[dew_point_col_c]) / (df[dew_point_col_c] + C))
|
|
465
465
|
|
|
466
|
-
|
|
467
|
-
|
|
466
|
+
# Calculate relative humidity
|
|
467
|
+
df['relative_humidity'] = (e / e_s) * 100.0
|
|
468
468
|
|
|
469
|
-
|
|
470
|
-
|
|
469
|
+
# Handle cases where relative humidity exceeds 100% due to rounding
|
|
470
|
+
df['relative_humidity'] = np.clip(df['relative_humidity'], 0.0, 100.0)
|
|
471
471
|
|
|
472
|
-
|
|
473
|
-
|
|
472
|
+
if degree_f:
|
|
473
|
+
df.drop(columns=[temp_col_c, dew_point_col_c])
|
|
474
|
+
except:
|
|
475
|
+
|
|
476
|
+
df['relative_humidity'] = None
|
|
477
|
+
print("Unable to calculate relative humidity data for timeframe")
|
|
474
478
|
|
|
475
479
|
return df
|
|
476
480
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
2
|
Name: ecopipeline
|
|
3
|
-
Version: 0.4.18
|
|
3
|
+
Version: 0.5.1
|
|
4
4
|
Summary: Contains functions for use in Ecotope Datapipelines
|
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
|
6
6
|
Classifier: License :: OSI Approved :: GNU General Public License (GPL)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|