ecopipeline 0.4.18__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. {ecopipeline-0.4.18/src/ecopipeline.egg-info → ecopipeline-0.5.1}/PKG-INFO +2 -2
  2. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/setup.cfg +1 -1
  3. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/extract/extract.py +1 -1
  4. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/load/__init__.py +2 -2
  5. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/load/load.py +63 -0
  6. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/transform/transform.py +22 -18
  7. {ecopipeline-0.4.18 → ecopipeline-0.5.1/src/ecopipeline.egg-info}/PKG-INFO +2 -2
  8. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/LICENSE +0 -0
  9. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/README.md +0 -0
  10. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/pyproject.toml +0 -0
  11. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/setup.py +0 -0
  12. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/__init__.py +0 -0
  13. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/extract/__init__.py +0 -0
  14. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/transform/__init__.py +0 -0
  15. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/transform/bayview.py +0 -0
  16. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/transform/lbnl.py +0 -0
  17. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/utils/ConfigManager.py +0 -0
  18. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/utils/__init__.py +0 -0
  19. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline/utils/unit_convert.py +0 -0
  20. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline.egg-info/SOURCES.txt +0 -0
  21. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline.egg-info/dependency_links.txt +0 -0
  22. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline.egg-info/requires.txt +0 -0
  23. {ecopipeline-0.4.18 → ecopipeline-0.5.1}/src/ecopipeline.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: ecopipeline
3
- Version: 0.4.18
3
+ Version: 0.5.1
4
4
  Summary: Contains functions for use in Ecotope Datapipelines
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = ecopipeline
3
- version = 0.4.18
3
+ version = 0.5.1
4
4
  authors = ["Carlos Bello, <bellocarlos@seattleu.edu>, Emil Fahrig <fahrigemil@seattleu.edu>, Casey Mang <cmang@seattleu.edu>, Julian Harris <harrisjulian@seattleu.edu>, Roger Tram <rtram@seattleu.edu>, Nolan Price <nolan@ecotope.com>"]
5
5
  description = Contains functions for use in Ecotope Datapipelines
6
6
  long_description = file: README.md
@@ -940,7 +940,7 @@ def _download_noaa_data(stations: dict, weather_directory : str) -> List[str]:
940
940
  print("FTP ERROR")
941
941
  return
942
942
  # Download files for each station from 2010 till present year
943
- for year in range(2010, year_end):
943
+ for year in range(2010, year_end + 1):
944
944
  # Set FTP credentials and connect
945
945
  wd = f"/pub/data/noaa/isd-lite/{year}/"
946
946
  ftp_server.cwd(wd)
@@ -1,2 +1,2 @@
1
- from .load import check_table_exists, create_new_table, load_overwrite_database, load_event_table
2
- __all__ = ["check_table_exists", "create_new_table", "load_overwrite_database", "load_event_table"]
1
+ from .load import check_table_exists, create_new_table, load_overwrite_database, load_event_table, report_data_loss
2
+ __all__ = ["check_table_exists", "create_new_table", "load_overwrite_database", "load_event_table", "report_data_loss"]
@@ -368,6 +368,69 @@ def load_event_table(config : ConfigManager, event_df: pd.DataFrame, site_name :
368
368
  cursor.close()
369
369
  return True
370
370
 
371
def report_data_loss(config : ConfigManager, site_name : str = None):
    """
    Logs a DATA_LOSS event in the site_events table of the event database
    (assumes the table already exists).

    Parameters
    ----------
    config : ecopipeline.ConfigManager
        The ConfigManager object that holds configuration data for the pipeline.
    site_name : str, optional
        The name of the site to associate the event with. If omitted, defaults
        to ``config.get_site_name()`` (the minute table name).

    Returns
    -------
    bool:
        True if the event was logged (or an equivalent open DATA_LOSS event
        already exists); False if the site_events table does not exist.
    """
    dbname = config.get_db_name()
    table_name = "site_events"
    if site_name is None:
        site_name = config.get_site_name()
    # NOTE(review): the "proccessing" typo is kept deliberately — the
    # duplicate check below matches on event_detail, and rows written by
    # earlier releases use this exact spelling.
    error_string = "Error proccessing data. Please check logs to resolve."

    print(f"logging DATA_LOSS into {table_name}")

    now_str = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    # Parameterized statement: never interpolate site_name / free text into SQL.
    insert_str = (
        f"INSERT INTO {table_name} "
        "(start_time_pt, site_name, event_detail, event_type, "
        "last_modified_date, last_modified_by) "
        "VALUES (%s, %s, %s, 'DATA_LOSS', %s, 'automatic_upload')"
    )

    existing_rows = pd.DataFrame({'id': []})

    connection, cursor = config.connect_db()
    try:
        # Bail out — without leaking the connection — if the events table is absent.
        if not check_table_exists(cursor, table_name, dbname):
            print(f"Cannot log data loss. {table_name} does not exist in database {dbname}")
            return False

        try:
            # Look for an already-open (end_time_pt IS NULL) DATA_LOSS event for
            # this site so the same outage is not logged twice.
            cursor.execute(
                f"SELECT id FROM {table_name} WHERE end_time_pt IS NULL "
                "AND site_name = %s AND event_type = 'DATA_LOSS' AND event_detail = %s",
                (site_name, error_string))
            existing_rows = pd.DataFrame(cursor.fetchall(), columns=['id'])
        except mysqlerrors.Error as e:
            print(f"Retrieving data from {table_name} caused exception: {e}")

        try:
            if existing_rows.empty:
                cursor.execute(insert_str, (now_str, site_name, error_string, now_str))
                connection.commit()
                print("Successfully logged data loss.")
        except Exception as e:
            # Best-effort logging: report the failure but do not propagate.
            print(f"Caught an exception when uploading to site_events table: {e}")
        return True
    finally:
        # Release in reverse acquisition order, on every exit path.
        cursor.close()
        connection.close()
433
+
371
434
  def _generate_mysql_update_event_table(row, id):
372
435
  statement = f"UPDATE site_events SET "
373
436
  statment_elems = []
@@ -449,28 +449,32 @@ def add_relative_humidity(df : pd.DataFrame, temp_col : str ='airTemp_F', dew_po
449
449
  A = 6.11
450
450
  B = 7.5
451
451
  C = 237.3
452
+ try:
453
+ if degree_f:
454
+ df[f"{temp_col}_C"] = df[temp_col].apply(temp_f_to_c)
455
+ df[f"{dew_point_col}_C"] = df[dew_point_col].apply(temp_f_to_c)
456
+ temp_col_c = f"{temp_col}_C"
457
+ dew_point_col_c = f"{dew_point_col}_C"
458
+ else:
459
+ temp_col_c = temp_col
460
+ dew_point_col_c = dew_point_col
452
461
 
453
- if degree_f:
454
- df[f"{temp_col}_C"] = df[temp_col].apply(temp_f_to_c)
455
- df[f"{dew_point_col}_C"] = df[dew_point_col].apply(temp_f_to_c)
456
- temp_col_c = f"{temp_col}_C"
457
- dew_point_col_c = f"{dew_point_col}_C"
458
- else:
459
- temp_col_c = temp_col
460
- dew_point_col_c = dew_point_col
461
-
462
- # Calculate saturation vapor pressure (e_s) and actual vapor pressure (e)
463
- e_s = A * 10 ** ((B * df[temp_col_c]) / (df[temp_col_c] + C))
464
- e = A * 10 ** ((B * df[dew_point_col_c]) / (df[dew_point_col_c] + C))
462
+ # Calculate saturation vapor pressure (e_s) and actual vapor pressure (e)
463
+ e_s = A * 10 ** ((B * df[temp_col_c]) / (df[temp_col_c] + C))
464
+ e = A * 10 ** ((B * df[dew_point_col_c]) / (df[dew_point_col_c] + C))
465
465
 
466
- # Calculate relative humidity
467
- df['relative_humidity'] = (e / e_s) * 100.0
466
+ # Calculate relative humidity
467
+ df['relative_humidity'] = (e / e_s) * 100.0
468
468
 
469
- # Handle cases where relative humidity exceeds 100% due to rounding
470
- df['relative_humidity'] = np.clip(df['relative_humidity'], 0.0, 100.0)
469
+ # Handle cases where relative humidity exceeds 100% due to rounding
470
+ df['relative_humidity'] = np.clip(df['relative_humidity'], 0.0, 100.0)
471
471
 
472
- if degree_f:
473
- df.drop(columns=[temp_col_c, dew_point_col_c])
472
+ if degree_f:
473
+ df.drop(columns=[temp_col_c, dew_point_col_c])
474
+ except:
475
+
476
+ df['relative_humidity'] = None
477
+ print("Unable to calculate relative humidity data for timeframe")
474
478
 
475
479
  return df
476
480
 
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.2
2
2
  Name: ecopipeline
3
- Version: 0.4.18
3
+ Version: 0.5.1
4
4
  Summary: Contains functions for use in Ecotope Datapipelines
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
File without changes
File without changes
File without changes
File without changes