ecopipeline-0.5.1-py3-none-any.whl → ecopipeline-0.6.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ecopipeline/load/load.py CHANGED
@@ -8,7 +8,7 @@ import math
  pd.set_option('display.max_columns', None)
  import mysql.connector.errors as mysqlerrors
  from ecopipeline import ConfigManager
- import datetime
+ from datetime import datetime, timedelta
  import numpy as np

  data_map = {'int64':'float',
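
The import change above is what drives the datetime.datetime.now() → datetime.now() edits in the later hunks of this file. A minimal illustration of the two call styles (for orientation only; the variable name is made up and this is not part of the package source):

    # 0.5.1 style, with "import datetime"
    cutoff = datetime.datetime.now() - datetime.timedelta(days=3)

    # 0.6.2 style, with "from datetime import datetime, timedelta"
    cutoff = datetime.now() - timedelta(days=3)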
@@ -161,97 +161,123 @@ def create_new_columns(cursor : mysql.connector.cursor.MySQLCursor, table_name:

  return True

- def load_overwrite_database(cursor : mysql.connector.cursor.MySQLCursor, dataframe: pd.DataFrame, config_info: dict, data_type: str, primary_key: str = "time_pt", table_name: str = None):
+ def load_overwrite_database(config : ConfigManager, dataframe: pd.DataFrame, config_info: dict, data_type: str,
+ primary_key: str = "time_pt", table_name: str = None, auto_log_data_loss : bool = False):
  """
  Loads given pandas DataFrame into a MySQL table overwriting any conflicting data. Uses an UPSERT strategy to ensure any gaps in data are filled.
  Note: will not overwrite values with NULL. Must have a new value to overwrite existing values in database

  Parameters
  ----------
- cursor : mysql.connector.cursor.MySQLCursor
- A cursor object connected to the database where the data will land
+ config : ecopipeline.ConfigManager
+ The ConfigManager object that holds configuration data for the pipeline.
  dataframe: pd.DataFrame
  The pandas DataFrame to be written into the mySQL server.
  config_info: dict
  The dictionary containing the configuration information in the data upload. This can be aquired through the get_login_info() function in this package
  data_type: str
- The header name corresponding to the table you wish to write data to.
+ The header name corresponding to the table you wish to write data to.
+ primary_key : str
+ The name of the primary key in the database to upload to. Default as 'time_pt'
+ table_name : str
+ overwrites table name from config_info if needed
+ auto_log_data_loss : bool
+ if set to True, a data loss event will be reported if no data exits in the dataframe
+ for the last two days from the current date OR if an error occurs

  Returns
  -------
  bool:
  A boolean value indicating if the data was successfully written to the database.
  """
- # Drop empty columns
- dataframe = dataframe.dropna(axis=1, how='all')
-
- dbname = config_info['database']
- if table_name == None:
- table_name = config_info[data_type]["table_name"]
-
- if(len(dataframe.index) <= 0):
- print(f"Attempted to write to {table_name} but dataframe was empty.")
- return True
+ # Database Connection
+ db_connection, cursor = config.connect_db()
+ try:

- print(f"Attempting to write data for {dataframe.index[0]} to {dataframe.index[-1]} into {table_name}")
-
- # Get string of all column names for sql insert
- sensor_names = primary_key
- sensor_types = ["datetime"]
- for column in dataframe.columns:
- sensor_names += "," + column
- sensor_types.append(data_map[dataframe[column].dtype.name])
+ # Drop empty columns
+ dataframe = dataframe.dropna(axis=1, how='all')

- # create SQL statement
- insert_str = "INSERT INTO " + table_name + " (" + sensor_names + ") VALUES ("
- for column in dataframe.columns:
- insert_str += "%s, "
- insert_str += "%s)"
-
- # last_time = datetime.datetime.strptime('20/01/1990', "%d/%m/%Y") # arbitrary past date
- existing_rows_list = []
-
- # create db table if it does not exist, otherwise add missing columns to existing table
- if not check_table_exists(cursor, table_name, dbname):
- if not create_new_table(cursor, table_name, sensor_names.split(",")[1:], sensor_types[1:], primary_key=primary_key): #split on colums and remove first column aka time_pt
- print(f"Could not create new table {table_name} in database {dbname}")
- return False
- else:
- try:
- # find existing times in database for upsert statement
- cursor.execute(
- f"SELECT {primary_key} FROM {table_name} WHERE {primary_key} >= '{dataframe.index.min()}'")
- # Fetch the results into a DataFrame
- existing_rows = pd.DataFrame(cursor.fetchall(), columns=[primary_key])
+ dbname = config_info['database']
+ if table_name == None:
+ table_name = config_info[data_type]["table_name"]
+
+ if(len(dataframe.index) <= 0):
+ print(f"Attempted to write to {table_name} but dataframe was empty.")
+ ret_value = True
+ else:

- # Convert the primary_key column to a list
- existing_rows_list = existing_rows[primary_key].tolist()
+ print(f"Attempting to write data for {dataframe.index[0]} to {dataframe.index[-1]} into {table_name}")
+ if auto_log_data_loss and dataframe.index[-1] < datetime.now() - timedelta(days=3):
+ report_data_loss(config)
+
+ # Get string of all column names for sql insert
+ sensor_names = primary_key
+ sensor_types = ["datetime"]
+ for column in dataframe.columns:
+ sensor_names += "," + column
+ sensor_types.append(data_map[dataframe[column].dtype.name])
+
+ # create SQL statement
+ insert_str = "INSERT INTO " + table_name + " (" + sensor_names + ") VALUES ("
+ for column in dataframe.columns:
+ insert_str += "%s, "
+ insert_str += "%s)"
+
+ # last_time = datetime.strptime('20/01/1990', "%d/%m/%Y") # arbitrary past date
+ existing_rows_list = []
+
+ # create db table if it does not exist, otherwise add missing columns to existing table
+ if not check_table_exists(cursor, table_name, dbname):
+ if not create_new_table(cursor, table_name, sensor_names.split(",")[1:], sensor_types[1:], primary_key=primary_key): #split on colums and remove first column aka time_pt
+ ret_value = False
+ raise Exception(f"Could not create new table {table_name} in database {dbname}")
+ else:
+ try:
+ # find existing times in database for upsert statement
+ cursor.execute(
+ f"SELECT {primary_key} FROM {table_name} WHERE {primary_key} >= '{dataframe.index.min()}'")
+ # Fetch the results into a DataFrame
+ existing_rows = pd.DataFrame(cursor.fetchall(), columns=[primary_key])
+
+ # Convert the primary_key column to a list
+ existing_rows_list = existing_rows[primary_key].tolist()
+
+ except mysqlerrors.Error:
+ print(f"Table {table_name} has no data.")
+
+ missing_cols, missing_types = find_missing_columns(cursor, dataframe, config_info, table_name)
+ if len(missing_cols):
+ if not create_new_columns(cursor, table_name, missing_cols, missing_types):
+ print("Unable to add new columns due to database error.")
+
+ updatedRows = 0
+ for index, row in dataframe.iterrows():
+ time_data = row.values.tolist()
+ #remove nans and infinites
+ time_data = [None if (x is None or pd.isna(x)) else x for x in time_data]
+ time_data = [None if (x == float('inf') or x == float('-inf')) else x for x in time_data]
+
+ if index in existing_rows_list:
+ statement, values = _generate_mysql_update(row, index, table_name, primary_key)
+ if statement != "":
+ cursor.execute(statement, values)
+ updatedRows += 1
+ else:
+ cursor.execute(insert_str, (index, *time_data))

- except mysqlerrors.Error:
- print(f"Table {table_name} has no data.")
+ db_connection.commit()
+ print(f"Successfully wrote {len(dataframe.index)} rows to table {table_name} in database {dbname}. {updatedRows} existing rows were overwritten.")
+ ret_value = True
+ except Exception as e:
+ print(f"Unable to load data into database. Exception: {e}")
+ if auto_log_data_loss:
+ report_data_loss(config)
+ ret_value = False

- missing_cols, missing_types = find_missing_columns(cursor, dataframe, config_info, table_name)
- if len(missing_cols):
- if not create_new_columns(cursor, table_name, missing_cols, missing_types):
- print("Unable to add new columns due to database error.")
-
- updatedRows = 0
- for index, row in dataframe.iterrows():
- time_data = row.values.tolist()
- #remove nans and infinites
- time_data = [None if (x is None or pd.isna(x)) else x for x in time_data]
- time_data = [None if (x == float('inf') or x == float('-inf')) else x for x in time_data]
-
- if index in existing_rows_list:
- statement, values = _generate_mysql_update(row, index, table_name, primary_key)
- if statement != "":
- cursor.execute(statement, values)
- updatedRows += 1
- else:
- cursor.execute(insert_str, (index, *time_data))
+ db_connection.close()
+ cursor.close()
+ return ret_value

- print(f"Successfully wrote {len(dataframe.index)} rows to table {table_name} in database {dbname}. {updatedRows} existing rows were overwritten.")
- return True

  def load_event_table(config : ConfigManager, event_df: pd.DataFrame, site_name : str = None):
  """
@@ -299,7 +325,7 @@ def load_event_table(config : ConfigManager, event_df: pd.DataFrame, site_name :
  column_names += "," + column

  # create SQL statement
- insert_str = "INSERT INTO " + table_name + " (" + column_names + ", last_modified_date, last_modified_by) VALUES (%s,%s,%s,%s,%s,'"+datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')+"','automatic_upload')"
+ insert_str = "INSERT INTO " + table_name + " (" + column_names + ", last_modified_date, last_modified_by) VALUES (%s,%s,%s,%s,%s,'"+datetime.now().strftime('%Y-%m-%d %H:%M:%S')+"','automatic_upload')"

  # add aditional columns for db creation
  full_column_names = column_names.split(",")[1:]
@@ -390,13 +416,13 @@ def report_data_loss(config : ConfigManager, site_name : str = None):
  table_name = "site_events"
  if site_name is None:
  site_name = config.get_site_name()
- error_string = "Error proccessing data. Please check logs to resolve."
+ error_string = "Error processing data. Please check logs to resolve."

  print(f"logging DATA_LOSS into {table_name}")

  # create SQL statement
  insert_str = "INSERT INTO " + table_name + " (start_time_pt, site_name, event_detail, event_type, last_modified_date, last_modified_by) VALUES "
- insert_str += f"('{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}','{site_name}','{error_string}','DATA_LOSS','{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}','automatic_upload')"
+ insert_str += f"('{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}','{site_name}','{error_string}','DATA_LOSS','{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}','automatic_upload')"

  existing_rows = pd.DataFrame({
  'id' : []
@@ -412,7 +438,7 @@ def report_data_loss(config : ConfigManager, site_name : str = None):
  try:
  # find existing times in database for upsert statement
  cursor.execute(
- f"SELECT id FROM {table_name} WHERE end_time_pt IS NULL AND site_name = '{site_name}' AND event_type = 'DATA_LOSS' and event_detail = '{error_string}'")
+ f"SELECT id FROM {table_name} WHERE end_time_pt IS NULL AND site_name = '{site_name}' AND event_type = 'DATA_LOSS'")
  # Fetch the results into a DataFrame
  existing_rows = pd.DataFrame(cursor.fetchall(), columns=['id'])

@@ -422,8 +448,10 @@

  if existing_rows.empty:
  cursor.execute(insert_str)
- connection.commit()
- print("Successfully logged data loss.")
+ connection.commit()
+ print("Successfully logged data loss.")
+ else:
+ print("Data loss already logged.")
  except Exception as e:
  # Print the exception message
  print(f"Caught an exception when uploading to site_events table: {e}")
@@ -442,7 +470,7 @@ def _generate_mysql_update_event_table(row, id):

  if values:
  statement += ", ".join(statment_elems)
- statement += f", last_modified_by = 'automatic_upload', last_modified_date = '{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}'"
+ statement += f", last_modified_by = 'automatic_upload', last_modified_date = '{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}'"
  statement += f" WHERE id = {id};"
  # statement += f" WHERE start_time_pt = '{start_time_pt}' AND end_time_pt = '{end_time_pt}' AND event_type = '{event_type}' AND site_name = '{site_name}';"
  else:
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: ecopipeline
- Version: 0.5.1
+ Version: 0.6.2
  Summary: Contains functions for use in Ecotope Datapipelines
  Classifier: Programming Language :: Python :: 3
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
@@ -2,7 +2,7 @@ ecopipeline/__init__.py,sha256=vCRzwd781ciCSXMP1ycM_BXAqxj3KVaNKIjsLOPcbwc,171
  ecopipeline/extract/__init__.py,sha256=3u_CUMdCguVewU3kN8x6xhVNyo1-p-gwTrhjOh7Psqg,645
  ecopipeline/extract/extract.py,sha256=ryorqnu1RgyNK7joZRcbMmTajlTlB5hwaYzzpo8Z8Ho,43369
  ecopipeline/load/__init__.py,sha256=oDAVF8AhK_qugqegjW7jK16p-nb9QzKhiNQOkEBniKM,235
- ecopipeline/load/load.py,sha256=H9OKjE-EoqhZJ-5Xixqn5vvhvgUNFDdsVsX4fqht0hE,19975
+ ecopipeline/load/load.py,sha256=YfCuzsJYFNZqwR58GeF55-gRI7LpOeaK_DXYHg_0frU,21415
  ecopipeline/transform/__init__.py,sha256=DcIJfkRs4OmZzDeEfW_OiOIXNqN6CUl1_lW0SS7-eN8,2280
  ecopipeline/transform/bayview.py,sha256=TP24dnTsUD95X-f6732egPZKjepFLJgDm9ImGr-fppY,17899
  ecopipeline/transform/lbnl.py,sha256=EQ54G4rJXaZ7pwVusKcdK2KBehSdCsNo2ybphtMGs7o,33400
@@ -10,8 +10,8 @@ ecopipeline/transform/transform.py,sha256=uyBIXKCXUCT6zVnZyQohripGAzmY1yV9T1GxsX
  ecopipeline/utils/ConfigManager.py,sha256=t4sfTjGO0g5P50XBQqGVFWaXfAlW1GMDh1DLoBuFGks,9826
  ecopipeline/utils/__init__.py,sha256=ccWUR0m7gD9DfcgsxBCLOfi4lho6RdYuB2Ugy_g6ZdQ,28
  ecopipeline/utils/unit_convert.py,sha256=VFh1we2Y8KV3u21BeWb-U3TlZJXo83q5vdxxkpgcuME,3064
- ecopipeline-0.5.1.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- ecopipeline-0.5.1.dist-info/METADATA,sha256=BMxe3qd_Ym_Ym5G3QN1zKwPJaZme5g4aEl6LUnaNPjc,2307
- ecopipeline-0.5.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- ecopipeline-0.5.1.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
- ecopipeline-0.5.1.dist-info/RECORD,,
+ ecopipeline-0.6.2.dist-info/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ ecopipeline-0.6.2.dist-info/METADATA,sha256=bjYX_gnfOZFDTS6mY71uJrBzCMkdtks6o1ZZHgVniys,2307
+ ecopipeline-0.6.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ ecopipeline-0.6.2.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
+ ecopipeline-0.6.2.dist-info/RECORD,,