ecopipeline 1.0.0__tar.gz → 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {ecopipeline-1.0.0/src/ecopipeline.egg-info → ecopipeline-1.0.2}/PKG-INFO +1 -1
  2. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/setup.cfg +1 -1
  3. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/extract/__init__.py +2 -2
  4. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/extract/extract.py +91 -20
  5. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/transform/transform.py +20 -8
  6. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/ConfigManager.py +20 -1
  7. {ecopipeline-1.0.0 → ecopipeline-1.0.2/src/ecopipeline.egg-info}/PKG-INFO +1 -1
  8. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/LICENSE +0 -0
  9. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/MANIFEST.in +0 -0
  10. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/README.md +0 -0
  11. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/pyproject.toml +0 -0
  12. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/setup.py +0 -0
  13. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/__init__.py +0 -0
  14. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/event_tracking/__init__.py +0 -0
  15. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/event_tracking/event_tracking.py +0 -0
  16. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/load/__init__.py +0 -0
  17. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/load/load.py +0 -0
  18. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/transform/__init__.py +0 -0
  19. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/transform/bayview.py +0 -0
  20. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/transform/lbnl.py +0 -0
  21. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/NOAADataDownloader.py +0 -0
  22. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/__init__.py +0 -0
  23. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/pkls/__init__.py +0 -0
  24. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/pkls/tasseron_resistance_to_temp_3.pkl +0 -0
  25. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/pkls/tasseron_temp_to_resistance_2.pkl +0 -0
  26. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/pkls/veris_resistance_to_temp_3.pkl +0 -0
  27. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/pkls/veris_temp_to_resistance_2.pkl +0 -0
  28. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/unit_convert.py +0 -0
  29. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline.egg-info/SOURCES.txt +0 -0
  30. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline.egg-info/dependency_links.txt +0 -0
  31. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline.egg-info/requires.txt +0 -0
  32. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline.egg-info/top_level.txt +0 -0
{ecopipeline-1.0.0/src/ecopipeline.egg-info → ecopipeline-1.0.2}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ecopipeline
-Version: 1.0.0
+Version: 1.0.2
 Summary: Contains functions for use in Ecotope Datapipelines
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: GNU General Public License (GPL)
{ecopipeline-1.0.0 → ecopipeline-1.0.2}/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = ecopipeline
-version = 1.0.0
+version = 1.0.2
 authors = ["Carlos Bello, <bellocarlos@seattleu.edu>, Emil Fahrig <fahrigemil@seattleu.edu>, Casey Mang <cmang@seattleu.edu>, Julian Harris <harrisjulian@seattleu.edu>, Roger Tram <rtram@seattleu.edu>, Nolan Price <nolan@ecotope.com>"]
 description = Contains functions for use in Ecotope Datapipelines
 long_description = file: README.md
{ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/extract/__init__.py
@@ -1,3 +1,3 @@
-from .extract import get_noaa_data, json_to_df, extract_files, get_last_full_day_from_db, get_db_row_from_time, extract_new, csv_to_df, get_sub_dirs, msa_to_df, fm_api_to_df, small_planet_control_to_df, dent_csv_to_df, flow_csv_to_df, pull_egauge_data, egauge_csv_to_df, remove_char_sequence_from_csv_header, tb_api_to_df
+from .extract import get_noaa_data, json_to_df, extract_files, get_last_full_day_from_db, get_db_row_from_time, extract_new, csv_to_df, get_sub_dirs, msa_to_df, fm_api_to_df, small_planet_control_to_df, dent_csv_to_df, flow_csv_to_df, pull_egauge_data, egauge_csv_to_df, remove_char_sequence_from_csv_header, tb_api_to_df, skycentrics_api_to_df
 __all__ = ["get_noaa_data", "json_to_df", "extract_files", "get_last_full_day_from_db", "get_db_row_from_time", 'extract_new', "csv_to_df", "get_sub_dirs", "msa_to_df", "fm_api_to_df",
-           "small_planet_control_to_df","dent_csv_to_df","flow_csv_to_df","pull_egauge_data", "egauge_csv_to_df","remove_char_sequence_from_csv_header", "tb_api_to_df"]
+           "small_planet_control_to_df","dent_csv_to_df","flow_csv_to_df","pull_egauge_data", "egauge_csv_to_df","remove_char_sequence_from_csv_header", "tb_api_to_df", "skycentrics_api_to_df"]
{ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/extract/extract.py
@@ -15,6 +15,7 @@ import mysql.connector.errors as mysqlerrors
 import requests
 import subprocess
 import traceback
+import time


 def get_last_full_day_from_db(config : ConfigManager, table_identifier : str = "minute") -> datetime:
@@ -661,6 +662,79 @@ def egauge_csv_to_df(csv_filenames: List[str]) -> pd.DataFrame:
 
     return df_diff
 
+def skycentrics_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True, time_zone: str = 'US/Pacific'):
+    """
+    Function connects to the field manager api to pull data and returns a dataframe.
+
+    Parameters
+    ----------
+    config : ecopipeline.ConfigManager
+        The ConfigManager object that holds configuration data for the pipeline. The config manager
+        must contain information to connect to the api, i.e. the api user name and password as well as
+        the device id for the device the data is being pulled from.
+    startTime: datetime
+        The point in time for which we want to start the data extraction from. This
+        is local time from the data's index.
+    endTime: datetime
+        The point in time for which we want to end the data extraction. This
+        is local time from the data's index.
+    create_csv : bool
+        create csv files as you process such that API need not be relied upon for reprocessing
+    time_zone: str
+        The timezone for the indexes in the output dataframe as a string. Must be a string recognized as a
+        time stamp by the pandas tz_localize() function https://pandas.pydata.org/docs/reference/api/pandas.Series.tz_localize.html
+        defaults to 'US/Pacific'
+
+    Returns
+    -------
+    pd.DataFrame:
+        Pandas Dataframe containing data from the API pull with column headers the same as the variable names in the data from the pull
+    """
+    #temporary solution while no date range available
+
+    try:
+        df = pd.DataFrame()
+        temp_dfs = []
+        time_parser = startTime
+        while time_parser < endTime:
+            start_time_str = time_parser.strftime('%a, %d %b %H:%M:%S GMT')
+            skycentrics_token, date_str = config.get_skycentrics_token(request_str=f'GET /api/devices/{config.api_device_id}/data HTTP/1.1',date_str=start_time_str)
+            response = requests.get(f'https://api.skycentrics.com/api/devices/{config.api_device_id}/data',
+                                    headers={'Date': date_str, 'x-sc-api-token': skycentrics_token, 'Accept': 'application/json'})
+            if response.status_code == 200:
+                norm_data = pd.json_normalize(response.json(), record_path=['sensors'], meta=['time'], meta_prefix='response_')
+                if len(norm_data) != 0:
+
+                    norm_data["time_pt"] = pd.to_datetime(norm_data["response_time"])
+
+                    norm_data["time_pt"] = norm_data["time_pt"].dt.tz_convert(time_zone)
+                    norm_data = pd.pivot_table(norm_data, index="time_pt", columns="id", values="data")
+                    # Iterate over the index and round up if necessary (work around for json format from sensors)
+                    for i in range(len(norm_data.index)):
+                        if norm_data.index[i].minute == 59 and norm_data.index[i].second == 59:
+                            norm_data.index.values[i] = norm_data.index[i] + pd.Timedelta(seconds=1)
+                    temp_dfs.append(norm_data)
+            else:
+                print(f"Failed to make GET request. Status code: {response.status_code} {response.json()}")
+                time.sleep(60)
+            time_parser = time_parser + timedelta(minutes=1)
+        if len(temp_dfs) > 0:
+            df = pd.concat(temp_dfs, ignore_index=False)
+            if create_csv:
+                filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
+                original_directory = os.getcwd()
+                os.chdir(config.data_directory)
+                df.to_csv(filename, index_label='time_pt')
+                os.chdir(original_directory)
+        else:
+            print("No skycentrics data retieved for time frame.")
+        return df
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        raise e
+    return pd.DataFrame()
+
 def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True) -> pd.DataFrame:
     """
     Function connects to the field manager api to pull data and returns a dataframe.
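For orientation, a minimal sketch of how the new skycentrics_api_to_df extractor might be driven from a pipeline script. The config path and the ConfigManager constructor argument shown here are assumptions, not taken from this diff; the function itself issues one GET per minute of the requested window and, with create_csv=True, writes a <startTime>.csv into config.data_directory.

    from datetime import datetime, timedelta
    from ecopipeline import ConfigManager
    from ecopipeline.extract import skycentrics_api_to_df

    # Hypothetical config file whose [data] section provides api_token,
    # api_secret, and device_id (the new ConfigManager branch in this release).
    config = ConfigManager(config_file_path="config.ini")

    end = datetime.now()
    start = end - timedelta(minutes=30)
    df = skycentrics_api_to_df(config, startTime=start, endTime=end, create_csv=True)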
@@ -776,7 +850,7 @@ def pull_egauge_data(config: ConfigManager, eGauge_ids: list, eGauge_usr : str,
         os.chdir(original_directory)
 
 def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True, query_hours : float = 1,
-                 sensor_keys : list = [], seperate_keys : bool = False):
+                 sensor_keys : list = [], seperate_keys : bool = False, device_id_overwrite : str = None, csv_prefix : str = ""):
     """
     Function connects to the things board manager api to pull data and returns a dataframe.
 
@@ -796,6 +870,11 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
         create csv files as you process such that API need not be relied upon for reprocessing
     query_hours : float
         number of hours to query at a time from ThingsBoard API
+
+    device_id_overwrite : str
+        Overwrites device ID for API pull
+    csv_prefix : str
+        prefix to add to the csv title
 
     Returns
     -------
@@ -804,16 +883,17 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
         Will return with index in UTC so needs to be converted after to appropriate timezone
     """
     df = pd.DataFrame()
+    api_device_id = device_id_overwrite if not device_id_overwrite is None else config.api_device_id
     if len(sensor_keys) <= 0:
         token = config.get_thingsboard_token()
-        key_list = _get_tb_keys(config, token)
+        key_list = _get_tb_keys(token, api_device_id)
         if len(key_list) <= 0:
-            raise Exception(f"No sensors available at ThingsBoard site with id {config.api_device_id}")
-        return tb_api_to_df(config, startTime, endTime, create_csv, query_hours, key_list, seperate_keys)
+            raise Exception(f"No sensors available at ThingsBoard site with id {api_device_id}")
+        return tb_api_to_df(config, startTime, endTime, create_csv, query_hours, key_list, seperate_keys, device_id_overwrite, csv_prefix)
     if seperate_keys:
         df_list = []
         for sensor_key in sensor_keys:
-            df_list.append(tb_api_to_df(config, startTime, endTime, False, query_hours, [sensor_key], False))
+            df_list.append(tb_api_to_df(config, startTime, endTime, False, query_hours, [sensor_key], False, device_id_overwrite, csv_prefix))
         df = pd.concat(df_list)
     else:
         # not seperate_keys:
@@ -826,13 +906,13 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
         if endTime - timedelta(hours=query_hours) > startTime:
             time_diff = endTime - startTime
             midpointTime = startTime + time_diff / 2
-            df_1 = tb_api_to_df(config, startTime, midpointTime, query_hours=query_hours, sensor_keys=sensor_keys, create_csv=False)#True if startTime >= datetime(2025,7,13,9) and startTime <= datetime(2025,7,13,10) else csv_pass_down)
-            df_2 = tb_api_to_df(config, midpointTime, endTime, query_hours=query_hours, sensor_keys=sensor_keys,create_csv=False)#True if endTime >= datetime(2025,7,13,9) and endTime <= datetime(2025,7,13,10) else csv_pass_down)
+            df_1 = tb_api_to_df(config, startTime, midpointTime, query_hours=query_hours, sensor_keys=sensor_keys, create_csv=False, device_id_overwrite = device_id_overwrite)#True if startTime >= datetime(2025,7,13,9) and startTime <= datetime(2025,7,13,10) else csv_pass_down)
+            df_2 = tb_api_to_df(config, midpointTime, endTime, query_hours=query_hours, sensor_keys=sensor_keys,create_csv=False, device_id_overwrite = device_id_overwrite)#True if endTime >= datetime(2025,7,13,9) and endTime <= datetime(2025,7,13,10) else csv_pass_down)
             df = pd.concat([df_1, df_2])
             df = df.sort_index()
             df = df.groupby(df.index).mean()
         else:
-            url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{config.api_device_id}/values/timeseries'
+            url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{api_device_id}/values/timeseries'
             token = config.get_thingsboard_token()
             key_string = ','.join(sensor_keys)
             params = {
@@ -844,7 +924,6 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
                 'interval' : '0',
                 'agg' : 'NONE'
             }
-
             # Headers
             headers = {
                 'accept': 'application/json',
@@ -855,14 +934,6 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
             response = requests.get(url, headers=headers, params=params)
             if response.status_code == 200:
                 response_json = response.json()
-                # if create_csv:
-                #     json_filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.json"
-                #     print(f"filename: {json_filename}, url: {url}, params: {params}")
-                #     original_directory = os.getcwd()
-                #     os.chdir(config.data_directory)
-                #     with open(json_filename, 'w') as f:
-                #         json.dump(response_json, f, indent=4) # indent=4 makes it human-readable
-                #     os.chdir(original_directory)
 
                 data = {}
                 for key, records in response_json.items():
@@ -886,7 +957,7 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
                 df = pd.DataFrame()
     # save to file
     if create_csv:
-        filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
+        filename = f"{csv_prefix}{startTime.strftime('%Y%m%d%H%M%S')}.csv"
         original_directory = os.getcwd()
         os.chdir(config.data_directory)
         df.to_csv(filename, index_label='time_pt')
@@ -900,8 +971,8 @@ def _get_float_value(value):
     except (ValueError, TypeError):
         return None
 
-def _get_tb_keys(config: ConfigManager, token : str) -> List[str]:
-    url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{config.api_device_id}/keys/timeseries'
+def _get_tb_keys(token : str, api_device_id : str) -> List[str]:
+    url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{api_device_id}/keys/timeseries'
 
     # Headers
     headers = {
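For orientation, a minimal sketch of how the two new tb_api_to_df parameters might be used to pull the same window from two ThingsBoard devices into separately named CSVs. The device IDs and prefixes below are placeholders; when device_id_overwrite is None the function falls back to config.api_device_id exactly as before.

    # Hypothetical device IDs; config, start, and end as in the earlier sketch.
    df_a = tb_api_to_df(config, startTime=start, endTime=end, create_csv=True,
                        device_id_overwrite="11111111-2222-3333-4444-555555555555",
                        csv_prefix="site_a_")
    df_b = tb_api_to_df(config, startTime=start, endTime=end, create_csv=True,
                        device_id_overwrite="66666666-7777-8888-9999-000000000000",
                        csv_prefix="site_b_")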
{ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/transform/transform.py
@@ -157,20 +157,29 @@ def _rm_cols(col, bounds_df): # Helper function for remove_outliers
     """
     Function will take in a pandas series and bounds information
     stored in a dataframe, then check each element of that column and set it to nan
-    if it is outside the given bounds.
+    if it is outside the given bounds. 
 
-    Args:
-        col: pd.Series
+    Args: 
+        col: pd.Series 
             Pandas dataframe column from data being processed
         bounds_df: pd.DataFrame
             Pandas dataframe indexed by the names of the columns from the dataframe that col came from. There should be at least
             two columns in this dataframe, lower_bound and upper_bound, for use in removing outliers
-    Returns:
-        None
+    Returns: 
+        None 
     """
     if (col.name in bounds_df.index):
-        c_lower = float(bounds_df.loc[col.name]["lower_bound"])
-        c_upper = float(bounds_df.loc[col.name]["upper_bound"])
+        c_lower = bounds_df.loc[col.name]["lower_bound"]
+        c_upper = bounds_df.loc[col.name]["upper_bound"]
+
+        # Skip if both bounds are NaN
+        if pd.isna(c_lower) and pd.isna(c_upper):
+            return
+
+        # Convert bounds to float, handling NaN values
+        c_lower = float(c_lower) if not pd.isna(c_lower) else -np.inf
+        c_upper = float(c_upper) if not pd.isna(c_upper) else np.inf
+
         col.mask((col > c_upper) | (col < c_lower), other=np.NaN, inplace=True)
 
 # TODO: remove_outliers STRETCH GOAL: Functionality for alarms being raised based on bounds needs to happen here.
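The effect of the reworked bounds handling, sketched with a hypothetical bounds table (the helper mutates the series in place and is normally applied column-by-column by remove_outliers):

    import numpy as np
    import pandas as pd

    # PowerIn has no lower bound configured; it is now treated as -inf
    # instead of being coerced with float().
    bounds_df = pd.DataFrame({"lower_bound": [0.0, np.nan],
                              "upper_bound": [150.0, 25.0]},
                             index=["OAT", "PowerIn"])
    col = pd.Series([-5.0, 10.0, 30.0], name="PowerIn")
    _rm_cols(col, bounds_df)
    # col is now [-5.0, 10.0, NaN]: only the value above the upper bound is masked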
@@ -784,7 +793,7 @@ def convert_on_off_col_to_bool(df: pd.DataFrame, column_names: list) -> pd.DataF
         pd.DataFrame: Dataframe with specified columns converted from Celsius to Farenhiet.
     """
 
-    mapping = {'ON': True, 'OFF': False}
+    mapping = {'ON': True, 'OFF': False, 'On': True, 'Off': False}
 
     for column_name in column_names:
         df[column_name] = df[column_name].map(mapping).where(df[column_name].notna(), df[column_name])
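A quick illustration of what the widened mapping changes: mixed-case 'On'/'Off' values previously fell outside the mapping and were turned into NaN by .map(); they now convert to booleans like their upper-case counterparts.

    df = pd.DataFrame({"compressor_state": ["On", "OFF", None, "Off"]})
    df = convert_on_off_col_to_bool(df, ["compressor_state"])
    # compressor_state -> [True, False, None, False]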
@@ -1175,6 +1184,9 @@ def join_to_hourly(hourly_data: pd.DataFrame, noaa_data: pd.DataFrame) -> pd.Dat
     pd.DataFrame: 
         A single, joined dataframe
     """
+    #fixing pipelines for new years
+    if 'OAT_NOAA' in noaa_data.columns and not noaa_data['OAT_NOAA'].notnull().any():
+        return hourly_data
     out_df = hourly_data.join(noaa_data)
     return out_df
 
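The new guard in join_to_hourly, exercised with toy frames (column names are illustrative): an all-NaN NOAA series, as can happen at the start of a new year before weather data lands, is now skipped rather than joined in as a column of NaNs.

    import numpy as np
    import pandas as pd

    idx = pd.date_range("2025-01-01", periods=3, freq="h", tz="US/Pacific")
    hourly = pd.DataFrame({"PowerIn_Total": [1.0, 2.0, 3.0]}, index=idx)
    noaa = pd.DataFrame({"OAT_NOAA": [np.nan] * 3}, index=idx)
    out = join_to_hourly(hourly, noaa)
    # out is hourly unchanged; the all-NaN OAT_NOAA column is not joined in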
{ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/ConfigManager.py
@@ -4,6 +4,9 @@ import mysql.connector
 import mysql.connector.cursor
 import requests
 from datetime import datetime
+import base64
+import hashlib
+import hmac
 
 class ConfigManager:
     """
@@ -56,6 +59,8 @@ class ConfigManager:
         self.data_directory = data_directory
         self.api_usr = None
         self.api_pw = None
+        self.api_token = None
+        self.api_secret = None
         self.api_device_id = None
         if self.data_directory is None:
             configured_data_method = False
@@ -74,6 +79,11 @@ class ConfigManager:
                 self.api_pw = configure.get('data', 'api_pw')
                 self.api_device_id = configure.get('data','device_id')
                 configured_data_method = True
+            elif 'api_token' in configure['data'] and 'api_secret' in configure['data']:
+                self.api_token = configure.get('data', 'api_token')
+                self.api_secret = configure.get('data', 'api_secret')
+                self.api_device_id = configure.get('data','device_id')
+                configured_data_method = True
             if not configured_data_method:
                 raise Exception('data configuration section missing or incomplete in configuration file.')
 
@@ -261,4 +271,13 @@ class ConfigManager:
     def get_fm_device_id(self) -> str:
         if self.api_device_id is None:
             raise Exception("Field Manager device ID has not been configured.")
-        return self.api_device_id
+        return self.api_device_id
+
+    def get_skycentrics_token(self, request_str = 'GET /api/devices/ HTTP/1.', date_str : str = None) -> tuple:
+        if date_str is None:
+            date_str = datetime.utcnow().strftime('%a, %d %b %H:%M:%S GMT')
+        signature = base64.b64encode(hmac.new(self.api_secret.encode(),
+                                              '{}\n{}\n{}\n{}'.format(request_str, date_str, '', hashlib.md5(''.encode()).hexdigest()).encode(),
+                                              hashlib.sha1).digest())
+        token = '{}:{}'.format(self.api_token, signature.decode())
+        return token, date_str
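A stand-alone sketch of the signing scheme implemented above, with fake credentials, for readers who want to reproduce a token outside the class. The signed message is the request line, the Date header, an empty content type, and the MD5 hex digest of an empty body, MACed with HMAC-SHA1 and base64-encoded:

    import base64
    import hashlib
    import hmac
    from datetime import datetime, timezone

    api_token, api_secret = "my-token", "my-secret"      # fake credentials
    request_str = 'GET /api/devices/1234/data HTTP/1.1'  # hypothetical device id
    date_str = datetime.now(timezone.utc).strftime('%a, %d %b %H:%M:%S GMT')

    message = '{}\n{}\n{}\n{}'.format(request_str, date_str, '',
                                      hashlib.md5(b'').hexdigest())
    signature = base64.b64encode(hmac.new(api_secret.encode(), message.encode(),
                                          hashlib.sha1).digest())
    token = '{}:{}'.format(api_token, signature.decode())
    # token is sent as 'x-sc-api-token' and date_str as 'Date' (see skycentrics_api_to_df)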
{ecopipeline-1.0.0 → ecopipeline-1.0.2/src/ecopipeline.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ecopipeline
-Version: 1.0.0
+Version: 1.0.2
 Summary: Contains functions for use in Ecotope Datapipelines
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: GNU General Public License (GPL)