ecopipeline 0.8.7__tar.gz → 0.8.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {ecopipeline-0.8.7/src/ecopipeline.egg-info → ecopipeline-0.8.9}/PKG-INFO +1 -1
  2. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/setup.cfg +1 -1
  3. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/extract/extract.py +54 -7
  4. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/transform/transform.py +7 -6
  5. {ecopipeline-0.8.7 → ecopipeline-0.8.9/src/ecopipeline.egg-info}/PKG-INFO +1 -1
  6. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/LICENSE +0 -0
  7. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/README.md +0 -0
  8. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/pyproject.toml +0 -0
  9. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/setup.py +0 -0
  10. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/__init__.py +0 -0
  11. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/event_tracking/__init__.py +0 -0
  12. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/event_tracking/event_tracking.py +0 -0
  13. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/extract/__init__.py +0 -0
  14. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/load/__init__.py +0 -0
  15. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/load/load.py +0 -0
  16. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/transform/__init__.py +0 -0
  17. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/transform/bayview.py +0 -0
  18. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/transform/lbnl.py +0 -0
  19. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/utils/ConfigManager.py +0 -0
  20. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/utils/__init__.py +0 -0
  21. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/utils/unit_convert.py +0 -0
  22. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline.egg-info/SOURCES.txt +0 -0
  23. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline.egg-info/dependency_links.txt +0 -0
  24. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline.egg-info/requires.txt +0 -0
  25. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ecopipeline
3
- Version: 0.8.7
3
+ Version: 0.8.9
4
4
  Summary: Contains functions for use in Ecotope Datapipelines
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = ecopipeline
3
- version = 0.8.7
3
+ version = 0.8.9
4
4
  authors = ["Carlos Bello, <bellocarlos@seattleu.edu>, Emil Fahrig <fahrigemil@seattleu.edu>, Casey Mang <cmang@seattleu.edu>, Julian Harris <harrisjulian@seattleu.edu>, Roger Tram <rtram@seattleu.edu>, Nolan Price <nolan@ecotope.com>"]
5
5
  description = Contains functions for use in Ecotope Datapipelines
6
6
  long_description = file: README.md
@@ -658,7 +658,7 @@ def egauge_csv_to_df(csv_filenames: List[str]) -> pd.DataFrame:
658
658
 
659
659
  return df_diff
660
660
 
661
- def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None) -> pd.DataFrame:
661
+ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True) -> pd.DataFrame:
662
662
  """
663
663
  Function connects to the field manager api to pull data and returns a dataframe.
664
664
 
@@ -674,6 +674,8 @@ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
674
674
  endTime: datetime
675
675
  The point in time for which we want to end the data extraction. This
676
676
  is local time from the data's index.
677
+ create_csv : bool
678
+ create csv files as you process such that API need not be relied upon for reprocessing
677
679
 
678
680
  Returns
679
681
  -------
@@ -715,6 +717,12 @@ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
715
717
  df.set_index('time_pt', inplace=True)
716
718
  df = df.sort_index()
717
719
  df = df.groupby(df.index).mean()
720
+ if create_csv:
721
+ filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
722
+ original_directory = os.getcwd()
723
+ os.chdir(config.data_directory)
724
+ df.to_csv(filename, index_label='time_pt')
725
+ os.chdir(original_directory)
718
726
  return df
719
727
  elif response.status_code == 500:
720
728
  json_message = response.json()
@@ -724,11 +732,17 @@ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
724
732
  time_diff = endTime - startTime
725
733
  midpointTime = startTime + time_diff / 2
726
734
  # recursively construct the df
727
- df_1 = fm_api_to_df(config, startTime, midpointTime)
728
- df_2 = fm_api_to_df(config, midpointTime, endTime)
735
+ df_1 = fm_api_to_df(config, startTime, midpointTime, create_csv=False)
736
+ df_2 = fm_api_to_df(config, midpointTime, endTime, create_csv=False)
729
737
  df = pd.concat([df_1, df_2])
730
738
  df = df.sort_index()
731
739
  df = df.groupby(df.index).mean()
740
+ if create_csv:
741
+ filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
742
+ original_directory = os.getcwd()
743
+ os.chdir(config.data_directory)
744
+ df.to_csv(filename, index_label='time_pt')
745
+ os.chdir(original_directory)
732
746
  return df
733
747
 
734
748
  print(f"Failed to make GET request. Status code: {response.status_code} {response.json()}")
@@ -753,22 +767,55 @@ def pull_egauge_data(config: ConfigManager, eGauge_ids: list, eGauge_usr : str,
753
767
 
754
768
  os.chdir(original_directory)
755
769
 
756
- def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv = True):
770
+ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True, query_hours : int = 12):
771
+ """
772
+ Function connects to the things board manager api to pull data and returns a dataframe.
773
+
774
+ Parameters
775
+ ----------
776
+ config : ecopipeline.ConfigManager
777
+ The ConfigManager object that holds configuration data for the pipeline. The config manager
778
+ must contain information to connect to the api, i.e. the api user name and password as well as
779
+ the device id for the device the data is being pulled from.
780
+ startTime: datetime
781
+ The point in time for which we want to start the data extraction from. This
782
+ is local time from the data's index.
783
+ endTime: datetime
784
+ The point in time for which we want to end the data extraction. This
785
+ is local time from the data's index.
786
+ create_csv : bool
787
+ create csv files as you process such that API need not be relied upon for reprocessing
788
+ query_hours : int
789
+ number of hours to query at a time from ThingsBoard API
790
+
791
+ Returns
792
+ -------
793
+ pd.DataFrame:
794
+ Pandas Dataframe containing data from the API pull with column headers the same as the variable names in the data from the pull.
795
+ Will return with index in UTC so needs to be converted after to appropriate timezone
796
+ """
757
797
  if endTime is None:
758
798
  endTime = datetime.now()
759
799
  if startTime is None:
760
800
  # 28 hours to ensure encapsulation of last day
761
801
  startTime = endTime - timedelta(hours=28)
762
802
 
763
- if endTime - timedelta(hours=12) > startTime:
803
+ if endTime - timedelta(hours=query_hours) > startTime:
764
804
  time_diff = endTime - startTime
765
805
  midpointTime = startTime + time_diff / 2
766
806
  # recursively construct the df
767
- df_1 = tb_api_to_df(config, startTime, midpointTime)
768
- df_2 = tb_api_to_df(config, midpointTime, endTime)
807
+ df_1 = tb_api_to_df(config, startTime, midpointTime, create_csv=False)
808
+ df_2 = tb_api_to_df(config, midpointTime, endTime, create_csv=False)
769
809
  df = pd.concat([df_1, df_2])
770
810
  df = df.sort_index()
771
811
  df = df.groupby(df.index).mean()
812
+ if create_csv:
813
+ filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
814
+ original_directory = os.getcwd()
815
+ os.chdir(config.data_directory)
816
+ df.to_csv(filename, index_label='time_pt')
817
+ os.chdir(original_directory)
818
+
772
819
  return df
773
820
  url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{config.api_device_id}/values/timeseries'
774
821
  token = config.get_thingsboard_token()
@@ -1104,19 +1104,19 @@ def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
1104
1104
 
1105
1105
  # Reindex to include any completely missing minutes
1106
1106
  df_full = df.reindex(full_index)
1107
-
1107
+ # df_full = df_full.select_dtypes(include='number')
1108
+ # print("1",df_full)
1108
1109
  # Resample daily to count missing values per column
1109
1110
  total_missing = df_full.isna().resample('D').sum().astype(int)
1110
-
1111
1111
  # Function to calculate max consecutive missing values
1112
1112
  def max_consecutive_nans(x):
1113
- is_na = x.isna()
1113
+ is_na = pd.Series(x).isna().reset_index(drop=True)
1114
1114
  groups = (is_na != is_na.shift()).cumsum()
1115
1115
  return is_na.groupby(groups).sum().max() or 0
1116
1116
 
1117
1117
  # Function to calculate average consecutive missing values
1118
1118
  def avg_consecutive_nans(x):
1119
- is_na = x.isna()
1119
+ is_na = pd.Series(x).isna().reset_index(drop=True)
1120
1120
  groups = (is_na != is_na.shift()).cumsum()
1121
1121
  gap_lengths = is_na.groupby(groups).sum()
1122
1122
  gap_lengths = gap_lengths[gap_lengths > 0]
@@ -1125,8 +1125,9 @@ def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
1125
1125
  return gap_lengths.mean()
1126
1126
 
1127
1127
  # Apply daily, per column
1128
- max_consec_missing = df_full.resample('D').apply(lambda day: day.apply(max_consecutive_nans))
1129
- avg_consec_missing = df_full.resample('D').apply(lambda day: day.apply(avg_consecutive_nans))
1128
+ # print("hello?",type(df_full.index))
1129
+ max_consec_missing = df_full.resample('D').agg(max_consecutive_nans)
1130
+ avg_consec_missing = df_full.resample('D').agg(avg_consecutive_nans)
1130
1131
 
1131
1132
  # Rename columns to include a suffix
1132
1133
  total_missing = total_missing.add_suffix('_missing_mins')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ecopipeline
3
- Version: 0.8.7
3
+ Version: 0.8.9
4
4
  Summary: Contains functions for use in Ecotope Datapipelines
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
File without changes
File without changes
File without changes
File without changes