PyPI - ecopipeline - Versions diffs - 0.8.6__py3-none-any.whl → 0.8.8__py3-none-any.whl - Mend

ecopipeline 0.8.6__py3-none-any.whl → 0.8.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

ecopipeline/extract/extract.py CHANGED Viewed

@@ -658,7 +658,7 @@ def egauge_csv_to_df(csv_filenames: List[str]) -> pd.DataFrame:
     return df_diff
-def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None) -> pd.DataFrame:
+def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True) -> pd.DataFrame:
     """
     Function connects to the field manager api to pull data and returns a dataframe.
@@ -674,6 +674,8 @@ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
     endTime: datetime
         The point in time for which we want to end the data extraction. This
         is local time from the data's index.
+    create_csv : bool
+        create csv files as you process such that API need not be relied upon for reprocessing
     Returns
     -------
@@ -715,6 +717,12 @@ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
                 df.set_index('time_pt', inplace=True)
                 df = df.sort_index()
                 df = df.groupby(df.index).mean()
+            if create_csv:
+                filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
+                original_directory = os.getcwd()
+                os.chdir(config.data_directory)
+                df.to_csv(filename, index_label='time_pt')
+                os.chdir(original_directory)
             return df
         elif response.status_code == 500:
             json_message = response.json()
@@ -753,7 +761,30 @@ def pull_egauge_data(config: ConfigManager, eGauge_ids: list, eGauge_usr : str,
     os.chdir(original_directory)
-def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None):
+def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True):
+    """
+    Function connects to the things board manager api to pull data and returns a dataframe.
+    Parameters
+    ----------
+    config : ecopipeline.ConfigManager
+        The ConfigManager object that holds configuration data for the pipeline. The config manager
+        must contain information to connect to the api, i.e. the api user name and password as well as
+        the device id for the device the data is being pulled from.
+    startTime: datetime
+        The point in time for which we want to start the data extraction from. This
+        is local time from the data's index.
+    endTime: datetime
+        The point in time for which we want to end the data extraction. This
+        is local time from the data's index.
+    create_csv : bool
+        create csv files as you process such that API need not be relied upon for reprocessing
+    Returns
+    -------
+    pd.DataFrame:
+        Pandas Dataframe containing data from the API pull with column headers the same as the variable names in the data from the pull
+    """
     if endTime is None:
         endTime = datetime.now()
     if startTime is None:
@@ -770,7 +801,6 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
         df = df.sort_index()
         df = df.groupby(df.index).mean()
         return df
     url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{config.api_device_id}/values/timeseries'
     token = config.get_thingsboard_token()
     keys = _get_tb_keys(config, token)
@@ -808,8 +838,14 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
             df = pd.DataFrame(data)
             df.index = pd.to_datetime(df.index, unit='ms')
             df = df.sort_index()
+            # save to file
+            if create_csv:
+                filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
+                original_directory = os.getcwd()
+                os.chdir(config.data_directory)
+                df.to_csv(filename, index_label='time_pt')
+                os.chdir(original_directory)
             return df
         print(f"Failed to make GET request. Status code: {response.status_code} {response.json()}")
         return pd.DataFrame()
     except Exception as e:

ecopipeline/transform/transform.py CHANGED Viewed

@@ -1104,19 +1104,23 @@ def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
     # Reindex to include any completely missing minutes
     df_full = df.reindex(full_index)
+    # df_full = df_full.select_dtypes(include='number')
+    # print("1",df_full)
     # Resample daily to count missing values per column
     total_missing = df_full.isna().resample('D').sum().astype(int)
     # Function to calculate max consecutive missing values
     def max_consecutive_nans(x):
-        is_na = x.isna()
-        groups = (is_na != is_na.shift()).cumsum()
-        return is_na.groupby(groups).sum().max() or 0
+        try:
+            # print(f"here is x :{x}")
+            is_na = pd.Series(x).isna().reset_index(drop=True)
+            groups = (is_na != is_na.shift()).cumsum()
+            return is_na.groupby(groups).sum().max() or 0
+        except:
+            raise Exception(f"{x} is the problem!")
     # Function to calculate average consecutive missing values
     def avg_consecutive_nans(x):
-        is_na = x.isna()
+        is_na = pd.Series(x).isna().reset_index(drop=True)
         groups = (is_na != is_na.shift()).cumsum()
         gap_lengths = is_na.groupby(groups).sum()
         gap_lengths = gap_lengths[gap_lengths > 0]
@@ -1125,8 +1129,9 @@ def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
         return gap_lengths.mean()
     # Apply daily, per column
-    max_consec_missing = df_full.resample('D').apply(lambda day: day.apply(max_consecutive_nans))
-    avg_consec_missing = df_full.resample('D').apply(lambda day: day.apply(avg_consecutive_nans))
+    # print("hello?",type(df_full.index))
+    max_consec_missing = df_full.resample('D').agg(max_consecutive_nans)
+    avg_consec_missing = df_full.resample('D').agg(avg_consecutive_nans)
     # Rename columns to include a suffix
     total_missing = total_missing.add_suffix('_missing_mins')

{ecopipeline-0.8.6.dist-info → ecopipeline-0.8.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ecopipeline
-Version: 0.8.6
+Version: 0.8.8
 Summary: Contains functions for use in Ecotope Datapipelines
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: GNU General Public License (GPL)

{ecopipeline-0.8.6.dist-info → ecopipeline-0.8.8.dist-info}/RECORD RENAMED Viewed

@@ -2,18 +2,18 @@ ecopipeline/__init__.py,sha256=d48mO5La6OrQDkRe_qqoY6lUx7x-e8krOH388jmWjwU,218
 ecopipeline/event_tracking/__init__.py,sha256=q49j46fXMUjNUPzL4FvXEppB93i3lUni-QUZpp61tt0,64
 ecopipeline/event_tracking/event_tracking.py,sha256=LOCLE7ju320O7CrwnWRIqHRa2uAqoq-KvXZ3zWQ2S74,13224
 ecopipeline/extract/__init__.py,sha256=gQ3sak6NJ63Gpo-hZXrtZfeKOTHLRyAVXfTgxxRpqPo,675
-ecopipeline/extract/extract.py,sha256=Fch2IvqGxMClKGniiJiKqyJhcFDwkckaeiGQRVnUGUg,46708
+ecopipeline/extract/extract.py,sha256=EAe1oSmmSMd-mG9QY-4wTzXwY4ziUaQw5Vrkgt4ZXNY,48542
 ecopipeline/load/__init__.py,sha256=NLa_efQJZ8aP-J0Y5xx9DP7mtfRH9jY6Jz1ZMZN_BAA,292
 ecopipeline/load/load.py,sha256=Ptxr0MOjns_HeVSmZsLLApHJGB-z6XOB2m8LNiVaD7E,23860
 ecopipeline/transform/__init__.py,sha256=hYb4F64fXdXtjBSYCqv6gLFBwKZjjnl0z7s291pFE98,2505
 ecopipeline/transform/bayview.py,sha256=TP24dnTsUD95X-f6732egPZKjepFLJgDm9ImGr-fppY,17899
 ecopipeline/transform/lbnl.py,sha256=EQ54G4rJXaZ7pwVusKcdK2KBehSdCsNo2ybphtMGs7o,33400
-ecopipeline/transform/transform.py,sha256=DBQD4WqKmdXnGQMAj6tg75HtXiSemIc7c6nZxzA2aXc,48958
+ecopipeline/transform/transform.py,sha256=J-QxcwjWkocPrHJ_mQD4m57HenwwQZnFYgU0BSMTkgQ,49229
 ecopipeline/utils/ConfigManager.py,sha256=-g1wtExdvhYO5Y6Q3cRbywa__DxRMFruLrB4YanwaPY,12168
 ecopipeline/utils/__init__.py,sha256=ccWUR0m7gD9DfcgsxBCLOfi4lho6RdYuB2Ugy_g6ZdQ,28
 ecopipeline/utils/unit_convert.py,sha256=VFh1we2Y8KV3u21BeWb-U3TlZJXo83q5vdxxkpgcuME,3064
-ecopipeline-0.8.6.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ecopipeline-0.8.6.dist-info/METADATA,sha256=-VzwnJWFOLTa3NoreQAXc0PFJNGn0eJoiRTFf8i1jmc,2329
-ecopipeline-0.8.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-ecopipeline-0.8.6.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
-ecopipeline-0.8.6.dist-info/RECORD,,
+ecopipeline-0.8.8.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ecopipeline-0.8.8.dist-info/METADATA,sha256=bmYXzXG0itQmMRAMBIBbstHteeYgiFVpbRVF5gyMOf8,2329
+ecopipeline-0.8.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ecopipeline-0.8.8.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
+ecopipeline-0.8.8.dist-info/RECORD,,

{ecopipeline-0.8.6.dist-info → ecopipeline-0.8.8.dist-info}/WHEEL RENAMED Viewed

File without changes

{ecopipeline-0.8.6.dist-info → ecopipeline-0.8.8.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{ecopipeline-0.8.6.dist-info → ecopipeline-0.8.8.dist-info}/top_level.txt RENAMED Viewed

File without changes

ecopipeline 0.8.6__py3-none-any.whl → 0.8.8__py3-none-any.whl

ecopipeline 0.8.6__py3-none-any.whl → 0.8.8__py3-none-any.whl