ecopipeline 0.8.7__tar.gz → 0.8.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {ecopipeline-0.8.7/src/ecopipeline.egg-info → ecopipeline-0.8.9}/PKG-INFO +1 -1
  2. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/setup.cfg +1 -1
  3. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/extract/extract.py +54 -7
  4. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/transform/transform.py +7 -6
  5. {ecopipeline-0.8.7 → ecopipeline-0.8.9/src/ecopipeline.egg-info}/PKG-INFO +1 -1
  6. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/LICENSE +0 -0
  7. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/README.md +0 -0
  8. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/pyproject.toml +0 -0
  9. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/setup.py +0 -0
  10. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/__init__.py +0 -0
  11. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/event_tracking/__init__.py +0 -0
  12. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/event_tracking/event_tracking.py +0 -0
  13. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/extract/__init__.py +0 -0
  14. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/load/__init__.py +0 -0
  15. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/load/load.py +0 -0
  16. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/transform/__init__.py +0 -0
  17. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/transform/bayview.py +0 -0
  18. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/transform/lbnl.py +0 -0
  19. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/utils/ConfigManager.py +0 -0
  20. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/utils/__init__.py +0 -0
  21. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline/utils/unit_convert.py +0 -0
  22. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline.egg-info/SOURCES.txt +0 -0
  23. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline.egg-info/dependency_links.txt +0 -0
  24. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline.egg-info/requires.txt +0 -0
  25. {ecopipeline-0.8.7 → ecopipeline-0.8.9}/src/ecopipeline.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ecopipeline
3
- Version: 0.8.7
3
+ Version: 0.8.9
4
4
  Summary: Contains functions for use in Ecotope Datapipelines
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = ecopipeline
3
- version = 0.8.7
3
+ version = 0.8.9
4
4
  authors = ["Carlos Bello, <bellocarlos@seattleu.edu>, Emil Fahrig <fahrigemil@seattleu.edu>, Casey Mang <cmang@seattleu.edu>, Julian Harris <harrisjulian@seattleu.edu>, Roger Tram <rtram@seattleu.edu>, Nolan Price <nolan@ecotope.com>"]
5
5
  description = Contains functions for use in Ecotope Datapipelines
6
6
  long_description = file: README.md
@@ -658,7 +658,7 @@ def egauge_csv_to_df(csv_filenames: List[str]) -> pd.DataFrame:
658
658
 
659
659
  return df_diff
660
660
 
661
- def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None) -> pd.DataFrame:
661
+ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True) -> pd.DataFrame:
662
662
  """
663
663
  Function connects to the field manager api to pull data and returns a dataframe.
664
664
 
@@ -674,6 +674,8 @@ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
674
674
  endTime: datetime
675
675
  The point in time for which we want to end the data extraction. This
676
676
  is local time from the data's index.
677
+ create_csv : bool
678
+ create csv files as you process such that API need not be relied upon for reprocessing
677
679
 
678
680
  Returns
679
681
  -------
@@ -715,6 +717,12 @@ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
715
717
  df.set_index('time_pt', inplace=True)
716
718
  df = df.sort_index()
717
719
  df = df.groupby(df.index).mean()
720
+ if create_csv:
721
+ filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
722
+ original_directory = os.getcwd()
723
+ os.chdir(config.data_directory)
724
+ df.to_csv(filename, index_label='time_pt')
725
+ os.chdir(original_directory)
718
726
  return df
719
727
  elif response.status_code == 500:
720
728
  json_message = response.json()
@@ -724,11 +732,17 @@ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
724
732
  time_diff = endTime - startTime
725
733
  midpointTime = startTime + time_diff / 2
726
734
  # recursively construct the df
727
- df_1 = fm_api_to_df(config, startTime, midpointTime)
728
- df_2 = fm_api_to_df(config, midpointTime, endTime)
735
+ df_1 = fm_api_to_df(config, startTime, midpointTime, create_csv=False)
736
+ df_2 = fm_api_to_df(config, midpointTime, endTime, create_csv=False)
729
737
  df = pd.concat([df_1, df_2])
730
738
  df = df.sort_index()
731
739
  df = df.groupby(df.index).mean()
740
+ if create_csv:
741
+ filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
742
+ original_directory = os.getcwd()
743
+ os.chdir(config.data_directory)
744
+ df.to_csv(filename, index_label='time_pt')
745
+ os.chdir(original_directory)
732
746
  return df
733
747
 
734
748
  print(f"Failed to make GET request. Status code: {response.status_code} {response.json()}")
@@ -753,22 +767,55 @@ def pull_egauge_data(config: ConfigManager, eGauge_ids: list, eGauge_usr : str,
753
767
 
754
768
  os.chdir(original_directory)
755
769
 
756
- def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv = True):
770
+ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True, query_hours : int = 12):
771
+ """
772
+ Function connects to the things board manager api to pull data and returns a dataframe.
773
+
774
+ Parameters
775
+ ----------
776
+ config : ecopipeline.ConfigManager
777
+ The ConfigManager object that holds configuration data for the pipeline. The config manager
778
+ must contain information to connect to the api, i.e. the api user name and password as well as
779
+ the device id for the device the data is being pulled from.
780
+ startTime: datetime
781
+ The point in time for which we want to start the data extraction from. This
782
+ is local time from the data's index.
783
+ endTime: datetime
784
+ The point in time for which we want to end the data extraction. This
785
+ is local time from the data's index.
786
+ create_csv : bool
787
+ create csv files as you process such that API need not be relied upon for reprocessing
788
+ query_hours : int
789
+ number of hours to query at a time from ThingsBoard API
790
+
791
+ Returns
792
+ -------
793
+ pd.DataFrame:
794
+ Pandas Dataframe containing data from the API pull with column headers the same as the variable names in the data from the pull.
795
+ Will return with index in UTC so needs to be converted after to appropriate timezone
796
+ """
757
797
  if endTime is None:
758
798
  endTime = datetime.now()
759
799
  if startTime is None:
760
800
  # 28 hours to ensure encapsulation of last day
761
801
  startTime = endTime - timedelta(hours=28)
762
802
 
763
- if endTime - timedelta(hours=12) > startTime:
803
+ if endTime - timedelta(hours=query_hours) > startTime:
764
804
  time_diff = endTime - startTime
765
805
  midpointTime = startTime + time_diff / 2
766
806
  # recursively construct the df
767
- df_1 = tb_api_to_df(config, startTime, midpointTime)
768
- df_2 = tb_api_to_df(config, midpointTime, endTime)
807
+ df_1 = tb_api_to_df(config, startTime, midpointTime, create_csv=False)
808
+ df_2 = tb_api_to_df(config, midpointTime, endTime, create_csv=False)
769
809
  df = pd.concat([df_1, df_2])
770
810
  df = df.sort_index()
771
811
  df = df.groupby(df.index).mean()
812
+ if create_csv:
813
+ filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
814
+ original_directory = os.getcwd()
815
+ os.chdir(config.data_directory)
816
+ df.to_csv(filename, index_label='time_pt')
817
+ os.chdir(original_directory)
818
+
772
819
  return df
773
820
  url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{config.api_device_id}/values/timeseries'
774
821
  token = config.get_thingsboard_token()
@@ -1104,19 +1104,19 @@ def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
1104
1104
 
1105
1105
  # Reindex to include any completely missing minutes
1106
1106
  df_full = df.reindex(full_index)
1107
-
1107
+ # df_full = df_full.select_dtypes(include='number')
1108
+ # print("1",df_full)
1108
1109
  # Resample daily to count missing values per column
1109
1110
  total_missing = df_full.isna().resample('D').sum().astype(int)
1110
-
1111
1111
  # Function to calculate max consecutive missing values
1112
1112
  def max_consecutive_nans(x):
1113
- is_na = x.isna()
1113
+ is_na = pd.Series(x).isna().reset_index(drop=True)
1114
1114
  groups = (is_na != is_na.shift()).cumsum()
1115
1115
  return is_na.groupby(groups).sum().max() or 0
1116
1116
 
1117
1117
  # Function to calculate average consecutive missing values
1118
1118
  def avg_consecutive_nans(x):
1119
- is_na = x.isna()
1119
+ is_na = pd.Series(x).isna().reset_index(drop=True)
1120
1120
  groups = (is_na != is_na.shift()).cumsum()
1121
1121
  gap_lengths = is_na.groupby(groups).sum()
1122
1122
  gap_lengths = gap_lengths[gap_lengths > 0]
@@ -1125,8 +1125,9 @@ def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
1125
1125
  return gap_lengths.mean()
1126
1126
 
1127
1127
  # Apply daily, per column
1128
- max_consec_missing = df_full.resample('D').apply(lambda day: day.apply(max_consecutive_nans))
1129
- avg_consec_missing = df_full.resample('D').apply(lambda day: day.apply(avg_consecutive_nans))
1128
+ # print("hello?",type(df_full.index))
1129
+ max_consec_missing = df_full.resample('D').agg(max_consecutive_nans)
1130
+ avg_consec_missing = df_full.resample('D').agg(avg_consecutive_nans)
1130
1131
 
1131
1132
  # Rename columns to include a suffix
1132
1133
  total_missing = total_missing.add_suffix('_missing_mins')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ecopipeline
3
- Version: 0.8.7
3
+ Version: 0.8.9
4
4
  Summary: Contains functions for use in Ecotope Datapipelines
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
File without changes
File without changes
File without changes
File without changes