ecopipeline 0.8.6__tar.gz → 0.8.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {ecopipeline-0.8.6/src/ecopipeline.egg-info → ecopipeline-0.8.8}/PKG-INFO +1 -1
  2. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/setup.cfg +1 -1
  3. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline/extract/extract.py +40 -4
  4. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline/transform/transform.py +13 -8
  5. {ecopipeline-0.8.6 → ecopipeline-0.8.8/src/ecopipeline.egg-info}/PKG-INFO +1 -1
  6. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/LICENSE +0 -0
  7. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/README.md +0 -0
  8. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/pyproject.toml +0 -0
  9. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/setup.py +0 -0
  10. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline/__init__.py +0 -0
  11. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline/event_tracking/__init__.py +0 -0
  12. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline/event_tracking/event_tracking.py +0 -0
  13. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline/extract/__init__.py +0 -0
  14. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline/load/__init__.py +0 -0
  15. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline/load/load.py +0 -0
  16. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline/transform/__init__.py +0 -0
  17. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline/transform/bayview.py +0 -0
  18. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline/transform/lbnl.py +0 -0
  19. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline/utils/ConfigManager.py +0 -0
  20. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline/utils/__init__.py +0 -0
  21. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline/utils/unit_convert.py +0 -0
  22. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline.egg-info/SOURCES.txt +0 -0
  23. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline.egg-info/dependency_links.txt +0 -0
  24. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline.egg-info/requires.txt +0 -0
  25. {ecopipeline-0.8.6 → ecopipeline-0.8.8}/src/ecopipeline.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ecopipeline
3
- Version: 0.8.6
3
+ Version: 0.8.8
4
4
  Summary: Contains functions for use in Ecotope Datapipelines
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = ecopipeline
3
- version = 0.8.6
3
+ version = 0.8.8
4
4
  authors = ["Carlos Bello, <bellocarlos@seattleu.edu>, Emil Fahrig <fahrigemil@seattleu.edu>, Casey Mang <cmang@seattleu.edu>, Julian Harris <harrisjulian@seattleu.edu>, Roger Tram <rtram@seattleu.edu>, Nolan Price <nolan@ecotope.com>"]
5
5
  description = Contains functions for use in Ecotope Datapipelines
6
6
  long_description = file: README.md
@@ -658,7 +658,7 @@ def egauge_csv_to_df(csv_filenames: List[str]) -> pd.DataFrame:
658
658
 
659
659
  return df_diff
660
660
 
661
- def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None) -> pd.DataFrame:
661
+ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True) -> pd.DataFrame:
662
662
  """
663
663
  Function connects to the field manager api to pull data and returns a dataframe.
664
664
 
@@ -674,6 +674,8 @@ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
674
674
  endTime: datetime
675
675
  The point in time for which we want to end the data extraction. This
676
676
  is local time from the data's index.
677
+ create_csv : bool
678
+ create csv files as you process such that API need not be relied upon for reprocessing
677
679
 
678
680
  Returns
679
681
  -------
@@ -715,6 +717,12 @@ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
715
717
  df.set_index('time_pt', inplace=True)
716
718
  df = df.sort_index()
717
719
  df = df.groupby(df.index).mean()
720
+ if create_csv:
721
+ filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
722
+ original_directory = os.getcwd()
723
+ os.chdir(config.data_directory)
724
+ df.to_csv(filename, index_label='time_pt')
725
+ os.chdir(original_directory)
718
726
  return df
719
727
  elif response.status_code == 500:
720
728
  json_message = response.json()
@@ -753,7 +761,30 @@ def pull_egauge_data(config: ConfigManager, eGauge_ids: list, eGauge_usr : str,
753
761
 
754
762
  os.chdir(original_directory)
755
763
 
756
- def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None):
764
+ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True):
765
+ """
766
+ Function connects to the things board manager api to pull data and returns a dataframe.
767
+
768
+ Parameters
769
+ ----------
770
+ config : ecopipeline.ConfigManager
771
+ The ConfigManager object that holds configuration data for the pipeline. The config manager
772
+ must contain information to connect to the api, i.e. the api user name and password as well as
773
+ the device id for the device the data is being pulled from.
774
+ startTime: datetime
775
+ The point in time for which we want to start the data extraction from. This
776
+ is local time from the data's index.
777
+ endTime: datetime
778
+ The point in time for which we want to end the data extraction. This
779
+ is local time from the data's index.
780
+ create_csv : bool
781
+ create csv files as you process such that API need not be relied upon for reprocessing
782
+
783
+ Returns
784
+ -------
785
+ pd.DataFrame:
786
+ Pandas Dataframe containing data from the API pull with column headers the same as the variable names in the data from the pull
787
+ """
757
788
  if endTime is None:
758
789
  endTime = datetime.now()
759
790
  if startTime is None:
@@ -770,7 +801,6 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
770
801
  df = df.sort_index()
771
802
  df = df.groupby(df.index).mean()
772
803
  return df
773
-
774
804
  url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{config.api_device_id}/values/timeseries'
775
805
  token = config.get_thingsboard_token()
776
806
  keys = _get_tb_keys(config, token)
@@ -808,8 +838,14 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
808
838
  df = pd.DataFrame(data)
809
839
  df.index = pd.to_datetime(df.index, unit='ms')
810
840
  df = df.sort_index()
841
+ # save to file
842
+ if create_csv:
843
+ filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
844
+ original_directory = os.getcwd()
845
+ os.chdir(config.data_directory)
846
+ df.to_csv(filename, index_label='time_pt')
847
+ os.chdir(original_directory)
811
848
  return df
812
-
813
849
  print(f"Failed to make GET request. Status code: {response.status_code} {response.json()}")
814
850
  return pd.DataFrame()
815
851
  except Exception as e:
@@ -1104,19 +1104,23 @@ def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
1104
1104
 
1105
1105
  # Reindex to include any completely missing minutes
1106
1106
  df_full = df.reindex(full_index)
1107
-
1107
+ # df_full = df_full.select_dtypes(include='number')
1108
+ # print("1",df_full)
1108
1109
  # Resample daily to count missing values per column
1109
1110
  total_missing = df_full.isna().resample('D').sum().astype(int)
1110
-
1111
1111
  # Function to calculate max consecutive missing values
1112
1112
  def max_consecutive_nans(x):
1113
- is_na = x.isna()
1114
- groups = (is_na != is_na.shift()).cumsum()
1115
- return is_na.groupby(groups).sum().max() or 0
1113
+ try:
1114
+ # print(f"here is x :{x}")
1115
+ is_na = pd.Series(x).isna().reset_index(drop=True)
1116
+ groups = (is_na != is_na.shift()).cumsum()
1117
+ return is_na.groupby(groups).sum().max() or 0
1118
+ except:
1119
+ raise Exception(f"{x} is the problem!")
1116
1120
 
1117
1121
  # Function to calculate average consecutive missing values
1118
1122
  def avg_consecutive_nans(x):
1119
- is_na = x.isna()
1123
+ is_na = pd.Series(x).isna().reset_index(drop=True)
1120
1124
  groups = (is_na != is_na.shift()).cumsum()
1121
1125
  gap_lengths = is_na.groupby(groups).sum()
1122
1126
  gap_lengths = gap_lengths[gap_lengths > 0]
@@ -1125,8 +1129,9 @@ def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
1125
1129
  return gap_lengths.mean()
1126
1130
 
1127
1131
  # Apply daily, per column
1128
- max_consec_missing = df_full.resample('D').apply(lambda day: day.apply(max_consecutive_nans))
1129
- avg_consec_missing = df_full.resample('D').apply(lambda day: day.apply(avg_consecutive_nans))
1132
+ # print("hello?",type(df_full.index))
1133
+ max_consec_missing = df_full.resample('D').agg(max_consecutive_nans)
1134
+ avg_consec_missing = df_full.resample('D').agg(avg_consecutive_nans)
1130
1135
 
1131
1136
  # Rename columns to include a suffix
1132
1137
  total_missing = total_missing.add_suffix('_missing_mins')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ecopipeline
3
- Version: 0.8.6
3
+ Version: 0.8.8
4
4
  Summary: Contains functions for use in Ecotope Datapipelines
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: License :: OSI Approved :: GNU General Public License (GPL)
File without changes
File without changes
File without changes
File without changes