ecopipeline 1.0.0__tar.gz → 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {ecopipeline-1.0.0/src/ecopipeline.egg-info → ecopipeline-1.0.2}/PKG-INFO +1 -1
  2. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/setup.cfg +1 -1
  3. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/extract/__init__.py +2 -2
  4. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/extract/extract.py +91 -20
  5. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/transform/transform.py +20 -8
  6. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/ConfigManager.py +20 -1
  7. {ecopipeline-1.0.0 → ecopipeline-1.0.2/src/ecopipeline.egg-info}/PKG-INFO +1 -1
  8. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/LICENSE +0 -0
  9. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/MANIFEST.in +0 -0
  10. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/README.md +0 -0
  11. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/pyproject.toml +0 -0
  12. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/setup.py +0 -0
  13. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/__init__.py +0 -0
  14. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/event_tracking/__init__.py +0 -0
  15. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/event_tracking/event_tracking.py +0 -0
  16. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/load/__init__.py +0 -0
  17. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/load/load.py +0 -0
  18. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/transform/__init__.py +0 -0
  19. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/transform/bayview.py +0 -0
  20. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/transform/lbnl.py +0 -0
  21. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/NOAADataDownloader.py +0 -0
  22. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/__init__.py +0 -0
  23. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/pkls/__init__.py +0 -0
  24. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/pkls/tasseron_resistance_to_temp_3.pkl +0 -0
  25. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/pkls/tasseron_temp_to_resistance_2.pkl +0 -0
  26. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/pkls/veris_resistance_to_temp_3.pkl +0 -0
  27. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/pkls/veris_temp_to_resistance_2.pkl +0 -0
  28. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/unit_convert.py +0 -0
  29. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline.egg-info/SOURCES.txt +0 -0
  30. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline.egg-info/dependency_links.txt +0 -0
  31. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline.egg-info/requires.txt +0 -0
  32. {ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline.egg-info/top_level.txt +0 -0
{ecopipeline-1.0.0/src/ecopipeline.egg-info → ecopipeline-1.0.2}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ecopipeline
-Version: 1.0.0
+Version: 1.0.2
 Summary: Contains functions for use in Ecotope Datapipelines
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: GNU General Public License (GPL)
{ecopipeline-1.0.0 → ecopipeline-1.0.2}/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = ecopipeline
-version = 1.0.0
+version = 1.0.2
 authors = ["Carlos Bello, <bellocarlos@seattleu.edu>, Emil Fahrig <fahrigemil@seattleu.edu>, Casey Mang <cmang@seattleu.edu>, Julian Harris <harrisjulian@seattleu.edu>, Roger Tram <rtram@seattleu.edu>, Nolan Price <nolan@ecotope.com>"]
 description = Contains functions for use in Ecotope Datapipelines
 long_description = file: README.md
{ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/extract/__init__.py
@@ -1,3 +1,3 @@
-from .extract import get_noaa_data, json_to_df, extract_files, get_last_full_day_from_db, get_db_row_from_time, extract_new, csv_to_df, get_sub_dirs, msa_to_df, fm_api_to_df, small_planet_control_to_df, dent_csv_to_df, flow_csv_to_df, pull_egauge_data, egauge_csv_to_df, remove_char_sequence_from_csv_header, tb_api_to_df
+from .extract import get_noaa_data, json_to_df, extract_files, get_last_full_day_from_db, get_db_row_from_time, extract_new, csv_to_df, get_sub_dirs, msa_to_df, fm_api_to_df, small_planet_control_to_df, dent_csv_to_df, flow_csv_to_df, pull_egauge_data, egauge_csv_to_df, remove_char_sequence_from_csv_header, tb_api_to_df, skycentrics_api_to_df
 __all__ = ["get_noaa_data", "json_to_df", "extract_files", "get_last_full_day_from_db", "get_db_row_from_time", 'extract_new', "csv_to_df", "get_sub_dirs", "msa_to_df", "fm_api_to_df",
-           "small_planet_control_to_df","dent_csv_to_df","flow_csv_to_df","pull_egauge_data", "egauge_csv_to_df","remove_char_sequence_from_csv_header", "tb_api_to_df"]
+           "small_planet_control_to_df","dent_csv_to_df","flow_csv_to_df","pull_egauge_data", "egauge_csv_to_df","remove_char_sequence_from_csv_header", "tb_api_to_df", "skycentrics_api_to_df"]
{ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/extract/extract.py
@@ -15,6 +15,7 @@ import mysql.connector.errors as mysqlerrors
 import requests
 import subprocess
 import traceback
+import time


 def get_last_full_day_from_db(config : ConfigManager, table_identifier : str = "minute") -> datetime:
@@ -661,6 +662,79 @@ def egauge_csv_to_df(csv_filenames: List[str]) -> pd.DataFrame:
 
     return df_diff
 
+def skycentrics_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True, time_zone: str = 'US/Pacific'):
+    """
+    Function connects to the field manager api to pull data and returns a dataframe.
+
+    Parameters
+    ----------
+    config : ecopipeline.ConfigManager
+        The ConfigManager object that holds configuration data for the pipeline. The config manager
+        must contain information to connect to the api, i.e. the api user name and password as well as
+        the device id for the device the data is being pulled from.
+    startTime: datetime
+        The point in time for which we want to start the data extraction from. This
+        is local time from the data's index.
+    endTime: datetime
+        The point in time for which we want to end the data extraction. This
+        is local time from the data's index.
+    create_csv : bool
+        create csv files as you process such that API need not be relied upon for reprocessing
+    time_zone: str
+        The timezone for the indexes in the output dataframe as a string. Must be a string recognized as a
+        time stamp by the pandas tz_localize() function https://pandas.pydata.org/docs/reference/api/pandas.Series.tz_localize.html
+        defaults to 'US/Pacific'
+
+    Returns
+    -------
+    pd.DataFrame:
+        Pandas Dataframe containing data from the API pull with column headers the same as the variable names in the data from the pull
+    """
+    #temporary solution while no date range available
+
+    try:
+        df = pd.DataFrame()
+        temp_dfs = []
+        time_parser = startTime
+        while time_parser < endTime:
+            start_time_str = time_parser.strftime('%a, %d %b %H:%M:%S GMT')
+            skycentrics_token, date_str = config.get_skycentrics_token(request_str=f'GET /api/devices/{config.api_device_id}/data HTTP/1.1',date_str=start_time_str)
+            response = requests.get(f'https://api.skycentrics.com/api/devices/{config.api_device_id}/data',
+                                    headers={'Date': date_str, 'x-sc-api-token': skycentrics_token, 'Accept': 'application/json'})
+            if response.status_code == 200:
+                norm_data = pd.json_normalize(response.json(), record_path=['sensors'], meta=['time'], meta_prefix='response_')
+                if len(norm_data) != 0:
+
+                    norm_data["time_pt"] = pd.to_datetime(norm_data["response_time"])
+
+                    norm_data["time_pt"] = norm_data["time_pt"].dt.tz_convert(time_zone)
+                    norm_data = pd.pivot_table(norm_data, index="time_pt", columns="id", values="data")
+                    # Iterate over the index and round up if necessary (work around for json format from sensors)
+                    for i in range(len(norm_data.index)):
+                        if norm_data.index[i].minute == 59 and norm_data.index[i].second == 59:
+                            norm_data.index.values[i] = norm_data.index[i] + pd.Timedelta(seconds=1)
+                    temp_dfs.append(norm_data)
+            else:
+                print(f"Failed to make GET request. Status code: {response.status_code} {response.json()}")
+                time.sleep(60)
+            time_parser = time_parser + timedelta(minutes=1)
+        if len(temp_dfs) > 0:
+            df = pd.concat(temp_dfs, ignore_index=False)
+            if create_csv:
+                filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
+                original_directory = os.getcwd()
+                os.chdir(config.data_directory)
+                df.to_csv(filename, index_label='time_pt')
+                os.chdir(original_directory)
+        else:
+            print("No skycentrics data retieved for time frame.")
+        return df
+
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        raise e
+    return pd.DataFrame()
+
 def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True) -> pd.DataFrame:
     """
     Function connects to the field manager api to pull data and returns a dataframe.
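For orientation, a minimal sketch of how the new skycentrics_api_to_df extractor might be driven from a pipeline script. The config path and the ConfigManager constructor argument shown here are assumptions, not taken from this diff; the function itself issues one GET per minute of the requested window and, with create_csv=True, writes a <startTime>.csv into config.data_directory.

    from datetime import datetime, timedelta
    from ecopipeline import ConfigManager
    from ecopipeline.extract import skycentrics_api_to_df

    # Hypothetical config file whose [data] section provides api_token,
    # api_secret, and device_id (the new ConfigManager branch in this release).
    config = ConfigManager(config_file_path="config.ini")

    end = datetime.now()
    start = end - timedelta(minutes=30)
    df = skycentrics_api_to_df(config, startTime=start, endTime=end, create_csv=True)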
@@ -776,7 +850,7 @@ def pull_egauge_data(config: ConfigManager, eGauge_ids: list, eGauge_usr : str,
         os.chdir(original_directory)
 
 def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True, query_hours : float = 1,
-                 sensor_keys : list = [], seperate_keys : bool = False):
+                 sensor_keys : list = [], seperate_keys : bool = False, device_id_overwrite : str = None, csv_prefix : str = ""):
     """
     Function connects to the things board manager api to pull data and returns a dataframe.
 
@@ -796,6 +870,11 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
         create csv files as you process such that API need not be relied upon for reprocessing
     query_hours : float
         number of hours to query at a time from ThingsBoard API
+
+    device_id_overwrite : str
+        Overwrites device ID for API pull
+    csv_prefix : str
+        prefix to add to the csv title
 
     Returns
     -------
@@ -804,16 +883,17 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
         Will return with index in UTC so needs to be converted after to appropriate timezone
     """
     df = pd.DataFrame()
+    api_device_id = device_id_overwrite if not device_id_overwrite is None else config.api_device_id
     if len(sensor_keys) <= 0:
         token = config.get_thingsboard_token()
-        key_list = _get_tb_keys(config, token)
+        key_list = _get_tb_keys(token, api_device_id)
         if len(key_list) <= 0:
-            raise Exception(f"No sensors available at ThingsBoard site with id {config.api_device_id}")
-        return tb_api_to_df(config, startTime, endTime, create_csv, query_hours, key_list, seperate_keys)
+            raise Exception(f"No sensors available at ThingsBoard site with id {api_device_id}")
+        return tb_api_to_df(config, startTime, endTime, create_csv, query_hours, key_list, seperate_keys, device_id_overwrite, csv_prefix)
     if seperate_keys:
         df_list = []
         for sensor_key in sensor_keys:
-            df_list.append(tb_api_to_df(config, startTime, endTime, False, query_hours, [sensor_key], False))
+            df_list.append(tb_api_to_df(config, startTime, endTime, False, query_hours, [sensor_key], False, device_id_overwrite, csv_prefix))
         df = pd.concat(df_list)
     else:
         # not seperate_keys:
@@ -826,13 +906,13 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
         if endTime - timedelta(hours=query_hours) > startTime:
             time_diff = endTime - startTime
             midpointTime = startTime + time_diff / 2
-            df_1 = tb_api_to_df(config, startTime, midpointTime, query_hours=query_hours, sensor_keys=sensor_keys, create_csv=False)#True if startTime >= datetime(2025,7,13,9) and startTime <= datetime(2025,7,13,10) else csv_pass_down)
-            df_2 = tb_api_to_df(config, midpointTime, endTime, query_hours=query_hours, sensor_keys=sensor_keys,create_csv=False)#True if endTime >= datetime(2025,7,13,9) and endTime <= datetime(2025,7,13,10) else csv_pass_down)
+            df_1 = tb_api_to_df(config, startTime, midpointTime, query_hours=query_hours, sensor_keys=sensor_keys, create_csv=False, device_id_overwrite = device_id_overwrite)#True if startTime >= datetime(2025,7,13,9) and startTime <= datetime(2025,7,13,10) else csv_pass_down)
+            df_2 = tb_api_to_df(config, midpointTime, endTime, query_hours=query_hours, sensor_keys=sensor_keys,create_csv=False, device_id_overwrite = device_id_overwrite)#True if endTime >= datetime(2025,7,13,9) and endTime <= datetime(2025,7,13,10) else csv_pass_down)
             df = pd.concat([df_1, df_2])
             df = df.sort_index()
             df = df.groupby(df.index).mean()
         else:
-            url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{config.api_device_id}/values/timeseries'
+            url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{api_device_id}/values/timeseries'
             token = config.get_thingsboard_token()
             key_string = ','.join(sensor_keys)
             params = {
@@ -844,7 +924,6 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
                 'interval' : '0',
                 'agg' : 'NONE'
             }
-
             # Headers
             headers = {
                 'accept': 'application/json',
@@ -855,14 +934,6 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
             response = requests.get(url, headers=headers, params=params)
             if response.status_code == 200:
                 response_json = response.json()
-                # if create_csv:
-                #     json_filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.json"
-                #     print(f"filename: {json_filename}, url: {url}, params: {params}")
-                #     original_directory = os.getcwd()
-                #     os.chdir(config.data_directory)
-                #     with open(json_filename, 'w') as f:
-                #         json.dump(response_json, f, indent=4) # indent=4 makes it human-readable
-                #     os.chdir(original_directory)
 
                 data = {}
                 for key, records in response_json.items():
@@ -886,7 +957,7 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
                 df = pd.DataFrame()
     # save to file
     if create_csv:
-        filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
+        filename = f"{csv_prefix}{startTime.strftime('%Y%m%d%H%M%S')}.csv"
         original_directory = os.getcwd()
         os.chdir(config.data_directory)
         df.to_csv(filename, index_label='time_pt')
@@ -900,8 +971,8 @@ def _get_float_value(value):
     except (ValueError, TypeError):
         return None
 
-def _get_tb_keys(config: ConfigManager, token : str) -> List[str]:
-    url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{config.api_device_id}/keys/timeseries'
+def _get_tb_keys(token : str, api_device_id : str) -> List[str]:
+    url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{api_device_id}/keys/timeseries'
 
     # Headers
     headers = {
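For orientation, a minimal sketch of how the two new tb_api_to_df parameters might be used to pull the same window from two ThingsBoard devices into separately named CSVs. The device IDs and prefixes below are placeholders; when device_id_overwrite is None the function falls back to config.api_device_id exactly as before.

    # Hypothetical device IDs; config, start, and end as in the earlier sketch.
    df_a = tb_api_to_df(config, startTime=start, endTime=end, create_csv=True,
                        device_id_overwrite="11111111-2222-3333-4444-555555555555",
                        csv_prefix="site_a_")
    df_b = tb_api_to_df(config, startTime=start, endTime=end, create_csv=True,
                        device_id_overwrite="66666666-7777-8888-9999-000000000000",
                        csv_prefix="site_b_")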
{ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/transform/transform.py
@@ -157,20 +157,29 @@ def _rm_cols(col, bounds_df): # Helper function for remove_outliers
     """
     Function will take in a pandas series and bounds information
     stored in a dataframe, then check each element of that column and set it to nan
-    if it is outside the given bounds.
+    if it is outside the given bounds. 
 
-    Args:
-        col: pd.Series
+    Args: 
+        col: pd.Series 
             Pandas dataframe column from data being processed
         bounds_df: pd.DataFrame
             Pandas dataframe indexed by the names of the columns from the dataframe that col came from. There should be at least
             two columns in this dataframe, lower_bound and upper_bound, for use in removing outliers
-    Returns:
-        None
+    Returns: 
+        None 
     """
     if (col.name in bounds_df.index):
-        c_lower = float(bounds_df.loc[col.name]["lower_bound"])
-        c_upper = float(bounds_df.loc[col.name]["upper_bound"])
+        c_lower = bounds_df.loc[col.name]["lower_bound"]
+        c_upper = bounds_df.loc[col.name]["upper_bound"]
+
+        # Skip if both bounds are NaN
+        if pd.isna(c_lower) and pd.isna(c_upper):
+            return
+
+        # Convert bounds to float, handling NaN values
+        c_lower = float(c_lower) if not pd.isna(c_lower) else -np.inf
+        c_upper = float(c_upper) if not pd.isna(c_upper) else np.inf
+
         col.mask((col > c_upper) | (col < c_lower), other=np.NaN, inplace=True)
 
 # TODO: remove_outliers STRETCH GOAL: Functionality for alarms being raised based on bounds needs to happen here.
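The effect of the reworked bounds handling, sketched with a hypothetical bounds table (the helper mutates the series in place and is normally applied column-by-column by remove_outliers):

    import numpy as np
    import pandas as pd

    # PowerIn has no lower bound configured; it is now treated as -inf
    # instead of being coerced with float().
    bounds_df = pd.DataFrame({"lower_bound": [0.0, np.nan],
                              "upper_bound": [150.0, 25.0]},
                             index=["OAT", "PowerIn"])
    col = pd.Series([-5.0, 10.0, 30.0], name="PowerIn")
    _rm_cols(col, bounds_df)
    # col is now [-5.0, 10.0, NaN]: only the value above the upper bound is masked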
@@ -784,7 +793,7 @@ def convert_on_off_col_to_bool(df: pd.DataFrame, column_names: list) -> pd.DataF
         pd.DataFrame: Dataframe with specified columns converted from Celsius to Farenhiet.
     """
 
-    mapping = {'ON': True, 'OFF': False}
+    mapping = {'ON': True, 'OFF': False, 'On': True, 'Off': False}
 
     for column_name in column_names:
         df[column_name] = df[column_name].map(mapping).where(df[column_name].notna(), df[column_name])
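A quick illustration of what the widened mapping changes: mixed-case 'On'/'Off' values previously fell outside the mapping and were turned into NaN by .map(); they now convert to booleans like their upper-case counterparts.

    df = pd.DataFrame({"compressor_state": ["On", "OFF", None, "Off"]})
    df = convert_on_off_col_to_bool(df, ["compressor_state"])
    # compressor_state -> [True, False, None, False]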
@@ -1175,6 +1184,9 @@ def join_to_hourly(hourly_data: pd.DataFrame, noaa_data: pd.DataFrame) -> pd.Dat
     pd.DataFrame: 
         A single, joined dataframe
     """
+    #fixing pipelines for new years
+    if 'OAT_NOAA' in noaa_data.columns and not noaa_data['OAT_NOAA'].notnull().any():
+        return hourly_data
     out_df = hourly_data.join(noaa_data)
     return out_df
 
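The new guard in join_to_hourly, exercised with toy frames (column names are illustrative): an all-NaN NOAA series, as can happen at the start of a new year before weather data lands, is now skipped rather than joined in as a column of NaNs.

    import numpy as np
    import pandas as pd

    idx = pd.date_range("2025-01-01", periods=3, freq="h", tz="US/Pacific")
    hourly = pd.DataFrame({"PowerIn_Total": [1.0, 2.0, 3.0]}, index=idx)
    noaa = pd.DataFrame({"OAT_NOAA": [np.nan] * 3}, index=idx)
    out = join_to_hourly(hourly, noaa)
    # out is hourly unchanged; the all-NaN OAT_NOAA column is not joined in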
{ecopipeline-1.0.0 → ecopipeline-1.0.2}/src/ecopipeline/utils/ConfigManager.py
@@ -4,6 +4,9 @@ import mysql.connector
 import mysql.connector.cursor
 import requests
 from datetime import datetime
+import base64
+import hashlib
+import hmac
 
 class ConfigManager:
     """
@@ -56,6 +59,8 @@ class ConfigManager:
         self.data_directory = data_directory
         self.api_usr = None
         self.api_pw = None
+        self.api_token = None
+        self.api_secret = None
         self.api_device_id = None
         if self.data_directory is None:
             configured_data_method = False
@@ -74,6 +79,11 @@ class ConfigManager:
                 self.api_pw = configure.get('data', 'api_pw')
                 self.api_device_id = configure.get('data','device_id')
                 configured_data_method = True
+            elif 'api_token' in configure['data'] and 'api_secret' in configure['data']:
+                self.api_token = configure.get('data', 'api_token')
+                self.api_secret = configure.get('data', 'api_secret')
+                self.api_device_id = configure.get('data','device_id')
+                configured_data_method = True
             if not configured_data_method:
                 raise Exception('data configuration section missing or incomplete in configuration file.')
 
@@ -261,4 +271,13 @@ class ConfigManager:
     def get_fm_device_id(self) -> str:
         if self.api_device_id is None:
             raise Exception("Field Manager device ID has not been configured.")
-        return self.api_device_id
+        return self.api_device_id
+
+    def get_skycentrics_token(self, request_str = 'GET /api/devices/ HTTP/1.', date_str : str = None) -> tuple:
+        if date_str is None:
+            date_str = datetime.utcnow().strftime('%a, %d %b %H:%M:%S GMT')
+        signature = base64.b64encode(hmac.new(self.api_secret.encode(),
+                                              '{}\n{}\n{}\n{}'.format(request_str, date_str, '', hashlib.md5(''.encode()).hexdigest()).encode(),
+                                              hashlib.sha1).digest())
+        token = '{}:{}'.format(self.api_token, signature.decode())
+        return token, date_str
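A stand-alone sketch of the signing scheme implemented above, with fake credentials, for readers who want to reproduce a token outside the class. The signed message is the request line, the Date header, an empty content type, and the MD5 hex digest of an empty body, MACed with HMAC-SHA1 and base64-encoded:

    import base64
    import hashlib
    import hmac
    from datetime import datetime, timezone

    api_token, api_secret = "my-token", "my-secret"      # fake credentials
    request_str = 'GET /api/devices/1234/data HTTP/1.1'  # hypothetical device id
    date_str = datetime.now(timezone.utc).strftime('%a, %d %b %H:%M:%S GMT')

    message = '{}\n{}\n{}\n{}'.format(request_str, date_str, '',
                                      hashlib.md5(b'').hexdigest())
    signature = base64.b64encode(hmac.new(api_secret.encode(), message.encode(),
                                          hashlib.sha1).digest())
    token = '{}:{}'.format(api_token, signature.decode())
    # token is sent as 'x-sc-api-token' and date_str as 'Date' (see skycentrics_api_to_df)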
{ecopipeline-1.0.0 → ecopipeline-1.0.2/src/ecopipeline.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ecopipeline
-Version: 1.0.0
+Version: 1.0.2
 Summary: Contains functions for use in Ecotope Datapipelines
 Classifier: Programming Language :: Python :: 3
 Classifier: License :: OSI Approved :: GNU General Public License (GPL)