ecopipeline 0.8.6__py3-none-any.whl → 0.8.8__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- ecopipeline/extract/extract.py +40 -4
- ecopipeline/transform/transform.py +13 -8
- {ecopipeline-0.8.6.dist-info → ecopipeline-0.8.8.dist-info}/METADATA +1 -1
- {ecopipeline-0.8.6.dist-info → ecopipeline-0.8.8.dist-info}/RECORD +7 -7
- {ecopipeline-0.8.6.dist-info → ecopipeline-0.8.8.dist-info}/WHEEL +0 -0
- {ecopipeline-0.8.6.dist-info → ecopipeline-0.8.8.dist-info}/licenses/LICENSE +0 -0
- {ecopipeline-0.8.6.dist-info → ecopipeline-0.8.8.dist-info}/top_level.txt +0 -0
ecopipeline/extract/extract.py
CHANGED
|
@@ -658,7 +658,7 @@ def egauge_csv_to_df(csv_filenames: List[str]) -> pd.DataFrame:
|
|
|
658
658
|
|
|
659
659
|
return df_diff
|
|
660
660
|
|
|
661
|
-
def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None) -> pd.DataFrame:
|
|
661
|
+
def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True) -> pd.DataFrame:
|
|
662
662
|
"""
|
|
663
663
|
Function connects to the field manager api to pull data and returns a dataframe.
|
|
664
664
|
|
|
@@ -674,6 +674,8 @@ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
|
|
|
674
674
|
endTime: datetime
|
|
675
675
|
The point in time for which we want to end the data extraction. This
|
|
676
676
|
is local time from the data's index.
|
|
677
|
+
create_csv : bool
|
|
678
|
+
create csv files as you process such that API need not be relied upon for reprocessing
|
|
677
679
|
|
|
678
680
|
Returns
|
|
679
681
|
-------
|
|
@@ -715,6 +717,12 @@ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
|
|
|
715
717
|
df.set_index('time_pt', inplace=True)
|
|
716
718
|
df = df.sort_index()
|
|
717
719
|
df = df.groupby(df.index).mean()
|
|
720
|
+
if create_csv:
|
|
721
|
+
filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
|
|
722
|
+
original_directory = os.getcwd()
|
|
723
|
+
os.chdir(config.data_directory)
|
|
724
|
+
df.to_csv(filename, index_label='time_pt')
|
|
725
|
+
os.chdir(original_directory)
|
|
718
726
|
return df
|
|
719
727
|
elif response.status_code == 500:
|
|
720
728
|
json_message = response.json()
|
|
@@ -753,7 +761,30 @@ def pull_egauge_data(config: ConfigManager, eGauge_ids: list, eGauge_usr : str,
|
|
|
753
761
|
|
|
754
762
|
os.chdir(original_directory)
|
|
755
763
|
|
|
756
|
-
def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None):
|
|
764
|
+
def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True):
|
|
765
|
+
"""
|
|
766
|
+
Function connects to the things board manager api to pull data and returns a dataframe.
|
|
767
|
+
|
|
768
|
+
Parameters
|
|
769
|
+
----------
|
|
770
|
+
config : ecopipeline.ConfigManager
|
|
771
|
+
The ConfigManager object that holds configuration data for the pipeline. The config manager
|
|
772
|
+
must contain information to connect to the api, i.e. the api user name and password as well as
|
|
773
|
+
the device id for the device the data is being pulled from.
|
|
774
|
+
startTime: datetime
|
|
775
|
+
The point in time for which we want to start the data extraction from. This
|
|
776
|
+
is local time from the data's index.
|
|
777
|
+
endTime: datetime
|
|
778
|
+
The point in time for which we want to end the data extraction. This
|
|
779
|
+
is local time from the data's index.
|
|
780
|
+
create_csv : bool
|
|
781
|
+
create csv files as you process such that API need not be relied upon for reprocessing
|
|
782
|
+
|
|
783
|
+
Returns
|
|
784
|
+
-------
|
|
785
|
+
pd.DataFrame:
|
|
786
|
+
Pandas Dataframe containing data from the API pull with column headers the same as the variable names in the data from the pull
|
|
787
|
+
"""
|
|
757
788
|
if endTime is None:
|
|
758
789
|
endTime = datetime.now()
|
|
759
790
|
if startTime is None:
|
|
@@ -770,7 +801,6 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
|
|
|
770
801
|
df = df.sort_index()
|
|
771
802
|
df = df.groupby(df.index).mean()
|
|
772
803
|
return df
|
|
773
|
-
|
|
774
804
|
url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{config.api_device_id}/values/timeseries'
|
|
775
805
|
token = config.get_thingsboard_token()
|
|
776
806
|
keys = _get_tb_keys(config, token)
|
|
@@ -808,8 +838,14 @@ def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
|
|
|
808
838
|
df = pd.DataFrame(data)
|
|
809
839
|
df.index = pd.to_datetime(df.index, unit='ms')
|
|
810
840
|
df = df.sort_index()
|
|
841
|
+
# save to file
|
|
842
|
+
if create_csv:
|
|
843
|
+
filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
|
|
844
|
+
original_directory = os.getcwd()
|
|
845
|
+
os.chdir(config.data_directory)
|
|
846
|
+
df.to_csv(filename, index_label='time_pt')
|
|
847
|
+
os.chdir(original_directory)
|
|
811
848
|
return df
|
|
812
|
-
|
|
813
849
|
print(f"Failed to make GET request. Status code: {response.status_code} {response.json()}")
|
|
814
850
|
return pd.DataFrame()
|
|
815
851
|
except Exception as e:
|
|
@@ -1104,19 +1104,23 @@ def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
1104
1104
|
|
|
1105
1105
|
# Reindex to include any completely missing minutes
|
|
1106
1106
|
df_full = df.reindex(full_index)
|
|
1107
|
-
|
|
1107
|
+
# df_full = df_full.select_dtypes(include='number')
|
|
1108
|
+
# print("1",df_full)
|
|
1108
1109
|
# Resample daily to count missing values per column
|
|
1109
1110
|
total_missing = df_full.isna().resample('D').sum().astype(int)
|
|
1110
|
-
|
|
1111
1111
|
# Function to calculate max consecutive missing values
|
|
1112
1112
|
def max_consecutive_nans(x):
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1113
|
+
try:
|
|
1114
|
+
# print(f"here is x :{x}")
|
|
1115
|
+
is_na = pd.Series(x).isna().reset_index(drop=True)
|
|
1116
|
+
groups = (is_na != is_na.shift()).cumsum()
|
|
1117
|
+
return is_na.groupby(groups).sum().max() or 0
|
|
1118
|
+
except:
|
|
1119
|
+
raise Exception(f"{x} is the problem!")
|
|
1116
1120
|
|
|
1117
1121
|
# Function to calculate average consecutive missing values
|
|
1118
1122
|
def avg_consecutive_nans(x):
|
|
1119
|
-
is_na = x.isna()
|
|
1123
|
+
is_na = pd.Series(x).isna().reset_index(drop=True)
|
|
1120
1124
|
groups = (is_na != is_na.shift()).cumsum()
|
|
1121
1125
|
gap_lengths = is_na.groupby(groups).sum()
|
|
1122
1126
|
gap_lengths = gap_lengths[gap_lengths > 0]
|
|
@@ -1125,8 +1129,9 @@ def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
1125
1129
|
return gap_lengths.mean()
|
|
1126
1130
|
|
|
1127
1131
|
# Apply daily, per column
|
|
1128
|
-
|
|
1129
|
-
|
|
1132
|
+
# print("hello?",type(df_full.index))
|
|
1133
|
+
max_consec_missing = df_full.resample('D').agg(max_consecutive_nans)
|
|
1134
|
+
avg_consec_missing = df_full.resample('D').agg(avg_consecutive_nans)
|
|
1130
1135
|
|
|
1131
1136
|
# Rename columns to include a suffix
|
|
1132
1137
|
total_missing = total_missing.add_suffix('_missing_mins')
|
|
@@ -2,18 +2,18 @@ ecopipeline/__init__.py,sha256=d48mO5La6OrQDkRe_qqoY6lUx7x-e8krOH388jmWjwU,218
|
|
|
2
2
|
ecopipeline/event_tracking/__init__.py,sha256=q49j46fXMUjNUPzL4FvXEppB93i3lUni-QUZpp61tt0,64
|
|
3
3
|
ecopipeline/event_tracking/event_tracking.py,sha256=LOCLE7ju320O7CrwnWRIqHRa2uAqoq-KvXZ3zWQ2S74,13224
|
|
4
4
|
ecopipeline/extract/__init__.py,sha256=gQ3sak6NJ63Gpo-hZXrtZfeKOTHLRyAVXfTgxxRpqPo,675
|
|
5
|
-
ecopipeline/extract/extract.py,sha256=
|
|
5
|
+
ecopipeline/extract/extract.py,sha256=EAe1oSmmSMd-mG9QY-4wTzXwY4ziUaQw5Vrkgt4ZXNY,48542
|
|
6
6
|
ecopipeline/load/__init__.py,sha256=NLa_efQJZ8aP-J0Y5xx9DP7mtfRH9jY6Jz1ZMZN_BAA,292
|
|
7
7
|
ecopipeline/load/load.py,sha256=Ptxr0MOjns_HeVSmZsLLApHJGB-z6XOB2m8LNiVaD7E,23860
|
|
8
8
|
ecopipeline/transform/__init__.py,sha256=hYb4F64fXdXtjBSYCqv6gLFBwKZjjnl0z7s291pFE98,2505
|
|
9
9
|
ecopipeline/transform/bayview.py,sha256=TP24dnTsUD95X-f6732egPZKjepFLJgDm9ImGr-fppY,17899
|
|
10
10
|
ecopipeline/transform/lbnl.py,sha256=EQ54G4rJXaZ7pwVusKcdK2KBehSdCsNo2ybphtMGs7o,33400
|
|
11
|
-
ecopipeline/transform/transform.py,sha256=
|
|
11
|
+
ecopipeline/transform/transform.py,sha256=J-QxcwjWkocPrHJ_mQD4m57HenwwQZnFYgU0BSMTkgQ,49229
|
|
12
12
|
ecopipeline/utils/ConfigManager.py,sha256=-g1wtExdvhYO5Y6Q3cRbywa__DxRMFruLrB4YanwaPY,12168
|
|
13
13
|
ecopipeline/utils/__init__.py,sha256=ccWUR0m7gD9DfcgsxBCLOfi4lho6RdYuB2Ugy_g6ZdQ,28
|
|
14
14
|
ecopipeline/utils/unit_convert.py,sha256=VFh1we2Y8KV3u21BeWb-U3TlZJXo83q5vdxxkpgcuME,3064
|
|
15
|
-
ecopipeline-0.8.
|
|
16
|
-
ecopipeline-0.8.
|
|
17
|
-
ecopipeline-0.8.
|
|
18
|
-
ecopipeline-0.8.
|
|
19
|
-
ecopipeline-0.8.
|
|
15
|
+
ecopipeline-0.8.8.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
+
ecopipeline-0.8.8.dist-info/METADATA,sha256=bmYXzXG0itQmMRAMBIBbstHteeYgiFVpbRVF5gyMOf8,2329
|
|
17
|
+
ecopipeline-0.8.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
18
|
+
ecopipeline-0.8.8.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
|
|
19
|
+
ecopipeline-0.8.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|