ecopipeline 0.8.7__py3-none-any.whl → 0.8.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ecopipeline/extract/extract.py +54 -7
- ecopipeline/transform/transform.py +7 -6
- {ecopipeline-0.8.7.dist-info → ecopipeline-0.8.9.dist-info}/METADATA +1 -1
- {ecopipeline-0.8.7.dist-info → ecopipeline-0.8.9.dist-info}/RECORD +7 -7
- {ecopipeline-0.8.7.dist-info → ecopipeline-0.8.9.dist-info}/WHEEL +0 -0
- {ecopipeline-0.8.7.dist-info → ecopipeline-0.8.9.dist-info}/licenses/LICENSE +0 -0
- {ecopipeline-0.8.7.dist-info → ecopipeline-0.8.9.dist-info}/top_level.txt +0 -0
ecopipeline/extract/extract.py
CHANGED
|
@@ -658,7 +658,7 @@ def egauge_csv_to_df(csv_filenames: List[str]) -> pd.DataFrame:
|
|
|
658
658
|
|
|
659
659
|
return df_diff
|
|
660
660
|
|
|
661
|
-
def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None) -> pd.DataFrame:
|
|
661
|
+
def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True) -> pd.DataFrame:
|
|
662
662
|
"""
|
|
663
663
|
Function connects to the field manager api to pull data and returns a dataframe.
|
|
664
664
|
|
|
@@ -674,6 +674,8 @@ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
|
|
|
674
674
|
endTime: datetime
|
|
675
675
|
The point in time for which we want to end the data extraction. This
|
|
676
676
|
is local time from the data's index.
|
|
677
|
+
create_csv : bool
|
|
678
|
+
create csv files as you process such that API need not be relied upon for reprocessing
|
|
677
679
|
|
|
678
680
|
Returns
|
|
679
681
|
-------
|
|
@@ -715,6 +717,12 @@ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
|
|
|
715
717
|
df.set_index('time_pt', inplace=True)
|
|
716
718
|
df = df.sort_index()
|
|
717
719
|
df = df.groupby(df.index).mean()
|
|
720
|
+
if create_csv:
|
|
721
|
+
filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
|
|
722
|
+
original_directory = os.getcwd()
|
|
723
|
+
os.chdir(config.data_directory)
|
|
724
|
+
df.to_csv(filename, index_label='time_pt')
|
|
725
|
+
os.chdir(original_directory)
|
|
718
726
|
return df
|
|
719
727
|
elif response.status_code == 500:
|
|
720
728
|
json_message = response.json()
|
|
@@ -724,11 +732,17 @@ def fm_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: dat
|
|
|
724
732
|
time_diff = endTime - startTime
|
|
725
733
|
midpointTime = startTime + time_diff / 2
|
|
726
734
|
# recursively construct the df
|
|
727
|
-
df_1 = fm_api_to_df(config, startTime, midpointTime)
|
|
728
|
-
df_2 = fm_api_to_df(config, midpointTime, endTime)
|
|
735
|
+
df_1 = fm_api_to_df(config, startTime, midpointTime, create_csv=False)
|
|
736
|
+
df_2 = fm_api_to_df(config, midpointTime, endTime, create_csv=False)
|
|
729
737
|
df = pd.concat([df_1, df_2])
|
|
730
738
|
df = df.sort_index()
|
|
731
739
|
df = df.groupby(df.index).mean()
|
|
740
|
+
if create_csv:
|
|
741
|
+
filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
|
|
742
|
+
original_directory = os.getcwd()
|
|
743
|
+
os.chdir(config.data_directory)
|
|
744
|
+
df.to_csv(filename, index_label='time_pt')
|
|
745
|
+
os.chdir(original_directory)
|
|
732
746
|
return df
|
|
733
747
|
|
|
734
748
|
print(f"Failed to make GET request. Status code: {response.status_code} {response.json()}")
|
|
@@ -753,22 +767,55 @@ def pull_egauge_data(config: ConfigManager, eGauge_ids: list, eGauge_usr : str,
|
|
|
753
767
|
|
|
754
768
|
os.chdir(original_directory)
|
|
755
769
|
|
|
756
|
-
def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv = True):
|
|
770
|
+
def tb_api_to_df(config: ConfigManager, startTime: datetime = None, endTime: datetime = None, create_csv : bool = True, query_hours : int = 12):
|
|
771
|
+
"""
|
|
772
|
+
Function connects to the things board manager api to pull data and returns a dataframe.
|
|
773
|
+
|
|
774
|
+
Parameters
|
|
775
|
+
----------
|
|
776
|
+
config : ecopipeline.ConfigManager
|
|
777
|
+
The ConfigManager object that holds configuration data for the pipeline. The config manager
|
|
778
|
+
must contain information to connect to the api, i.e. the api user name and password as well as
|
|
779
|
+
the device id for the device the data is being pulled from.
|
|
780
|
+
startTime: datetime
|
|
781
|
+
The point in time for which we want to start the data extraction from. This
|
|
782
|
+
is local time from the data's index.
|
|
783
|
+
endTime: datetime
|
|
784
|
+
The point in time for which we want to end the data extraction. This
|
|
785
|
+
is local time from the data's index.
|
|
786
|
+
create_csv : bool
|
|
787
|
+
create csv files as you process such that API need not be relied upon for reprocessing
|
|
788
|
+
query_hours : int
|
|
789
|
+
number of hours to query at a time from ThingsBoard API
|
|
790
|
+
|
|
791
|
+
Returns
|
|
792
|
+
-------
|
|
793
|
+
pd.DataFrame:
|
|
794
|
+
Pandas Dataframe containing data from the API pull with column headers the same as the variable names in the data from the pull.
|
|
795
|
+
Will return with index in UTC so needs to be converted after to appropriate timezone
|
|
796
|
+
"""
|
|
757
797
|
if endTime is None:
|
|
758
798
|
endTime = datetime.now()
|
|
759
799
|
if startTime is None:
|
|
760
800
|
# 28 hours to ensure encapsulation of last day
|
|
761
801
|
startTime = endTime - timedelta(hours=28)
|
|
762
802
|
|
|
763
|
-
if endTime - timedelta(hours=
|
|
803
|
+
if endTime - timedelta(hours=query_hours) > startTime:
|
|
764
804
|
time_diff = endTime - startTime
|
|
765
805
|
midpointTime = startTime + time_diff / 2
|
|
766
806
|
# recursively construct the df
|
|
767
|
-
df_1 = tb_api_to_df(config, startTime, midpointTime)
|
|
768
|
-
df_2 = tb_api_to_df(config, midpointTime, endTime)
|
|
807
|
+
df_1 = tb_api_to_df(config, startTime, midpointTime, create_csv=False)
|
|
808
|
+
df_2 = tb_api_to_df(config, midpointTime, endTime, create_csv=False)
|
|
769
809
|
df = pd.concat([df_1, df_2])
|
|
770
810
|
df = df.sort_index()
|
|
771
811
|
df = df.groupby(df.index).mean()
|
|
812
|
+
if create_csv:
|
|
813
|
+
filename = f"{startTime.strftime('%Y%m%d%H%M%S')}.csv"
|
|
814
|
+
original_directory = os.getcwd()
|
|
815
|
+
os.chdir(config.data_directory)
|
|
816
|
+
df.to_csv(filename, index_label='time_pt')
|
|
817
|
+
os.chdir(original_directory)
|
|
818
|
+
|
|
772
819
|
return df
|
|
773
820
|
url = f'https://thingsboard.cloud/api/plugins/telemetry/DEVICE/{config.api_device_id}/values/timeseries'
|
|
774
821
|
token = config.get_thingsboard_token()
|
|
@@ -1104,19 +1104,19 @@ def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
1104
1104
|
|
|
1105
1105
|
# Reindex to include any completely missing minutes
|
|
1106
1106
|
df_full = df.reindex(full_index)
|
|
1107
|
-
|
|
1107
|
+
# df_full = df_full.select_dtypes(include='number')
|
|
1108
|
+
# print("1",df_full)
|
|
1108
1109
|
# Resample daily to count missing values per column
|
|
1109
1110
|
total_missing = df_full.isna().resample('D').sum().astype(int)
|
|
1110
|
-
|
|
1111
1111
|
# Function to calculate max consecutive missing values
|
|
1112
1112
|
def max_consecutive_nans(x):
|
|
1113
|
-
is_na = x.isna()
|
|
1113
|
+
is_na = pd.Series(x).isna().reset_index(drop=True)
|
|
1114
1114
|
groups = (is_na != is_na.shift()).cumsum()
|
|
1115
1115
|
return is_na.groupby(groups).sum().max() or 0
|
|
1116
1116
|
|
|
1117
1117
|
# Function to calculate average consecutive missing values
|
|
1118
1118
|
def avg_consecutive_nans(x):
|
|
1119
|
-
is_na = x.isna()
|
|
1119
|
+
is_na = pd.Series(x).isna().reset_index(drop=True)
|
|
1120
1120
|
groups = (is_na != is_na.shift()).cumsum()
|
|
1121
1121
|
gap_lengths = is_na.groupby(groups).sum()
|
|
1122
1122
|
gap_lengths = gap_lengths[gap_lengths > 0]
|
|
@@ -1125,8 +1125,9 @@ def create_data_statistics_df(df: pd.DataFrame) -> pd.DataFrame:
|
|
|
1125
1125
|
return gap_lengths.mean()
|
|
1126
1126
|
|
|
1127
1127
|
# Apply daily, per column
|
|
1128
|
-
|
|
1129
|
-
|
|
1128
|
+
# print("hello?",type(df_full.index))
|
|
1129
|
+
max_consec_missing = df_full.resample('D').agg(max_consecutive_nans)
|
|
1130
|
+
avg_consec_missing = df_full.resample('D').agg(avg_consecutive_nans)
|
|
1130
1131
|
|
|
1131
1132
|
# Rename columns to include a suffix
|
|
1132
1133
|
total_missing = total_missing.add_suffix('_missing_mins')
|
|
@@ -2,18 +2,18 @@ ecopipeline/__init__.py,sha256=d48mO5La6OrQDkRe_qqoY6lUx7x-e8krOH388jmWjwU,218
|
|
|
2
2
|
ecopipeline/event_tracking/__init__.py,sha256=q49j46fXMUjNUPzL4FvXEppB93i3lUni-QUZpp61tt0,64
|
|
3
3
|
ecopipeline/event_tracking/event_tracking.py,sha256=LOCLE7ju320O7CrwnWRIqHRa2uAqoq-KvXZ3zWQ2S74,13224
|
|
4
4
|
ecopipeline/extract/__init__.py,sha256=gQ3sak6NJ63Gpo-hZXrtZfeKOTHLRyAVXfTgxxRpqPo,675
|
|
5
|
-
ecopipeline/extract/extract.py,sha256=
|
|
5
|
+
ecopipeline/extract/extract.py,sha256=_ugA9AU6jAPrg_RdEsvDN27Igtx2_5W88u9LRyF6Kng,49446
|
|
6
6
|
ecopipeline/load/__init__.py,sha256=NLa_efQJZ8aP-J0Y5xx9DP7mtfRH9jY6Jz1ZMZN_BAA,292
|
|
7
7
|
ecopipeline/load/load.py,sha256=Ptxr0MOjns_HeVSmZsLLApHJGB-z6XOB2m8LNiVaD7E,23860
|
|
8
8
|
ecopipeline/transform/__init__.py,sha256=hYb4F64fXdXtjBSYCqv6gLFBwKZjjnl0z7s291pFE98,2505
|
|
9
9
|
ecopipeline/transform/bayview.py,sha256=TP24dnTsUD95X-f6732egPZKjepFLJgDm9ImGr-fppY,17899
|
|
10
10
|
ecopipeline/transform/lbnl.py,sha256=EQ54G4rJXaZ7pwVusKcdK2KBehSdCsNo2ybphtMGs7o,33400
|
|
11
|
-
ecopipeline/transform/transform.py,sha256=
|
|
11
|
+
ecopipeline/transform/transform.py,sha256=S8fpAb45XBcYzeGNkxELiHM8-1jlNQqADV7_m-2oeWI,49097
|
|
12
12
|
ecopipeline/utils/ConfigManager.py,sha256=-g1wtExdvhYO5Y6Q3cRbywa__DxRMFruLrB4YanwaPY,12168
|
|
13
13
|
ecopipeline/utils/__init__.py,sha256=ccWUR0m7gD9DfcgsxBCLOfi4lho6RdYuB2Ugy_g6ZdQ,28
|
|
14
14
|
ecopipeline/utils/unit_convert.py,sha256=VFh1we2Y8KV3u21BeWb-U3TlZJXo83q5vdxxkpgcuME,3064
|
|
15
|
-
ecopipeline-0.8.
|
|
16
|
-
ecopipeline-0.8.
|
|
17
|
-
ecopipeline-0.8.
|
|
18
|
-
ecopipeline-0.8.
|
|
19
|
-
ecopipeline-0.8.
|
|
15
|
+
ecopipeline-0.8.9.dist-info/licenses/LICENSE,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
+
ecopipeline-0.8.9.dist-info/METADATA,sha256=BOKcoDGSM3uE7YWhm4lOfM-yqJSaEB2m-UhBvPbOkwM,2329
|
|
17
|
+
ecopipeline-0.8.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
18
|
+
ecopipeline-0.8.9.dist-info/top_level.txt,sha256=WOPFJH2LIgKqm4lk2OnFF5cgVkYibkaBxIxgvLgO7y0,12
|
|
19
|
+
ecopipeline-0.8.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|