multifunctionplotter 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- multifunctionplotter/mfp.py +989 -0
- multifunctionplotter/mfp_data_manipulator.py +192 -0
- multifunctionplotter/mfp_dmanp.py +931 -0
- multifunctionplotter/mfp_dmanp_help.py +741 -0
- multifunctionplotter/mfp_help.py +396 -0
- multifunctionplotter/mfp_server.py +603 -0
- multifunctionplotter/prophet_pred.py +214 -0
- multifunctionplotter-1.0.3.dist-info/METADATA +881 -0
- multifunctionplotter-1.0.3.dist-info/RECORD +13 -0
- multifunctionplotter-1.0.3.dist-info/WHEEL +5 -0
- multifunctionplotter-1.0.3.dist-info/entry_points.txt +3 -0
- multifunctionplotter-1.0.3.dist-info/licenses/LICENSE +201 -0
- multifunctionplotter-1.0.3.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import numpy as np
|
|
3
|
+
from scipy.fft import fft
|
|
4
|
+
from scipy.signal import find_peaks
|
|
5
|
+
from prophet import Prophet
|
|
6
|
+
from statsmodels.tsa.seasonal import seasonal_decompose
|
|
7
|
+
import matplotlib.pyplot as plt
|
|
8
|
+
import sys, re, warnings
|
|
9
|
+
|
|
10
|
+
warnings.filterwarnings("ignore")
|
|
11
|
+
|
|
12
|
+
# =============================================================================
# Intro banner: describe the tool and the parameters the user will be
# prompted for.
print ("Prophet Prediction")
print ("="*70)
print ("Prophet is a procedure for forecasting time series data based on an additive model where non-linear trends are fit with yearly, weekly, and daily seasonality, plus holiday effects. It works best with time series that have strong seasonal effects and several seasons of historical data. Prophet is robust to missing data and shifts in the trend, and typically handles outliers well.")
print ("="*70)
# Fixed: the original help text had mismatched quotes around the last two
# parameters ("'daily_seasonality=True, 'frequency'=D").
print ("Parameters to include: 'datafile.csv u 0:4', 'split_percentage=0.8', 'show_fft=False', 'show_decompose=False', 'daily_seasonality=True', 'frequency=D'")
print ("="*70)
|
|
19
|
+
|
|
20
|
+
# =============================================================================
# Take user input otherwise use default values except for datafile.
data_stx = input("Data Syntax: ")

# Parse the data syntax "datafile.csv u <col1>:<col2>" into datafile, col1,
# col2 (column numbers are 1-based in the input, converted to 0-based here).
# Exit with a readable message instead of a raw traceback on bad syntax.
try:
    parts = data_stx.split(" ")
    datafile = parts[0]
    col1 = int(parts[2].split(":")[0]) - 1
    col2 = int(parts[2].split(":")[1]) - 1
except (IndexError, ValueError):
    print (">> Invalid data syntax. Expected e.g. 'datafile.csv u 1:2'.")
    sys.exit()

# Fraction of rows used for training; default to 0.8 on empty/invalid input.
try:
    split_percentage = float(input("Split Percentage: "))
except ValueError:
    split_percentage = 0.8

# Boolean flags: anything other than the literal string "True" means False.
show_fft = input("Show FFT: ") == "True"
show_decompose = input("Show Decompose: ") == "True"
daily_seasonality = input("Daily Seasonality: ") == "True"
# Pandas/Prophet frequency alias (e.g. 'D' for daily); default 'D'.
frequency = input("Frequency: ") or "D"

print ("Given Parameters: ", datafile, split_percentage, show_fft, show_decompose, daily_seasonality, frequency)
|
|
40
|
+
|
|
41
|
+
# =============================================================================
# Function to check if the first column contains dates
def is_first_column_date(df):
    """Return True if the first column of *df* looks like ISO dates/datetimes.

    Inspects the first row of the first column.  (The original code read row
    index 1 — an off-by-one against its own comment — and raised IndexError
    on single-row frames; both are fixed here.)
    """
    # An empty frame has no cell to inspect: treat as "no date column".
    if len(df) == 0:
        print ("> Date column not detected.")
        return False

    first_row_first_col = df.iloc[0, 0]

    # Regex to check if the value starts with an ISO date (YYYY-MM-DD).
    date_regex = re.compile(r'\d{4}-\d{2}-\d{2}')
    # Regex to check if the value is a full ISO datetime.
    datetime_regex = re.compile(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}')

    is_date = bool(date_regex.match(str(first_row_first_col)))
    is_datetime = bool(datetime_regex.match(str(first_row_first_col)))

    if is_date or is_datetime:
        print ("> Date column detected.")
        return True
    else:
        print ("> Date column not detected.")
        return False
|
|
60
|
+
|
|
61
|
+
# Load the data according to the file extension.
# If datafile extension is csv
if datafile.endswith('.csv'):
    df_data = pd.read_csv(datafile)

elif datafile.endswith('.dat') or datafile.endswith('.txt'):
    # Whitespace-separated file, no header row.  Use a raw string for the
    # regex separator: "\s+" in a plain string literal is an invalid escape
    # sequence and a SyntaxWarning in modern Python versions.
    df_data = pd.read_csv(datafile, sep=r"\s+", header=None)

else:
    print (">> Invalid datafile extension. Please provide a csv, dat or txt file.")
    sys.exit()

print ("> Datafile loaded successfully")
|
|
73
|
+
|
|
74
|
+
# Check if the first column contains dates.  When no date column is found, a
# synthetic daily Date column is fabricated so Prophet has a time axis.
if not is_first_column_date(df_data):
    # Define the starting date (Unix epoch; the actual origin is arbitrary —
    # only the daily spacing matters for the decomposition/forecast)
    start_date = pd.to_datetime('1970-01-01')

    # Create a new date column: one day per row, in row order
    df_data['Date'] = start_date + pd.to_timedelta(df_data.index, unit='D')

    # Remove the original index column
    # NOTE(review): this drops the first DATA column, not a pandas index —
    # presumably the first column holds an x/index series when no dates are
    # present; confirm against the expected input files.
    df_data = df_data.drop(df_data.columns[0], axis=1)

    # Reorder the columns so Date comes first
    df_data = df_data[['Date'] + [col for col in df_data.columns if col != 'Date']]
|
|
87
|
+
|
|
88
|
+
# =============================================================================
# Keep only the requested (date, value) column pair for the analysis.
df_data = df_data.iloc[:, [col1, col2]]

# Normalise the column names: position 0 -> "Date", position 1 -> "y_data".
column_map = {df_data.columns[0]: "Date", df_data.columns[1]: "y_data"}
df_data = df_data.rename(columns=column_map)

# Make sure the Date column carries a true datetime dtype.
df_data["Date"] = pd.to_datetime(df_data["Date"])

# Chronological train/test split: the first `split_percentage` fraction of
# rows trains the model, the remainder is held out for evaluation.
split_index = int(len(df_data) * split_percentage)
df_data_train = df_data.iloc[:split_index]
df_data_test = df_data.iloc[split_index:]
|
|
104
|
+
|
|
105
|
+
# =============================================================================
# Detect Seasonality Period: find the dominant frequency in the FFT of the
# training series and take its reciprocal as the period (in samples).
y = df_data_train['y_data'].dropna().values

# Perform Fourier Transform
fft_result = fft(y)
# Sample frequencies for each FFT bin (cycles per sample)
frequencies = np.fft.fftfreq(len(y))

if show_fft:
    # Plot the magnitude of the FFT result.
    # NOTE(review): np.abs(frequencies) folds the negative-frequency half of
    # the spectrum onto the positive axis, so each frequency is drawn twice.
    plt.figure(figsize=(12,6))
    plt.plot(np.abs(frequencies), np.abs(fft_result))
    plt.xlabel('Frequency', fontsize=18)
    plt.ylabel('Magnitude', fontsize=18)
    plt.title('Fourier Transform', fontsize=18)
    plt.tick_params(axis='both', which='major', labelsize=16)
    plt.tight_layout()
    plt.show()

# Filter out zero and negative frequencies (the spectrum of a real-valued
# signal is symmetric, so the positive half carries all the information)
positive_frequencies = frequencies[frequencies > 0]
positive_magnitudes = np.abs(fft_result)[frequencies > 0]

# Find peaks in the FFT magnitude spectrum
peaks, _ = find_peaks(positive_magnitudes)

# If no peaks are found, use the maximum frequency
if len(peaks) == 0:
    peak_frequency = positive_frequencies[np.argmax(positive_magnitudes)]

else:
    # Otherwise take the frequency of the strongest detected peak
    peak_frequency = positive_frequencies[peaks[np.argmax(positive_magnitudes[peaks])]]

# Calculate seasonality period as the reciprocal of the dominant frequency;
# None when no non-zero dominant frequency exists
seasonality_period = int(1 / peak_frequency) if peak_frequency != 0 else None
print("Detected seasonality period:", seasonality_period)
|
|
141
|
+
|
|
142
|
+
# =============================================================================
# Optional classical decomposition of the training series into trend,
# seasonality and residual components.
if show_decompose:
    try:
        # First attempt: let statsmodels infer the period from the index.
        decompose = seasonal_decompose(df_data_train.y_data)
    # NOTE(review): bare except — ANY failure (not only missing values) is
    # treated as "missing data"; consider narrowing to ValueError.
    except:
        print ("Decompose failed due to missing values. Interpolating missing values")
        # Fill gaps by linear interpolation, then retry with the
        # FFT-detected period and an explicit additive model.
        df_data_train.loc[df_data_train['y_data'].isnull(), 'y_data'] = df_data_train['y_data'].interpolate()

        decompose = seasonal_decompose(df_data_train.y_data, model='additive', extrapolate_trend='freq', period=seasonality_period)

    # Plot the decomposed time series: original, trend, seasonality,
    # residuals stacked in a 4-row figure.
    plt.figure(figsize=(12,6))
    plt.subplot(411)
    plt.plot(df_data_train.y_data, label='Original', color="green")
    plt.legend(loc='upper left', frameon=False)
    plt.subplot(412)
    plt.plot(decompose.trend, label='Trend', color="blue")
    plt.legend(loc='upper left', frameon=False)
    plt.subplot(413)
    plt.plot(decompose.seasonal,label='Seasonality', color="magenta")
    plt.legend(loc='upper left', frameon=False)
    plt.subplot(414)
    plt.plot(decompose.resid, label='Residuals', color="orange")
    # Zero line as a visual reference for the residuals
    plt.axhline(0, linestyle='--', color='gray')
    plt.legend(loc='upper left', frameon=False)
    plt.tight_layout()
|
|
168
|
+
|
|
169
|
+
# =============================================================================
# Fit Prophet on the training split and forecast across the test horizon.
# Prophet requires the date column to be named "ds" and the target "y", so
# rename both in one pass on a copy of the training frame.
df_train_prophet = df_data_train.copy().rename(columns={"Date": "ds", "y_data": "y"})

model_prophet = Prophet(daily_seasonality=daily_seasonality)
model_prophet.fit(df_train_prophet)

# Forecast exactly as many periods as the held-out test set contains.
period_in_future = len(df_data_test)
df_future = model_prophet.make_future_dataframe(periods=period_in_future, freq=frequency)

# Predict over history + future, then show the tail of the forecast.
forecast_prophet = model_prophet.predict(df_future)
print ("Forecasted values:")
print ('='*70)
print(forecast_prophet[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].round().tail())
print ('='*70)
|
|
189
|
+
|
|
190
|
+
# =============================================================================
# Plot the time series: Prophet's forecast figure plus the held-out data.
forecast_plot = model_prophet.plot(forecast_prophet, figsize=(12, 6))
axes = forecast_plot.gca()

# Add a vertical line at the end of the training period so the forecast
# horizon is visually separated from the fitted history.
last_training_date = forecast_prophet['ds'].iloc[-period_in_future]
axes.axvline(x=last_training_date, color='red', linestyle='--', label='Training End')

# Overlay the true test observations as red dots on the same axes.
axes.plot(df_data_test['Date'], df_data_test['y_data'], 'ro', markersize=3, label='True Test Data')
axes.set_xlabel('Date', fontsize=18)
axes.set_ylabel('y', fontsize=18)
axes.tick_params(axis='both', which='major', labelsize=16)
axes.legend(frameon=False, fontsize=14)

forecast_plot.tight_layout()
plt.show()
|
|
207
|
+
|
|
208
|
+
# =============================================================================
# Save the forecasted values (and the forecast figure) on request.
save_forecast = input("Save Forecasted Values (True/False):")
# Accept any case/whitespace variant of "true"/"t" — a superset of the
# original "True"/"true"/"T"/"t" chain, so existing inputs still work.
if save_forecast.strip().lower() in ("true", "t"):
    forecast_prophet.to_csv("forecast_prophet.csv", index=False)
    print ("Forecasted values saved as forecast_prophet.csv")
    # Save via the figure handle: plt.savefig() after plt.show() may target
    # a brand-new (blank) figure on interactive backends.
    forecast_plot.savefig("forecast_prophet.png")
|