multifunctionplotter 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,214 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from scipy.fft import fft
4
+ from scipy.signal import find_peaks
5
+ from prophet import Prophet
6
+ from statsmodels.tsa.seasonal import seasonal_decompose
7
+ import matplotlib.pyplot as plt
8
+ import sys, re, warnings
9
+
10
# Suppress library warnings for the rest of the script.
warnings.filterwarnings("ignore")

# =============================================================================
print("Prophet Prediction")
print("="*70)
print("Prophet is a procedure for forecasting time series data based on an additive model where non-linear trends are fit with yearly, weekly, and daily seasonality, plus holiday effects. It works best with time series that have strong seasonal effects and several seasons of historical data. Prophet is robust to missing data and shifts in the trend, and typically handles outliers well.")
print("="*70)
print("Parameters to include: 'datafile.csv u 0:4', 'split_percentage=0.8', 'show_fft=False', 'show_decompose=False', 'daily_seasonality=True', 'frequency=D'")
print("="*70)

# =============================================================================
# Spellings that switch a flag on; the same set the save prompt at the end of
# the script accepts.
_TRUE_ANSWERS = ("True", "true", "T", "t")


def _ask_flag(prompt):
    """Prompt for a boolean flag; any answer outside _TRUE_ANSWERS means False."""
    return input(prompt).strip() in _TRUE_ANSWERS


# Take user input otherwise use default values except for datafile
data_stx = input("Data Syntax: ")

# Parse the data syntax "<datafile> u <x>:<y>" into datafile, col1, col2.
# Splitting on any whitespace tolerates repeated spaces between the tokens.
_tokens = data_stx.split()
try:
    datafile = _tokens[0]
    _x_col, _y_col = _tokens[2].split(":")[:2]
    # Columns are given 1-based. Column 0 (as in the advertised "u 0:4") is
    # clamped to position 0 instead of wrapping around to -1, which would
    # silently select the LAST column.
    # NOTE(review): assumes column 0 is meant as the index/date column - confirm.
    col1 = max(int(_x_col) - 1, 0)
    col2 = max(int(_y_col) - 1, 0)
except (IndexError, ValueError):
    print(">> Invalid data syntax. Expected something like 'datafile.csv u 1:4'.")
    sys.exit(1)

# A blank or non-numeric answer falls back to an 80/20 split.
try:
    split_percentage = float(input("Split Percentage: "))
except ValueError:
    split_percentage = 0.8

show_fft = _ask_flag("Show FFT: ")
show_decompose = _ask_flag("Show Decompose: ")
daily_seasonality = _ask_flag("Daily Seasonality: ")
# An empty answer selects the default frequency string "D".
frequency = input("Frequency: ") or "D"

print("Given Parameters: ", datafile, split_percentage, show_fft, show_decompose, daily_seasonality, frequency)
40
+
41
+ # =============================================================================
42
+ # Function to check if the first column contains dates
43
def is_first_column_date(df):
    """Report whether the first column of *df* appears to hold dates.

    One cell of the first column is converted to ``str`` and tested for a
    leading ``YYYY-MM-DD`` pattern.  Datetime strings of the form
    ``YYYY-MM-DD HH:MM:SS`` start with that same pattern, so they are
    accepted by the same check.  The outcome is also printed.

    The probed cell is the one at row position 1 when the frame has at least
    two rows; a single-row frame is probed at row position 0, and a frame
    with no rows or no columns is reported as "not a date column" instead of
    raising ``IndexError``.

    Args:
        df: The loaded pandas DataFrame.

    Returns:
        True if the probed cell starts with a date, otherwise False.
    """
    n_rows, n_cols = df.shape
    if n_rows == 0 or n_cols == 0:
        print("> Date column not detected.")
        return False

    # NOTE(review): row position 1 (not 0) presumably skips a header line in
    # files read with header=None - confirm against the callers.
    probe_row = 1 if n_rows > 1 else 0
    sample = str(df.iloc[probe_row, 0])

    # re.match anchors at the start only, so this also matches full datetimes.
    date_prefix = re.compile(r'\d{4}-\d{2}-\d{2}')

    if date_prefix.match(sample):
        print("> Date column detected.")
        return True

    print("> Date column not detected.")
    return False
60
+
61
# Load the data file; the reader is chosen from the file extension.
if datafile.endswith('.csv'):
    df_data = pd.read_csv(datafile)

elif datafile.endswith(('.dat', '.txt')):
    # Whitespace-separated file read without a header row.  The separator is
    # a raw string because "\s" is an invalid escape in a normal literal.
    df_data = pd.read_csv(datafile, sep=r"\s+", header=None)

else:
    print(">> Invalid datafile extension. Please provide a csv, dat or txt file.")
    # Non-zero status so calling shells/scripts can see the failure.
    sys.exit(1)

print("> Datafile loaded successfully")

# Check if the first column contains dates
if not is_first_column_date(df_data):
    # No dates available: synthesise a daily date axis starting at the epoch,
    # one day per row index.
    start_date = pd.to_datetime('1970-01-01')
    df_data['Date'] = start_date + pd.to_timedelta(df_data.index, unit='D')

    # Remove the original first column.
    # NOTE(review): this assumes the first column is an index-like column
    # that can be discarded - confirm, since real data there would be lost.
    df_data = df_data.drop(df_data.columns[0], axis=1)

    # Move the generated Date column to the front.
    df_data = df_data[['Date'] + [col for col in df_data.columns if col != 'Date']]
87
+
88
+ # =============================================================================
89
# Keep only the two requested columns (positional selection), then give them
# the fixed names used by the rest of the script: "Date" and "y_data".
_picked = df_data.iloc[:, [col1, col2]]
_date_label, _value_label = _picked.columns[0], _picked.columns[1]
df_data = _picked.rename(columns={_date_label: "Date", _value_label: "y_data"})

# Make sure the Date column holds datetime values.
df_data["Date"] = pd.to_datetime(df_data["Date"])

# The first `split_percentage` share of the rows is used for training and the
# remainder is held out for testing.
split_index = int(len(df_data) * split_percentage)
df_data_train, df_data_test = df_data.iloc[:split_index], df_data.iloc[split_index:]
104
+
105
+ # =============================================================================
106
# Detect Seasonality Period
# Missing samples are removed before the transform.
y = df_data_train['y_data'].dropna().values

# Stays None when the series is too short to contain a positive frequency.
seasonality_period = None

if len(y) < 3:
    # With fewer than three samples np.fft.fftfreq yields no strictly positive
    # frequency, so there is nothing to pick a peak from.
    print("> Not enough training samples to detect a seasonality period.")

else:
    # Perform Fourier Transform
    fft_result = fft(y)
    frequencies = np.fft.fftfreq(len(y))

    if show_fft:
        # Plot the magnitude of the FFT result
        plt.figure(figsize=(12,6))
        plt.plot(np.abs(frequencies), np.abs(fft_result))
        plt.xlabel('Frequency', fontsize=18)
        plt.ylabel('Magnitude', fontsize=18)
        plt.title('Fourier Transform', fontsize=18)
        plt.tick_params(axis='both', which='major', labelsize=16)
        plt.tight_layout()
        plt.show()

    # Filter out zero and negative frequencies
    positive_mask = frequencies > 0
    positive_frequencies = frequencies[positive_mask]
    positive_magnitudes = np.abs(fft_result)[positive_mask]

    # Find peaks in the FFT magnitude spectrum
    peaks, _ = find_peaks(positive_magnitudes)

    if len(peaks) == 0:
        # No local peak found: fall back to the strongest positive frequency.
        peak_frequency = positive_frequencies[np.argmax(positive_magnitudes)]
    else:
        # Otherwise take the frequency of the tallest local peak.
        peak_frequency = positive_frequencies[peaks[np.argmax(positive_magnitudes[peaks])]]

    # peak_frequency is strictly positive here, so the division is safe.
    # The period is expressed in samples and truncated to an integer.
    seasonality_period = int(1 / peak_frequency)

print("Detected seasonality period:", seasonality_period)
141
+
142
+ # =============================================================================
143
if show_decompose:
    # Check for missing values explicitly.  The previous version detected
    # them by running a throw-away decomposition inside a bare `except:`,
    # which hid every other error behind a "missing values" message.
    if df_data_train['y_data'].isnull().any():
        print("Missing values detected. Interpolating missing values")
        # assign() builds a new frame, so no write goes through a slice of df_data.
        df_data_train = df_data_train.assign(y_data=df_data_train['y_data'].interpolate())

    if seasonality_period is None:
        # Without a period the decomposition call below cannot be parameterised.
        print(">> Skipping decomposition: no seasonality period was detected.")

    else:
        decompose = seasonal_decompose(df_data_train.y_data, model='additive', extrapolate_trend='freq', period=seasonality_period)

        # Plot the decomposed time series as four stacked panels:
        # original, trend, seasonality, residuals.
        plt.figure(figsize=(12,6))
        plt.subplot(411)
        plt.plot(df_data_train.y_data, label='Original', color="green")
        plt.legend(loc='upper left', frameon=False)
        plt.subplot(412)
        plt.plot(decompose.trend, label='Trend', color="blue")
        plt.legend(loc='upper left', frameon=False)
        plt.subplot(413)
        plt.plot(decompose.seasonal,label='Seasonality', color="magenta")
        plt.legend(loc='upper left', frameon=False)
        plt.subplot(414)
        plt.plot(decompose.resid, label='Residuals', color="orange")
        plt.axhline(0, linestyle='--', color='gray')
        plt.legend(loc='upper left', frameon=False)
        # No plt.show() here: the figure is displayed by the next show() call
        # made later in the script.
        plt.tight_layout()
168
+
169
+ # =============================================================================
170
# Prophet needs the date column to be named "ds" and the target column "y";
# work on a copy so the training frame keeps its own column names.
df_train_prophet = df_data_train.copy().rename(columns={"Date": "ds", "y_data": "y"})

model_prophet = Prophet(daily_seasonality=daily_seasonality)
model_prophet.fit(df_train_prophet)

# Request one future period for every row of the held-out test set.
period_in_future = len(df_data_test)
df_future = model_prophet.make_future_dataframe(periods=period_in_future, freq=frequency)

forecast_prophet = model_prophet.predict(df_future)

# Print the tail of the forecast (point estimate and bounds), rounded.
_rule = '='*70
_summary_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
print("Forecasted values:")
print(_rule)
print(forecast_prophet[_summary_columns].round().tail())
print(_rule)
189
+
190
+ # =============================================================================
191
# Plot the time series
forecast_plot = model_prophet.plot(forecast_prophet, figsize=(12, 6))

# Add a vertical line at the end of the training period.
# The date is taken from the training frame itself.  Indexing the forecast
# with iloc[-period_in_future] pointed at the first *forecast* row (one step
# too late) and at the very first row when the test set was empty (iloc[-0]).
axes = forecast_plot.gca()
last_training_date = df_data_train['Date'].max()
axes.axvline(x=last_training_date, color='red', linestyle='--', label='Training End')

# Overlay the held-out observations for visual comparison with the forecast.
plt.plot(df_data_test['Date'], df_data_test['y_data'], 'ro', markersize=3, label='True Test Data')
plt.xlabel('Date', fontsize=18)
plt.ylabel('y', fontsize=18)
plt.tick_params(axis='both', which='major', labelsize=16)
plt.legend(frameon=False, fontsize=14)

plt.tight_layout()
plt.show()
207
+
208
+ # =============================================================================
209
# Save the forecasted values
save_forecast = input("Save Forecasted Values (True/False):")
if save_forecast in ("True", "true", "T", "t"):
    forecast_prophet.to_csv("forecast_prophet.csv", index=False)
    print("Forecasted values saved as forecast_prophet.csv")
    # Save through the Figure object returned by model_prophet.plot().  After
    # plt.show() has returned, plt.savefig() acts on pyplot's "current"
    # figure, which is no longer the forecast plot once its window is closed.
    forecast_plot.savefig("forecast_prophet.png")