myawesomepkg 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- myawesomepkg/TSAPY1/Practical No 1.py +148 -0
- myawesomepkg/TSAPY1/Practical No 2.py +115 -0
- myawesomepkg/TSAPY1/Practical No 3.py +168 -0
- myawesomepkg/TSAPY1/Practical No 4 A.py +233 -0
- myawesomepkg/TSAPY1/Practical No 4 B.py +137 -0
- myawesomepkg/TSAPY1/Practical No 5.py +52 -0
- myawesomepkg/TSAPY1/Practical No 6.py +29 -0
- myawesomepkg/TSAPY1/Practical No 7.py +67 -0
- myawesomepkg/TSAPY1/Practical No 8.py +108 -0
- myawesomepkg/TSAPY1/__init__.py +0 -0
- myawesomepkg/TSAPY1/practical_no_3.py +167 -0
- myawesomepkg/TSAPY1/practical_no_4.py +215 -0
- myawesomepkg/TSAPY1/practical_no_4b.py +78 -0
- myawesomepkg/TSAPY1/practical_no_5_ac_and_pca.py +39 -0
- myawesomepkg/TSAPY1/practical_no_6.py +37 -0
- myawesomepkg/TSAPY1/practical_no_7.py +69 -0
- myawesomepkg/TSAPY1/practical_no_8.py +79 -0
- myawesomepkg/TSAPY1/tsa_practical_no_1.py +287 -0
- myawesomepkg/TSAPY1/tsa_practical_no_2.py +121 -0
- myawesomepkg-0.1.4.dist-info/METADATA +12 -0
- myawesomepkg-0.1.4.dist-info/RECORD +25 -0
- myawesomepkg/d.py +0 -36
- myawesomepkg-0.1.2.dist-info/METADATA +0 -7
- myawesomepkg-0.1.2.dist-info/RECORD +0 -7
- {myawesomepkg-0.1.2.dist-info → myawesomepkg-0.1.4.dist-info}/WHEEL +0 -0
- {myawesomepkg-0.1.2.dist-info → myawesomepkg-0.1.4.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,215 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
"""Practical No 4.ipynb
|
3
|
+
|
4
|
+
Automatically generated by Colab.
|
5
|
+
|
6
|
+
Original file is located at
|
7
|
+
https://colab.research.google.com/drive/1DnqwmMcr1ypxLue43og-x_3DEDaI1g7Q
|
8
|
+
|
9
|
+
# Aim: Working with stationary and non stationary timeseries
|
10
|
+
|
11
|
+
# Checks for Stationarity
|
12
|
+
There are many methods to check whether a time series (direct observations, residuals, otherwise)
|
13
|
+
is stationary or non-stationary.
|
14
|
+
|
15
|
+
**A] Look at Plots:** You can review a time series plot of your data and visually check if there are any obvious trends or seasonality.
|
16
|
+
|
17
|
+
**Stationary Time Series**
|
18
|
+
"""
|
19
|
+
|
20
|
+
# load time series data
|
21
|
+
from pandas import read_csv
|
22
|
+
from matplotlib import pyplot
|
23
|
+
# Load the daily female births data and plot it (the stationary example).
# read_csv's `squeeze=True` keyword was removed in pandas 2.0; squeezing the
# single-column frame afterwards yields the same Series on old and new pandas.
series = read_csv(
    '/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv',
    header=0,
    index_col=0,
    parse_dates=True,
).squeeze("columns")
series.plot()
pyplot.show()
|
27
|
+
|
28
|
+
"""**Non-Stationary Time Series**"""
|
29
|
+
|
30
|
+
# load time series data
|
31
|
+
from pandas import read_csv
|
32
|
+
from matplotlib import pyplot
|
33
|
+
# Load the AirPassengers data and plot it (the non-stationary example).
# `squeeze=True` no longer exists in pandas >= 2.0, so the one-column frame
# is squeezed to a Series explicitly.
series = read_csv(
    '/content/drive/MyDrive/MScDS TSA/AirPassengers.csv',
    header=0,
    index_col=0,
    parse_dates=True,
).squeeze("columns")
series.plot()
pyplot.show()
|
37
|
+
|
38
|
+
"""**B] Summary Statistics:** You can review the summary statistics for your data for seasons or random partitions and check for obvious or significant differences
|
39
|
+
|
40
|
+
|
41
|
+
"""
|
42
|
+
|
43
|
+
# plot a histogram of a time series
|
44
|
+
from pandas import read_csv
|
45
|
+
from matplotlib import pyplot
|
46
|
+
# Histogram of the daily female births values.
# `.squeeze("columns")` replaces the `squeeze=True` keyword that pandas 2.0
# removed from read_csv.
series = read_csv(
    '/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv',
    header=0,
    index_col=0,
    parse_dates=True,
).squeeze("columns")
series.hist()
pyplot.show()
|
50
|
+
|
51
|
+
"""We clearly see the bell curve-like shape of the Gaussian distribution, perhaps with a longer right tail.
|
52
|
+
|
53
|
+
Next, we can split the time series into two contiguous halves, then calculate the mean and variance of each half and compare the values.
|
54
|
+
"""
|
55
|
+
|
56
|
+
# calculate statistics of partitioned time series data
|
57
|
+
from pandas import read_csv
|
58
|
+
# Split the births series into two halves and compare their mean/variance.
# The original cell was truncated to `X = series` followed by a stray `2))`
# (a syntax error); the missing statements are restored to match the
# identical AirPassengers partition cell further down this file.
series = read_csv(
    '/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv',
    header=0,
    index_col=0,
    parse_dates=True,
).squeeze("columns")  # `squeeze=True` was removed from read_csv in pandas 2.0

X = series.values
split = int(len(X) / 2)
X1, X2 = X[0:split], X[split:]
mean1, mean2 = X1.mean(), X2.mean()
var1, var2 = X1.var(), X2.var()
print('mean1=%f, mean2=%f' % (mean1, mean2))
print('variance1=%f, variance2=%f' % (var1, var2))
|
62
|
+
|
63
|
+
"""This example shows that the mean and variance values are different, but in the
|
64
|
+
same ball-park(range).
|
65
|
+
"""
|
66
|
+
|
67
|
+
# calculate statistics of partitioned time series data
|
68
|
+
from pandas import read_csv
|
69
|
+
# Split the AirPassengers series into two halves and compare mean/variance.
# `squeeze=True` was removed from read_csv in pandas 2.0, so the one-column
# frame is squeezed to a Series after loading.
series = read_csv(
    '/content/drive/MyDrive/MScDS TSA/AirPassengers.csv',
    header=0,
    index_col=0,
    parse_dates=True,
).squeeze("columns")

X = series.values
split = int(len(X) / 2)  # index of the first element of the second half
X1, X2 = X[0:split], X[split:]
mean1, mean2 = X1.mean(), X2.mean()
var1, var2 = X1.var(), X2.var()
print('mean1=%f, mean2=%f' % (mean1, mean2))
print('variance1=%f, variance2=%f' % (var1, var2))
|
78
|
+
|
79
|
+
"""In this example, we can see that the mean and variance look very different. We have a
|
80
|
+
non-stationary time series.
|
81
|
+
|
82
|
+
**C] Statistical Tests:** You can use statistical tests to check if the expectations of stationarity are met or have been violated
|
83
|
+
|
84
|
+
Calculating the Augmented Dickey-Fuller test on the Daily Female Births dataset.
|
85
|
+
"""
|
86
|
+
|
87
|
+
# calculate stationarity test of time series data
|
88
|
+
from pandas import read_csv
|
89
|
+
from statsmodels.tsa.stattools import adfuller
|
90
|
+
# Augmented Dickey-Fuller test on the daily female births series.
# `.squeeze("columns")` replaces read_csv's `squeeze=True`, which pandas 2.0
# removed.
series = read_csv(
    '/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv',
    header=0,
    index_col=0,
    parse_dates=True,
).squeeze("columns")

X = series.values
result = adfuller(X)
# result[0] is the test statistic, result[1] the p-value and result[4] the
# mapping of significance level -> critical value.
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))
|
99
|
+
|
100
|
+
"""Running the example prints the test statistic value of -4. The more negative this statistic,
|
101
|
+
the more likely we are to reject the null hypothesis (we have a stationary dataset). As part of
|
102
|
+
the output, we get a look-up table to help determine the ADF statistic. We can see that our
|
103
|
+
statistic value of -4 is less than the value of -3.449 at 1%.
|
104
|
+
This suggests that we can reject the null hypothesis with a significance level of less than 1%
|
105
|
+
(i.e. a low probability that the result is a statistical fluke). Rejecting the null hypothesis means
|
106
|
+
that the process has no unit root, and in turn that the time series is stationary or does not have
|
107
|
+
time-dependent structure.
|
108
|
+
|
109
|
+
"""
|
110
|
+
|
111
|
+
#Importing the libraries:
|
112
|
+
|
113
|
+
from statsmodels.tsa.stattools import adfuller
|
114
|
+
import pandas as pd
|
115
|
+
import numpy as np
|
116
|
+
|
117
|
+
#Reading the airline-passengers data
|
118
|
+
|
119
|
+
# Read the airline-passengers data, indexed by its 'Month' column.
data = pd.read_csv(
    '/content/drive/MyDrive/MScDS TSA/AirPassengers.csv',
    index_col='Month',
)

# Notebook leftover: the preview is only displayed in an interactive session.
data.head()

# Plot the raw series.
data.plot(figsize=(14,8), title='data series')

# Passenger counts as a plain array for the test below.
series = data['#Passengers'].values

# ADF test; `autolag='AIC'` lets statsmodels pick the lag length.
result = adfuller(series, autolag='AIC')

# Report the statistic, p-value and the table of critical values.
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

# The unit-root null is rejected only when the statistic lies below the
# 5% critical value.
stationary = result[0] < result[4]["5%"]
print("Reject Ho - Time Series is Stationary" if stationary
      else "Failed to Reject Ho - Time Series is Non-Stationary")
|
152
|
+
|
153
|
+
"""The test statistic is positive,
|
154
|
+
meaning we are much less likely to reject the null hypothesis (it looks non-stationary). Comparing
|
155
|
+
the test statistic to the critical values, it looks like we would have to fail to reject the null
|
156
|
+
hypothesis that the time series is non-stationary and does have time-dependent structure.
|
157
|
+
|
158
|
+
"""
|
159
|
+
|
160
|
+
#Kwiatkowski Phillips Schmidt Shin (KPSS) test:
|
161
|
+
|
162
|
+
#Importing the libraries:
|
163
|
+
|
164
|
+
from statsmodels.tsa.stattools import kpss
|
165
|
+
import pandas as pd
|
166
|
+
import numpy as np
|
167
|
+
import warnings
|
168
|
+
warnings.filterwarnings("ignore")
|
169
|
+
|
170
|
+
# KPSS test on `series` with a constant-plus-trend regression ("ct").
result_kpss_ct = kpss(series, regression="ct")

# Index 0 is the statistic, 1 the p-value, 3 the critical-value table.
print('Test Statistic: %f' % result_kpss_ct[0])
print('p-value: %f' % result_kpss_ct[1])
print('Critical values:')
critical_values = result_kpss_ct[3]
for key, value in critical_values.items():
    print('\t%s: %.3f' % (key, value))
|
176
|
+
|
177
|
+
"""As the test statistic is greater than the critical value, the null hypothesis (that the series is trend-stationary) is rejected. This indicates that the data is non-stationary."""
|
178
|
+
|
179
|
+
#Loading the data.
|
180
|
+
|
181
|
+
# Load the daily minimum temperatures, indexed by the 'Date' column.
path = '/content/daily-min-temperatures.csv'
data = pd.read_csv(path, index_col='Date')

# Notebook leftover: the preview is only displayed in an interactive session.
data.head()

# Plot the raw series.
data.plot(figsize=(14,8), title='temperature data series')

# Temperatures as a plain array for the ADF test.
series = data['Temp'].values

# ADF test; `autolag='AIC'` lets statsmodels pick the lag length.
result = adfuller(series, autolag='AIC')

print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])
print('Critical Values:')
for key, value in result[4].items():
    print('\t%s: %.3f' % (key, value))

# The ADF null hypothesis is "unit root present" (non-stationary); it is
# rejected when the statistic is BELOW the critical value. The original
# comparison was inverted (`>`) and its else-branch message contradicted
# itself ("Failed to Reject Ho ... Stationary").
if result[0] < result[4]["5%"]:
    print ("Reject Ho - Time Series is Stationary")
else:
    print ("Failed to Reject Ho - Time Series is Non-Stationary")
|
215
|
+
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
"""Practical No 4B.ipynb
|
3
|
+
|
4
|
+
Automatically generated by Colab.
|
5
|
+
|
6
|
+
Original file is located at
|
7
|
+
https://colab.research.google.com/drive/1BIAOhJMWQry0k6SDmofT9ESz5f45YX4f
|
8
|
+
"""
|
9
|
+
|
10
|
+
def test_stationarity(dataFrame, var):
    """Print an ADF test for one column and plot it with rolling statistics.

    Args:
        dataFrame: pandas DataFrame holding the series. It is not modified.
        var: name of the column to test.

    Prints the ADF statistic, p-value, lags used, number of observations and
    the critical values, then draws the column together with its 12-period
    rolling mean and rolling standard deviation. Returns None.
    """
    # Imported locally so the function does not depend on module-level
    # imports that only appear after this definition.
    import pandas as pd
    import seaborn as sns
    from statsmodels.tsa.stattools import adfuller

    # Work on a copy: writing the rolling columns into the caller's frame
    # leaks NaN-leading columns into it (so a later dropna() on that frame
    # loses rows) and triggers SettingWithCopyWarning on slices.
    frame = dataFrame.copy()
    frame['rollMean'] = frame[var].rolling(window=12).mean()
    frame['rollStd'] = frame[var].rolling(window=12).std()

    adfTest = adfuller(frame[var], autolag='AIC')
    stats = pd.Series(
        adfTest[0:4],
        index=['Test Statistic', 'p-value', '#lags used', 'number of observations used'],
    )
    print(stats)

    # adfTest[4] maps significance level -> critical value.
    for key, value in adfTest[4].items():
        print('\t%s: %.3f' % (key, value))

    sns.lineplot(data=frame, x=frame.index, y=var)
    sns.lineplot(data=frame, x=frame.index, y='rollMean')
    sns.lineplot(data=frame, x=frame.index, y='rollStd')
|
26
|
+
|
27
|
+
import pandas as pd
|
28
|
+
import numpy as np
|
29
|
+
|
30
|
+
#Reading the airline-passengers data
|
31
|
+
|
32
|
+
# Read the airline-passengers data, indexed by its 'Month' column.
csv_path = '/content/drive/MyDrive/MScDS TSA/AirPassengers.csv'
data = pd.read_csv(csv_path, index_col='Month')

# Notebook leftover: the preview is only displayed in an interactive session.
data.head()

# Stationarity report for the raw passenger counts.
test_stationarity(dataFrame=data, var='Passengers')
|
39
|
+
|
40
|
+
# First-order differencing of the passenger counts.
# `.copy()` makes air_df an independent frame, so the column assignments
# below do not raise SettingWithCopyWarning.
air_df = data[['Passengers']].copy()
air_df.head()

air_df['shift'] = air_df.Passengers.shift()
air_df['shiftDiff'] = air_df.Passengers - air_df['shift']
air_df.head()

# The first row has no predecessor, so its difference is NaN; drop it.
test_stationarity(air_df.dropna(),'shiftDiff')
|
48
|
+
|
49
|
+
# Log transform of the passenger counts.
# `.copy()` avoids SettingWithCopyWarning when the new column is added.
log_df = air_df[['Passengers']].copy()
log_df['log'] = np.log(log_df['Passengers'])
log_df.head()

test_stationarity(log_df,'log')
|
54
|
+
|
55
|
+
# Square-root transform of the passenger counts.
# `.copy()` avoids SettingWithCopyWarning when the new column is added.
sqrt_df = air_df[['Passengers']].copy()
sqrt_df['sqrt'] = np.sqrt(air_df['Passengers'])
sqrt_df.head()

test_stationarity(sqrt_df,'sqrt')

# Cube-root transform of the passenger counts.
cbrt_df = air_df[['Passengers']].copy()
cbrt_df['cbrt'] = np.cbrt(air_df['Passengers'])
cbrt_df.head()

test_stationarity(cbrt_df,'cbrt')
|
66
|
+
|
67
|
+
# Square root of the log-transformed counts.
# `.copy()` avoids SettingWithCopyWarning when the new column is added.
log_df2 = log_df[['Passengers','log']].copy()
log_df2['log_sqrt'] = np.sqrt(log_df['log'])
log_df2.head()

test_stationarity(log_df2,'log_sqrt')

# Rebuild the frame from scratch before differencing so that only the
# columns created here can contribute NaNs to the dropna() below.
log_df2 = log_df[['Passengers','log']].copy()
log_df2['log_sqrt'] = np.sqrt(log_df['log'])
log_df2['logShiftDiff'] = log_df2['log_sqrt'] - log_df2['log_sqrt'].shift()
log_df2.head()

test_stationarity(log_df2.dropna(),'logShiftDiff')
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
"""Practical No 5 AC and PCA.ipynb
|
3
|
+
|
4
|
+
Automatically generated by Colab.
|
5
|
+
|
6
|
+
Original file is located at
|
7
|
+
https://colab.research.google.com/drive/1sGHsmswvo180eVjgUJh53fzwK1eJoZRQ
|
8
|
+
|
9
|
+
# Aim: Implementing auto correlation and partial auto-correlation on timeseries
|
10
|
+
"""
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
# ACF plot of time series
|
15
|
+
from pandas import read_csv
|
16
|
+
from matplotlib import pyplot
|
17
|
+
#from statsmodels.graphics.tsaplots import plot_acf
|
18
|
+
from pandas.plotting import autocorrelation_plot
|
19
|
+
# Autocorrelation plot of the daily minimum temperatures.
# `squeeze=True` was removed from read_csv in pandas 2.0; squeeze the
# single-column frame to a Series explicitly instead.
series = read_csv(
    '/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv',
    header=0,
    index_col=0,
    parse_dates=True,
).squeeze("columns")
autocorrelation_plot(series)
pyplot.show()
|
23
|
+
|
24
|
+
# zoomed-in ACF plot of time series
|
25
|
+
from pandas import read_csv
|
26
|
+
from matplotlib import pyplot
|
27
|
+
from statsmodels.graphics.tsaplots import plot_acf
|
28
|
+
# ACF plot limited to the first 50 lags.
# `.squeeze("columns")` replaces the `squeeze=True` keyword that pandas 2.0
# removed from read_csv.
series = read_csv(
    '/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv',
    header=0,
    index_col=0,
    parse_dates=True,
).squeeze("columns")
plot_acf(series, lags=50)
pyplot.show()
|
31
|
+
|
32
|
+
# PACF plot of time series
|
33
|
+
from pandas import read_csv
|
34
|
+
from matplotlib import pyplot
|
35
|
+
from statsmodels.graphics.tsaplots import plot_pacf
|
36
|
+
# Partial autocorrelation plot limited to the first 50 lags.
# `.squeeze("columns")` replaces the `squeeze=True` keyword that pandas 2.0
# removed from read_csv.
series = read_csv(
    '/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv',
    header=0,
    index_col=0,
    parse_dates=True,
).squeeze("columns")
plot_pacf(series, lags=50)
pyplot.show()
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
"""Practical No 6.ipynb
|
3
|
+
|
4
|
+
Automatically generated by Colab.
|
5
|
+
|
6
|
+
Original file is located at
|
7
|
+
https://colab.research.google.com/drive/1xuSGmR5e0t1nj5TDFB9ASYxhiG_yKZyx
|
8
|
+
|
9
|
+
# Aim: Perform autoregression on time series data
|
10
|
+
"""
|
11
|
+
|
12
|
+
# create and evaluate a static autoregressive model
|
13
|
+
from pandas import read_csv
|
14
|
+
from matplotlib import pyplot
|
15
|
+
from statsmodels.tsa.ar_model import AutoReg
|
16
|
+
from sklearn.metrics import mean_squared_error
|
17
|
+
from math import sqrt
|
18
|
+
# load dataset
|
19
|
+
# Load the dataset. `squeeze=True` was removed from read_csv in pandas 2.0,
# so the single-column frame is squeezed to a Series explicitly.
series = read_csv(
    '/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv',
    header=0,
    index_col=0,
    parse_dates=True,
).squeeze("columns")

# Split: the last 7 observations are held out for testing. Note that the
# training slice starts at index 1, i.e. the first observation is unused.
X = series.values
train, test = X[1:len(X)-7], X[len(X)-7:]

# Train an autoregression with 30 lags.
model = AutoReg(train, 30)
model_fit = model.fit()
print('Lag: %s' % model_fit.ar_lags)
print('Coefficients: %s' % model_fit.params)

# Forecast the held-out period (one prediction per test observation).
predictions = model_fit.predict(start=len(train), end=len(train)+len(test)-1, dynamic=False)
for predicted, expected in zip(predictions, test):
    print('predicted=%f, expected=%f' % (predicted, expected))
rmse = sqrt(mean_squared_error(test, predictions))
print('Test RMSE: %.3f' % rmse)

# Plot expected values against the forecast (red).
pyplot.plot(test)
pyplot.plot(predictions, color='red')
pyplot.show()
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
"""Practical No 7.ipynb
|
3
|
+
|
4
|
+
Automatically generated by Colab.
|
5
|
+
|
6
|
+
Original file is located at
|
7
|
+
https://colab.research.google.com/drive/1rOzVCBXjN31-6HIb6h8zRRSH3R-MoGvG
|
8
|
+
|
9
|
+
# Aim: Forecasting using MA model.
|
10
|
+
"""
|
11
|
+
|
12
|
+
# correct forecasts with a model of forecast residual errors
|
13
|
+
from pandas import read_csv
|
14
|
+
from pandas import DataFrame
|
15
|
+
from pandas import concat
|
16
|
+
from statsmodels.tsa.ar_model import AutoReg
|
17
|
+
from matplotlib import pyplot
|
18
|
+
from sklearn.metrics import mean_squared_error
|
19
|
+
from math import sqrt
|
20
|
+
# load data
|
21
|
+
# Load the data. `squeeze=True` was removed from read_csv in pandas 2.0, so
# the single-column frame is squeezed to a Series explicitly.
series = read_csv(
    '/content/drive/MyDrive/MScDS TSA/daily-total-female-births-CA.csv',
    header=0,
    index_col=0,
    parse_dates=True,
).squeeze("columns")

# Lagged dataset: column 't' is the previous observation, 't+1' the current.
values = DataFrame(series.values)
dataframe = concat([values.shift(1), values], axis=1)
dataframe.columns = ['t', 't+1']
print(dataframe)

# Split into train and test sets (first 66% for training). Row 0 is skipped
# because its lagged value is NaN after the shift.
X = dataframe.values
train_size = int(len(X) * 0.66)
train, test = X[1:train_size], X[train_size:]
train_X, train_y = train[:,0], train[:,1]
test_X, test_y = test[:,0], test[:,1]
|
35
|
+
# persistence model on training set
|
36
|
+
# Persistence model on the training set: the forecast is the previous value.
train_pred = list(train_X)

# Residuals of the persistence forecast (actual minus predicted).
train_resid = [actual - predicted for actual, predicted in zip(train_y, train_pred)]

# Autoregression with 20 lags fitted to the training residuals.
model = AutoReg(train_resid, 20)
model_fit = model.fit()

# Number of lags in the fitted model and its coefficient vector.
window = len(model_fit.ar_lags)
coef = model_fit.params
|
44
|
+
# walk forward over time steps in test
|
45
|
+
# Walk forward over the test steps, seeding the history with the last
# `window` training residuals.
history = list(train_resid[len(train_resid)-window:])
predictions = []
for t in range(len(test_y)):
    # Persistence forecast and the error it actually made.
    yhat = test_X[t]
    error = test_y[t] - yhat

    # Predict that error from the most recent `window` residuals:
    # coef[0] plus coef[1:] applied to the residuals, newest first.
    length = len(history)
    lag = history[length-window:]
    pred_error = coef[0]
    for weight, past_error in zip(coef[1:], reversed(lag)):
        pred_error += weight * past_error

    # Correct the persistence forecast with the predicted error.
    yhat = yhat + pred_error
    predictions.append(yhat)
    history.append(error)
    print('predicted=%f, expected=%f' % (yhat, test_y[t]))
|
63
|
+
# error
|
64
|
+
# Root-mean-squared error of the corrected forecasts on the test set.
mse = mean_squared_error(test_y, predictions)
rmse = sqrt(mse)
print('Test RMSE: %.3f' % rmse)

# Plot expected values against the corrected forecast (red).
pyplot.plot(test_y)
pyplot.plot(predictions, color='red')
pyplot.show()
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
"""Practical No 8.ipynb
|
3
|
+
|
4
|
+
Automatically generated by Colab.
|
5
|
+
|
6
|
+
Original file is located at
|
7
|
+
https://colab.research.google.com/drive/1Ai_ZPo-aPrKa7A_iQFtal5vinfQbYQvY
|
8
|
+
|
9
|
+
# Aim: Forecasting using ARIMA model
|
10
|
+
|
11
|
+
Time Series Forecasting With ARIMA Model in Python for Temperature Prediction.
|
12
|
+
|
13
|
+
**1) Reading Time Series Data in Python using Pandas library**
|
14
|
+
"""
|
15
|
+
|
16
|
+
import pandas as pd
|
17
|
+
# Read the temperature data indexed by parsed 'DATE' values and discard
# rows that contain missing values.
df = pd.read_csv(
    '/content/drive/MyDrive/MScDS TSA/MaunaLoaDailyTemps.csv',
    index_col='DATE',
    parse_dates=True,
).dropna()
print('Shape of data',df.shape)

# Notebook leftovers: these previews are only displayed interactively.
df.head()
df

# Plot your data
df['AvgTemp'].plot(figsize=(12,5))
|
26
|
+
|
27
|
+
"""**2) Checking for stationarity of time series model**"""
|
28
|
+
|
29
|
+
from statsmodels.tsa.stattools import adfuller
|
30
|
+
def adf_test(dataset):
    """Run an ADF test on *dataset* and print its results.

    Prints the test statistic, p-value, number of lags, number of
    observations used and each critical value. Returns None.
    """
    dftest = adfuller(dataset, autolag = 'AIC')

    # Labels for the first four entries of the result, in order.
    labels = (
        "1. ADF : ",
        "2. P-Value : ",
        "3. Num Of Lags : ",
        "4. Num Of Observations Used For ADF Regression:",
    )
    for label, stat in zip(labels, dftest):
        print(label, stat)

    # dftest[4] maps significance level -> critical value.
    print("5. Critical Values :")
    for level, critical in dftest[4].items():
        print("\t", level, ": ", critical)
|
39
|
+
adf_test(df['AvgTemp'])
|
40
|
+
|
41
|
+
"""3) Auto Arima Function to select order of Auto Regression Model"""
|
42
|
+
|
43
|
+
# pmdarima must be installed beforehand. `pip install pmdarima` is a shell
# command (or `!pip install pmdarima` in a notebook cell); as a bare line in
# a .py file it is a syntax error, so it is kept here only as a note.
from pmdarima import auto_arima
import warnings
warnings.filterwarnings("ignore")

# Search for an ARIMA order for the temperature series, tracing each
# candidate model that is tried.
stepwise_fit = auto_arima(df['AvgTemp'], trace=True, suppress_warnings=True)
stepwise_fit.summary()
|
50
|
+
|
51
|
+
"""Split Your Dataset"""
|
52
|
+
|
53
|
+
print(df.shape)

# Hold out the final 30 rows for testing; everything before is training data.
train = df.iloc[:-30]
test = df.iloc[-30:]
print(train.shape,test.shape)

from statsmodels.tsa.arima.model import ARIMA

# Fit an ARIMA(1, 0, 5) on the training temperatures; `model` ends up bound
# to the fitted results object.
model = ARIMA(train['AvgTemp'], order=(1,0,5)).fit()
model.summary()
|
62
|
+
|
63
|
+
"""Check How Good Your Model Is"""
|
64
|
+
|
65
|
+
# Predict the positions covered by the test set.
start = len(train)
end = len(train) + len(test) - 1

# `typ='levels'` belonged to the removed legacy ARIMA API. The state-space
# ARIMA results' predict() has no such parameter: newer statsmodels warns
# about the unknown keyword and announces it will raise a TypeError.
pred = model.predict(start=start, end=end).rename('ARIMA Predictions')
print(pred)

# Re-label the predictions with the matching dates from the full frame so
# they line up with the test data on the plot.
pred.index = df.index[start:end+1]
pred.plot(legend=True)
test['AvgTemp'].plot(legend=True)
|
72
|
+
|
73
|
+
"""Check your Accuracy Metric"""
|
74
|
+
|
75
|
+
from sklearn.metrics import mean_squared_error
|
76
|
+
from math import sqrt
|
77
|
+
# Notebook leftover: mean of the test temperatures, displayed only in an
# interactive session (useful as a scale reference for the RMSE).
test['AvgTemp'].mean()

# Root-mean-squared error between the predictions and the test values.
mse = mean_squared_error(pred, test['AvgTemp'])
rmse = sqrt(mse)
print(rmse)
|