myawesomepkg 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,215 @@
+ # -*- coding: utf-8 -*-
+ """Practical No 4.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+ https://colab.research.google.com/drive/1DnqwmMcr1ypxLue43og-x_3DEDaI1g7Q
+
+ # Aim: Working with stationary and non-stationary time series
+
+ # Checks for Stationarity
+ There are many methods to check whether a time series (direct observations, residuals, or otherwise) is stationary or non-stationary.
+
+ **A] Look at Plots:** You can review a time series plot of your data and visually check for any obvious trends or seasonality.
+
+ **Stationary Time Series**
+ """
+
+ # load time series data
+ from pandas import read_csv
+ from matplotlib import pyplot
+ # note: squeeze=True was removed from read_csv in pandas 2.x; .squeeze("columns") is the equivalent
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True).squeeze("columns")
+ series.plot()
+ pyplot.show()
+
+ """**Non-Stationary Time Series**"""
+
+ # load time series data
+ from pandas import read_csv
+ from matplotlib import pyplot
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/AirPassengers.csv', header=0, index_col=0, parse_dates=True).squeeze("columns")
+ series.plot()
+ pyplot.show()
+
+ """**B] Summary Statistics:** You can review the summary statistics for your data, by season or for random partitions, and check for obvious or significant differences."""
+
+ # plot a histogram of a time series
+ from pandas import read_csv
+ from matplotlib import pyplot
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True).squeeze("columns")
+ series.hist()
+ pyplot.show()
+
+ """We clearly see the bell-curve-like shape of the Gaussian distribution, perhaps with a longer right tail.
+
+ We can split the time series into two contiguous sequences, then calculate the mean and variance of each group of numbers and compare the values.
+ """
+
+ # calculate statistics of partitioned time series data
+ from pandas import read_csv
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True).squeeze("columns")
+ X = series.values
+ split = int(len(X) / 2)
+ X1, X2 = X[0:split], X[split:]
+ mean1, mean2 = X1.mean(), X2.mean()
+ var1, var2 = X1.var(), X2.var()
+ print('mean1=%f, mean2=%f' % (mean1, mean2))
+ print('variance1=%f, variance2=%f' % (var1, var2))
+
+ """This example shows that the mean and variance values are different, but in the same ballpark (range).
+ """
+
+ # calculate statistics of partitioned time series data
+ from pandas import read_csv
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/AirPassengers.csv', header=0, index_col=0, parse_dates=True).squeeze("columns")
+ X = series.values
+ split = int(len(X) / 2)
+ X1, X2 = X[0:split], X[split:]
+ mean1, mean2 = X1.mean(), X2.mean()
+ var1, var2 = X1.var(), X2.var()
+ print('mean1=%f, mean2=%f' % (mean1, mean2))
+ print('variance1=%f, variance2=%f' % (var1, var2))
+
+ """In this example, the mean and variance look very different: we have a non-stationary time series.
+
+ **C] Statistical Tests:** You can use statistical tests to check whether the expectations of stationarity are met or have been violated.
+
+ Calculating the Augmented Dickey-Fuller test on the Daily Female Births dataset.
+ """
+
+ # calculate stationarity test of time series data
+ from pandas import read_csv
+ from statsmodels.tsa.stattools import adfuller
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True).squeeze("columns")
+ X = series.values
+ result = adfuller(X)
+ print('ADF Statistic: %f' % result[0])
+ print('p-value: %f' % result[1])
+ print('Critical Values:')
+ for key, value in result[4].items():
+     print('\t%s: %.3f' % (key, value))
+
+ """Running the example prints a test statistic value of about -4. The more negative this statistic, the more likely we are to reject the null hypothesis (i.e. the more likely we have a stationary dataset). As part of the output, we get a look-up table of critical values to help interpret the ADF statistic. Our statistic value of about -4 is less than the critical value of -3.449 at 1%. This suggests that we can reject the null hypothesis at a significance level of less than 1% (i.e. a low probability that the result is a statistical fluke). Rejecting the null hypothesis means that the process has no unit root, and in turn that the time series is stationary, or does not have time-dependent structure.
+ """
+
+ # Importing the libraries:
+ from statsmodels.tsa.stattools import adfuller
+ import pandas as pd
+ import numpy as np
+
+ # Reading the airline-passengers data
+ data = pd.read_csv('/content/drive/MyDrive/MScDS TSA/AirPassengers.csv', index_col='Month')
+
+ # Checking some values of the data
+ data.head()
+
+ # Plotting the data
+ data.plot(figsize=(14,8), title='data series')
+
+ # Taking out the passenger numbers as a series
+ series = data['#Passengers'].values
+ #print(series)
+
+ # Performing the ADF test on the series:
+ result = adfuller(series, autolag='AIC')
+
+ # Extracting the values from the results:
+ print('ADF Statistic: %f' % result[0])
+ print('p-value: %f' % result[1])
+ print('Critical Values:')
+ for key, value in result[4].items():
+     print('\t%s: %.3f' % (key, value))
+ if result[0] < result[4]["5%"]:
+     print("Reject Ho - Time Series is Stationary")
+ else:
+     print("Failed to Reject Ho - Time Series is Non-Stationary")
+
+ """The test statistic is positive, meaning we are much less likely to reject the null hypothesis (the series looks non-stationary). Comparing the test statistic to the critical values, we have to fail to reject the null hypothesis: the time series is non-stationary and does have time-dependent structure.
+ """
+
+ # Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test:
+
+ # Importing the libraries:
+ from statsmodels.tsa.stattools import kpss
+ import pandas as pd
+ import numpy as np
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ result_kpss_ct = kpss(series, regression="ct")
+ print('Test Statistic: %f' % result_kpss_ct[0])
+ print('p-value: %f' % result_kpss_ct[1])
+ print('Critical values:')
+ for key, value in result_kpss_ct[3].items():
+     print('\t%s: %.3f' % (key, value))
+
+ """As the test statistic value is greater than the critical value, the null hypothesis is rejected. Note that the KPSS null hypothesis is the opposite of the ADF one: KPSS assumes stationarity under the null, so rejection here indicates that the data is non-stationary."""
+
+ # Loading the data
+ path = '/content/daily-min-temperatures.csv'
+ data = pd.read_csv(path, index_col='Date')
+
+ # Checking some head values of the data:
+ data.head()
+
+ # Plotting the data
+ data.plot(figsize=(14,8), title='temperature data series')
+
+ # Extracting temperature as a series
+ series = data['Temp'].values
+ series
+
+ # Performing the ADF test
+ result = adfuller(series, autolag='AIC')
+
+ # Checking the results:
+ print('ADF Statistic: %f' % result[0])
+ print('p-value: %f' % result[1])
+ print('Critical Values:')
+ for key, value in result[4].items():
+     print('\t%s: %.3f' % (key, value))
+ # reject the null (unit root) only when the statistic is below the critical value
+ if result[0] < result[4]["5%"]:
+     print("Reject Ho - Time Series is Stationary")
+ else:
+     print("Failed to Reject Ho - Time Series is Non-Stationary")
+
@@ -0,0 +1,78 @@
+ # -*- coding: utf-8 -*-
+ """Practical No 4B.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+ https://colab.research.google.com/drive/1BIAOhJMWQry0k6SDmofT9ESz5f45YX4f
+ """
+
+ def test_stationarity(dataFrame, var):
+     dataFrame['rollMean'] = dataFrame[var].rolling(window=12).mean()
+     dataFrame['rollStd'] = dataFrame[var].rolling(window=12).std()
+
+     from statsmodels.tsa.stattools import adfuller
+     import seaborn as sns
+     adfTest = adfuller(dataFrame[var], autolag='AIC')
+     stats = pd.Series(adfTest[0:4], index=['Test Statistic', 'p-value', '#lags used', 'number of observations used'])
+     print(stats)
+
+     for key, value in adfTest[4].items():
+         print('\t%s: %.3f' % (key, value))
+
+     # plot the series alongside its rolling mean and rolling std
+     sns.lineplot(data=dataFrame, x=dataFrame.index, y=var)
+     sns.lineplot(data=dataFrame, x=dataFrame.index, y='rollMean')
+     sns.lineplot(data=dataFrame, x=dataFrame.index, y='rollStd')
+
+ import pandas as pd
+ import numpy as np
+
+ # Reading the airline-passengers data
+ data = pd.read_csv('/content/drive/MyDrive/MScDS TSA/AirPassengers.csv', index_col='Month')
+
+ # Checking some values of the data
+ data.head()
+
+ test_stationarity(data, 'Passengers')
+
+ air_df = data[['Passengers']].copy()
+ air_df.head()
+
+ air_df['shift'] = air_df.Passengers.shift()
+ air_df['shiftDiff'] = air_df.Passengers - air_df['shift']
+ air_df.head()
+
+ test_stationarity(air_df.dropna(), 'shiftDiff')
+
+ log_df = air_df[['Passengers']].copy()
+ log_df['log'] = np.log(log_df['Passengers'])
+ log_df.head()
+
+ test_stationarity(log_df, 'log')
+
+ sqrt_df = air_df[['Passengers']].copy()
+ sqrt_df['sqrt'] = np.sqrt(air_df['Passengers'])
+ sqrt_df.head()
+
+ test_stationarity(sqrt_df, 'sqrt')
+
+ cbrt_df = air_df[['Passengers']].copy()
+ cbrt_df['cbrt'] = np.cbrt(air_df['Passengers'])
+ cbrt_df.head()
+
+ test_stationarity(cbrt_df, 'cbrt')
+
+ log_df2 = log_df[['Passengers', 'log']].copy()
+ log_df2['log_sqrt'] = np.sqrt(log_df['log'])
+ log_df2.head()
+
+ test_stationarity(log_df2, 'log_sqrt')
+
+ log_df2 = log_df[['Passengers', 'log']].copy()
+ log_df2['log_sqrt'] = np.sqrt(log_df['log'])
+ log_df2['logShiftDiff'] = log_df2['log_sqrt'] - log_df2['log_sqrt'].shift()
+ log_df2.head()
+
+ test_stationarity(log_df2.dropna(), 'logShiftDiff')
@@ -0,0 +1,39 @@
+ # -*- coding: utf-8 -*-
+ """Practical No 5 AC and PCA.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+ https://colab.research.google.com/drive/1sGHsmswvo180eVjgUJh53fzwK1eJoZRQ
+
+ # Aim: Implementing autocorrelation and partial autocorrelation on time series
+ """
+
+ # ACF plot of time series
+ from pandas import read_csv
+ from matplotlib import pyplot
+ #from statsmodels.graphics.tsaplots import plot_acf
+ from pandas.plotting import autocorrelation_plot
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True).squeeze("columns")
+ #plot_acf(series)
+ autocorrelation_plot(series)
+ pyplot.show()
+
+ # zoomed-in ACF plot of time series
+ from pandas import read_csv
+ from matplotlib import pyplot
+ from statsmodels.graphics.tsaplots import plot_acf
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True).squeeze("columns")
+ plot_acf(series, lags=50)
+ pyplot.show()
+
+ # PACF plot of time series
+ from pandas import read_csv
+ from matplotlib import pyplot
+ from statsmodels.graphics.tsaplots import plot_pacf
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True).squeeze("columns")
+ plot_pacf(series, lags=50)
+ pyplot.show()
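+
+ """A minimal sketch (not part of the original notebook): beyond the plots, statsmodels also exposes the numeric ACF/PACF values, which can help when choosing AR/MA orders programmatically."""
+
+ from statsmodels.tsa.stattools import acf, pacf
+ acf_vals = acf(series, nlags=50)
+ pacf_vals = pacf(series, nlags=50)
+ # lag 0 of the ACF is always 1.0; the following values show how quickly correlation decays
+ print(acf_vals[:5])
+ print(pacf_vals[:5])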
@@ -0,0 +1,37 @@
+ # -*- coding: utf-8 -*-
+ """Practical No 6.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+ https://colab.research.google.com/drive/1xuSGmR5e0t1nj5TDFB9ASYxhiG_yKZyx
+
+ # Aim: Perform autoregression on time series data
+ """
+
+ # create and evaluate a static autoregressive model
+ from pandas import read_csv
+ from matplotlib import pyplot
+ from statsmodels.tsa.ar_model import AutoReg
+ from sklearn.metrics import mean_squared_error
+ from math import sqrt
+ # load dataset
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True).squeeze("columns")
+ # split dataset: hold out the last 7 observations for testing
+ X = series.values
+ train, test = X[1:len(X)-7], X[len(X)-7:]
+ # train an autoregression with 30 lags
+ model = AutoReg(train, lags=30)
+ model_fit = model.fit()
+ print('Lag: %s' % model_fit.ar_lags)
+ print('Coefficients: %s' % model_fit.params)
+ # make predictions
+ predictions = model_fit.predict(start=len(train), end=len(train)+len(test)-1, dynamic=False)
+ for i in range(len(predictions)):
+     print('predicted=%f, expected=%f' % (predictions[i], test[i]))
+ rmse = sqrt(mean_squared_error(test, predictions))
+ print('Test RMSE: %.3f' % rmse)
+ # plot results
+ pyplot.plot(test)
+ pyplot.plot(predictions, color='red')
+ pyplot.show()
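+
+ """A minimal sketch (not part of the original notebook): instead of hard-coding 30 lags, statsmodels can select the lag order by an information criterion via ar_select_order."""
+
+ from statsmodels.tsa.ar_model import ar_select_order
+ sel = ar_select_order(train, maxlag=30, ic='bic')
+ print('selected lags: %s' % sel.ar_lags)
+ # sel.model is an AutoReg built with the selected lags
+ selected_fit = sel.model.fit()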
@@ -0,0 +1,69 @@
+ # -*- coding: utf-8 -*-
+ """Practical No 7.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+ https://colab.research.google.com/drive/1rOzVCBXjN31-6HIb6h8zRRSH3R-MoGvG
+
+ # Aim: Forecasting using an MA model.
+ """
+
+ # correct forecasts with a model of forecast residual errors
+ from pandas import read_csv
+ from pandas import DataFrame
+ from pandas import concat
+ from statsmodels.tsa.ar_model import AutoReg
+ from matplotlib import pyplot
+ from sklearn.metrics import mean_squared_error
+ from math import sqrt
+ # load data
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births-CA.csv', header=0, index_col=0, parse_dates=True).squeeze("columns")
+ # create lagged dataset
+ values = DataFrame(series.values)
+ dataframe = concat([values.shift(1), values], axis=1)
+ dataframe.columns = ['t', 't+1']
+ print(dataframe)
+
+ # split into train and test sets
+ X = dataframe.values
+ train_size = int(len(X) * 0.66)
+ train, test = X[1:train_size], X[train_size:]
+ train_X, train_y = train[:,0], train[:,1]
+ test_X, test_y = test[:,0], test[:,1]
+ # persistence model on training set
+ train_pred = [x for x in train_X]
+ # calculate residuals
+ train_resid = [train_y[i] - train_pred[i] for i in range(len(train_pred))]
+ # model the training set residuals
+ model = AutoReg(train_resid, lags=20)
+ model_fit = model.fit()
+ window = len(model_fit.ar_lags)
+ coef = model_fit.params
+ # walk forward over time steps in test
+ history = train_resid[len(train_resid)-window:]
+ history = [history[i] for i in range(len(history))]
+ predictions = list()
+ for t in range(len(test_y)):
+     # persistence
+     yhat = test_X[t]
+     error = test_y[t] - yhat
+     # predict error
+     length = len(history)
+     lag = [history[i] for i in range(length-window, length)]
+     pred_error = coef[0]
+     for d in range(window):
+         pred_error += coef[d+1] * lag[window-d-1]
+     # correct the prediction
+     yhat = yhat + pred_error
+     predictions.append(yhat)
+     history.append(error)
+     print('predicted=%f, expected=%f' % (yhat, test_y[t]))
+ # error
+ rmse = sqrt(mean_squared_error(test_y, predictions))
+ print('Test RMSE: %.3f' % rmse)
+ # plot predicted error
+ pyplot.plot(test_y)
+ pyplot.plot(predictions, color='red')
+ pyplot.show()
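+
+ """A minimal sketch (not part of the original notebook): the walk-forward above hand-builds a moving-average-style correction by fitting an AR model to the residual errors. An MA(q) model can also be fit directly as an ARIMA with order (0, 0, q); q=1 below is an illustrative choice."""
+
+ from statsmodels.tsa.arima.model import ARIMA
+ ma_model = ARIMA(series, order=(0, 0, 1))  # MA(1) on the raw series
+ ma_fit = ma_model.fit()
+ print(ma_fit.summary())
+ print(ma_fit.forecast(steps=7))  # forecast the next 7 observations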
@@ -0,0 +1,79 @@
+ # -*- coding: utf-8 -*-
+ """Practical No 8.ipynb
+
+ Automatically generated by Colab.
+
+ Original file is located at
+ https://colab.research.google.com/drive/1Ai_ZPo-aPrKa7A_iQFtal5vinfQbYQvY
+
+ # Aim: Forecasting using the ARIMA model
+
+ Time Series Forecasting With ARIMA Model in Python for Temperature Prediction.
+
+ **1) Reading time series data in Python using the pandas library**
+ """
+
+ import pandas as pd
+ df = pd.read_csv('/content/drive/MyDrive/MScDS TSA/MaunaLoaDailyTemps.csv', index_col='DATE', parse_dates=True)
+ df = df.dropna()
+ print('Shape of data', df.shape)
+ df.head()
+ df
+
+ """Plot Your data"""
24
+
25
+ df['AvgTemp'].plot(figsize=(12,5))
26
+
27
+ """**2) Checking for stationarity of time series model**"""
28
+
29
+ from statsmodels.tsa.stattools import adfuller
30
+ def adf_test(dataset):
31
+ dftest = adfuller(dataset, autolag = 'AIC')
32
+ print("1. ADF : ",dftest[0])
33
+ print("2. P-Value : ", dftest[1])
34
+ print("3. Num Of Lags : ", dftest[2])
35
+ print("4. Num Of Observations Used For ADF Regression:", dftest[3])
36
+ print("5. Critical Values :")
37
+ for key, val in dftest[4].items():
38
+ print("\t",key, ": ", val)
39
+ adf_test(df['AvgTemp'])
+
+ """**3) Auto ARIMA function to select the order of the ARIMA model**"""
+
+ !pip install pmdarima
+
+ from pmdarima import auto_arima
+ import warnings
+ warnings.filterwarnings("ignore")
+ stepwise_fit = auto_arima(df['AvgTemp'], trace=True, suppress_warnings=True)
+ stepwise_fit.summary()
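+
+ """A minimal sketch (not part of the original notebook): the (p, d, q) order chosen by the stepwise search is available on the fitted object and can be reused instead of hard-coding an order below."""
+
+ best_order = stepwise_fit.order  # can feed straight into ARIMA(..., order=best_order)
+ print('auto_arima selected order:', best_order)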
+
+ """Split your dataset"""
+
+ print(df.shape)
+ train = df.iloc[:-30]
+ test = df.iloc[-30:]
+ print(train.shape, test.shape)
+
+ from statsmodels.tsa.arima.model import ARIMA
+ model = ARIMA(train['AvgTemp'], order=(1,0,5))
+ model = model.fit()
+ model.summary()
+
+ """Check how good your model is"""
+
+ start = len(train)
+ end = len(train) + len(test) - 1
+ # predictions from a d=0 model are already in levels
+ pred = model.predict(start=start, end=end).rename('ARIMA Predictions')
+ print(pred)
+ pred.index = df.index[start:end+1]
+ pred.plot(legend=True)
+ test['AvgTemp'].plot(legend=True)
+
+ """Check your accuracy metric"""
+
+ from sklearn.metrics import mean_squared_error
+ from math import sqrt
+ test['AvgTemp'].mean()
+ rmse = sqrt(mean_squared_error(pred, test['AvgTemp']))
+ print(rmse)
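+
+ """A minimal sketch (not part of the original notebook): after validating against the held-out 30 days, one common next step is to refit on the full series and forecast genuinely unseen days. The order below reuses the notebook's (1,0,5)."""
+
+ final_model = ARIMA(df['AvgTemp'], order=(1,0,5)).fit()
+ future = final_model.forecast(steps=30)  # the next 30 days beyond the data
+ print(future)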