myawesomepkg 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,137 @@
1
+ PRACTICAL 4 B
2
+
3
+
4
+
5
+ def test_stationarity(dataFrame, var):
6
+     """Print the ADF test results for dataFrame[var] and plot it with its 12-period rolling mean and std."""
7
+     from statsmodels.tsa.stattools import adfuller
8
+     import seaborn as sns
9
+     dataFrame = dataFrame.copy()  # work on a copy so the caller's frame does not gain the helper columns
10
+     dataFrame['rollMean']=dataFrame[var].rolling(window=12).mean()
11
+     dataFrame['rollStd']=dataFrame[var].rolling(window=12).std()
12
+     adfTest = adfuller(dataFrame[var], autolag='AIC')
13
+     stats=pd.Series(adfTest[0:4],index=['Test Statistic','p-value', '#lags used', 'number of observations used'])
14
+     print(stats)
15
+     # adfTest[4] is a dict of critical values keyed by significance level
16
+     for key, value in adfTest[4].items():
17
+         print('\t%s: %.3f' % (key, value))
18
+     sns.lineplot(data=dataFrame, x=dataFrame.index, y=var)
19
+     sns.lineplot(data=dataFrame, x=dataFrame.index, y='rollMean')
20
+     sns.lineplot(data=dataFrame, x=dataFrame.index, y='rollStd')
21
+
22
+
23
+ ********
24
+
25
+
26
+
27
+ import pandas as pd
28
+ import numpy as np
29
+
30
+ #Reading the airline-passengers data
31
+
32
+ data = pd.read_csv('/content/drive/MyDrive/MScDS TSA/AirPassengers.csv', index_col='Month')
33
+
34
+ #Checking for some values of the data.
35
+
36
+ data.head()
37
+
38
+
39
+
40
+ ************
41
+
42
+
43
+
44
+ air_df=data[['Passengers']].copy()  # explicit copy: new columns are assigned to air_df, which would otherwise be a slice of data
45
+ air_df.head()
46
+
47
+
48
+
49
+
50
+ **********
51
+
52
+ air_df['shift']=air_df.Passengers.shift()
53
+ air_df['shiftDiff']=air_df.Passengers - air_df['shift']
54
+ air_df.head()
55
+
56
+
57
+ **********
58
+
59
+ test_stationarity(air_df.dropna(),'shiftDiff')
60
+
61
+
62
+ ***********
63
+
64
+ log_df=air_df[['Passengers']].copy()  # copy so the 'log' column is not set on a slice of air_df
65
+ log_df['log']=np.log(log_df['Passengers'])
66
+ log_df.head()
67
+
68
+
69
+ ************
70
+
71
+ test_stationarity(log_df,'log')
72
+
73
+
74
+
75
+ sqrt_df=air_df[['Passengers']].copy()  # copy so the 'sqrt' column is not set on a slice of air_df
76
+ sqrt_df['sqrt']=np.sqrt(air_df['Passengers'])
77
+ sqrt_df.head()
78
+
79
+
80
+ ********
81
+
82
+ test_stationarity(sqrt_df,'sqrt')
83
+
84
+
85
+ ***********
86
+
87
+ cbrt_df=air_df[['Passengers']].copy()  # copy so the 'cbrt' column is not set on a slice of air_df
88
+ cbrt_df['cbrt']=np.cbrt(air_df['Passengers'])
89
+ cbrt_df.head()
90
+
91
+ ***********
92
+
93
+ test_stationarity(cbrt_df,'cbrt')
94
+
95
+
96
+
97
+
98
+ ************
99
+
100
+
101
+ log_df2=log_df[['Passengers','log']].copy()  # copy so 'log_sqrt' is not set on a slice of log_df
102
+ log_df2['log_sqrt']=np.sqrt(log_df['log'])
103
+ log_df2.head()
104
+
105
+ **********
106
+
107
+
108
+ test_stationarity(log_df2,'log_sqrt')
109
+
110
+
111
+
112
+ ********
113
+
114
+
115
+ log_df2=log_df[['Passengers','log']].copy()  # copy so the new columns are not set on a slice of log_df
116
+ log_df2['log_sqrt']=np.sqrt(log_df['log'])
117
+ log_df2['logShiftDiff']=log_df2['log_sqrt']-log_df2['log_sqrt'].shift()  # first difference; the first row is NaN
118
+ log_df2.head()
119
+
120
+
121
+
122
+ *********
123
+
124
+
125
+ test_stationarity(log_df2.dropna(),'logShiftDiff')
126
+
127
+ *************88
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
@@ -0,0 +1,52 @@
1
+ Aim: Implementing auto correlation and partial auto-correlation on timeseries
2
+
3
+
4
+ # ACF plot of time series
5
+ from pandas import read_csv
6
+ from matplotlib import pyplot
7
+ #from statsmodels.graphics.tsaplots import plot_acf
8
+ from pandas.plotting import autocorrelation_plot
9
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv', header=0, index_col=0,parse_dates=True).squeeze("columns")  # read_csv's squeeze= keyword was removed in pandas 2.0
10
+ #plot_acf(series)
11
+ autocorrelation_plot(series)
12
+ pyplot.show()
13
+
14
+
15
+
16
+
17
+ *********
18
+
19
+
20
+ # zoomed-in ACF plot of time series
21
+ from pandas import read_csv
22
+ from matplotlib import pyplot
23
+ from statsmodels.graphics.tsaplots import plot_acf
24
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv', header=0, index_col=0,parse_dates=True).squeeze("columns")  # read_csv's squeeze= keyword was removed in pandas 2.0
25
+ plot_acf(series, lags=50)
26
+ pyplot.show()
27
+
28
+
29
+
30
+ **************
31
+
32
+
33
+
34
+ # PACF plot of time series
35
+ from pandas import read_csv
36
+ from matplotlib import pyplot
37
+ from statsmodels.graphics.tsaplots import plot_pacf
38
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv', header=0, index_col=0,
39
+                   parse_dates=True).squeeze("columns")  # read_csv's squeeze= keyword was removed in pandas 2.0
40
+ plot_pacf(series, lags=50)
41
+ pyplot.show()
42
+
43
+
44
+
45
+ ***************8
46
+
47
+
48
+
49
+
50
+
51
+
52
+
@@ -0,0 +1,29 @@
1
+ Aim: Perform autoregression on time series data
2
+
3
+
4
+ # create and evaluate a static autoregressive model
5
+ from pandas import read_csv
6
+ from matplotlib import pyplot
7
+ from statsmodels.tsa.ar_model import AutoReg
8
+ from sklearn.metrics import mean_squared_error
9
+ from math import sqrt
10
+ # load dataset (read_csv's squeeze= keyword was removed in pandas 2.0, so squeeze the single column explicitly)
11
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv', header=0, index_col=0,parse_dates=True).squeeze("columns")
12
+ # split dataset: the last 7 observations are held out for testing, everything before them is training data
13
+ X = series.values
14
+ train, test = X[:-7], X[-7:]
15
+ # train autoregression
16
+ model = AutoReg(train, lags=30)
17
+ model_fit = model.fit()
18
+ print('Lag: %s' % model_fit.ar_lags)
19
+ print('Coefficients: %s' % model_fit.params)
20
+ # make predictions for exactly the held-out steps
21
+ predictions = model_fit.predict(start=len(train), end=len(train)+len(test)-1, dynamic=False)
22
+ for yhat, expected in zip(predictions, test):
23
+     print('predicted=%f, expected=%f' % (yhat, expected))
24
+ rmse = sqrt(mean_squared_error(test, predictions))
25
+ print('Test RMSE: %.3f' % rmse)
26
+ # plot results: expected values (default colour) against predictions (red)
27
+ pyplot.plot(test)
28
+ pyplot.plot(predictions, color='red')
29
+ pyplot.show()
@@ -0,0 +1,67 @@
1
+ Aim: Forecasting using MA model.
2
+
3
+
4
+ # correct forecasts with a model of forecast residual errors
5
+ from pandas import read_csv
6
+ from pandas import DataFrame
7
+ from pandas import concat
8
+ from statsmodels.tsa.ar_model import AutoReg
9
+ from matplotlib import pyplot
10
+ from sklearn.metrics import mean_squared_error
11
+ from math import sqrt
12
+ # load data
13
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births-CA.csv', header=0, index_col=0, parse_dates=True).squeeze("columns")  # read_csv's squeeze= keyword was removed in pandas 2.0
14
+ # create lagged dataset
15
+ values = DataFrame(series.values)
16
+ dataframe = concat([values.shift(1), values], axis=1)
17
+ dataframe.columns = ['t', 't+1']
18
+ print(dataframe)
19
+ X = dataframe.values
20
+
21
+
22
+ *******
23
+
24
+
25
+ # split into train and test sets (row 0 is skipped: its lagged 't' value is NaN after shift(1))
26
+ X = dataframe.values
27
+ train_size = int(len(X) * 0.66)
28
+ train, test = X[1:train_size], X[train_size:]
29
+ train_X, train_y = train[:,0], train[:,1]
30
+ test_X, test_y = test[:,0], test[:,1]
31
+ # persistence model on training set: the forecast for t+1 is the value at t
32
+ train_pred = list(train_X)
33
+ # calculate residuals
34
+ train_resid = [actual - pred for actual, pred in zip(train_y, train_pred)]
35
+ # model the training set residuals
36
+ model = AutoReg(train_resid, lags=20)
37
+ model_fit = model.fit()
38
+ window = len(model_fit.ar_lags)
39
+ coef = model_fit.params
40
+ # seed the history with the last `window` training residuals (slicing already yields a new list)
41
+ history = train_resid[len(train_resid)-window:]
42
+ predictions = []
43
+ # walk forward over time steps in test
44
+ for persistence, expected in zip(test_X, test_y):
45
+     # error of the plain persistence forecast
46
+     error = expected - persistence
47
+     # predict that error from the last `window` residuals: coef[0] is the first fitted
48
+     # parameter, coef[d+1] weights the residual d+1 steps back
49
+     lag = history[len(history)-window:]
50
+     pred_error = coef[0]
51
+     for d in range(window):
52
+         pred_error += coef[d+1] * lag[window-d-1]
53
+     # correct the prediction
54
+     yhat = persistence + pred_error
55
+     predictions.append(yhat)
56
+     history.append(error)
57
+     print('predicted=%f, expected=%f' % (yhat, expected))
58
+ # error of the corrected forecasts over the whole test set
59
+ rmse = sqrt(mean_squared_error(test_y, predictions))
60
+ print('Test RMSE: %.3f' % rmse)
61
+ # plot expected values (default colour) against
62
+ # the corrected predictions (red)
63
+ pyplot.plot(test_y)
64
+ pyplot.plot(predictions, color='red')
65
+ pyplot.show()
66
+
67
+
@@ -0,0 +1,108 @@
1
+ Aim: Forecasting using ARIMA model --TEMPERATURE
2
+ Time Series Forecasting With ARIMA Model in Python for Temperature Prediction.
3
+
4
+ 1) Reading Time Series Data in Python using Pandas library
5
+
6
+ import pandas as pd
7
+ df=pd.read_csv('/content/drive/MyDrive/MScDS TSA/MaunaLoaDailyTemps.csv',index_col='DATE',parse_dates=True)
8
+ df=df.dropna()
9
+ print('Shape of data',df.shape)
10
+ df.head()
11
+ df
12
+
13
+ **********
14
+
15
+ df['AvgTemp'].plot(figsize=(12,5))
16
+
17
+
18
+ *******
19
+
20
+ 2) Checking for stationarity of time series model
21
+
22
+
23
+ from statsmodels.tsa.stattools import adfuller
24
+ def adf_test(dataset):  # run the Augmented Dickey-Fuller test on `dataset` and print each part of the result
25
+     dftest = adfuller(dataset, autolag = 'AIC')  # lag length chosen by AIC
26
+     print("1. ADF : ",dftest[0])
27
+     print("2. P-Value : ", dftest[1])
28
+     print("3. Num Of Lags : ", dftest[2])
29
+     print("4. Num Of Observations Used For ADF Regression:", dftest[3])
30
+     print("5. Critical Values :")
31
+     for key, val in dftest[4].items():  # dftest[4] is a dict of critical values
32
+         print("\t",key, ": ", val)
33
+ adf_test(df['AvgTemp'])
34
+
35
+
36
+ *************
37
+
38
+
39
+ Auto Arima Function to select order of Auto Regression Model
40
+
41
+ pip install pmdarima
42
+
43
+
44
+
45
+ from pmdarima import auto_arima
46
+ import warnings
47
+ warnings.filterwarnings("ignore")
48
+ stepwise_fit=auto_arima(df['AvgTemp'],trace=True,suppress_warnings=True)
49
+ stepwise_fit.summary()
50
+
51
+
52
+
53
+ ************8
54
+
55
+
56
+ Split Your Dataset
57
+
58
+ print(df.shape)
59
+ train=df.iloc[:-30]
60
+ test=df.iloc[-30:]
61
+ print(train.shape,test.shape)
62
+
63
+
64
+
65
+
66
+
67
+ from statsmodels.tsa.arima.model import ARIMA
68
+ model=ARIMA(train['AvgTemp'],order=(1,0,5))
69
+ model=model.fit()
70
+ model.summary()
71
+
72
+
73
+
74
+
75
+ **************
76
+
77
+
78
+
79
+ Check How Good Your Model Is
80
+
81
+
82
+ start=len(train)  # first out-of-sample position
83
+ end=len(train)+len(test)-1  # last position covered by the test set
84
+ pred=model.predict(start=start,end=end).rename('ARIMA Predictions')  # typ= belongs to the old arima_model API; the new results class has no such parameter
85
+ print(pred)
86
+ pred.index=df.index[start:end+1]  # label the predictions with the test period's dates
87
+ pred.plot(legend=True)
88
+ test['AvgTemp'].plot(legend=True)
89
+
90
+
91
+
92
+
93
+
94
+ ***********8
95
+
96
+
97
+ Check your Accuracy Metric
98
+
99
+
100
+ from sklearn.metrics import mean_squared_error
101
+ from math import sqrt
102
+ test['AvgTemp'].mean()
103
+ rmse=sqrt(mean_squared_error(pred,test['AvgTemp']))
104
+ print(rmse)
105
+
106
+
107
+
108
+
File without changes
@@ -0,0 +1,167 @@
1
+ # -*- coding: utf-8 -*-
2
+ """Practical No 3.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1aCeoKiFV3QbdraoAWb562bOjZDdBw0be
8
+
9
+ ## **Practical No 3:**
10
+ # Aim: Detrending, deseasonalizing timeseries, detecting Cyclic variations and decomposing Time Series.
11
+
12
+ ## Trend
13
+ """
14
+
15
+ # Commented out IPython magic to ensure Python compatibility.
16
+ import pandas as pd
17
+ # %matplotlib inline
18
+ from statsmodels.tsa.filters.hp_filter import hpfilter
19
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
20
+ EXINUS_cycle,EXINUS_trend = hpfilter(df['EXINUS'], lamb=1600)
21
+ EXINUS_trend.plot(figsize=(15,6)).autoscale(axis='x',tight=True)
22
+
23
+ """**Detrending using Differencing**"""
24
+
25
+ # Commented out IPython magic to ensure Python compatibility.
26
+ import pandas as pd
27
+ import matplotlib.pyplot as plt
28
+ import warnings
29
+ warnings.filterwarnings("ignore")
30
+ # %matplotlib inline
31
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
32
+ diff = df.EXINUS.diff()
33
+ plt.figure(figsize=(15,6))
34
+ plt.plot(diff)
35
+ plt.title('Detrending using Differencing', fontsize=16)
36
+ plt.xlabel('Year')
37
+ plt.ylabel('EXINUS exchange rate')
38
+ plt.show()
39
+
40
+ """**Detrending using Scipy Signal**"""
41
+
42
+ # Commented out IPython magic to ensure Python compatibility.
43
+ import pandas as pd
44
+ import matplotlib.pyplot as plt
45
+ from scipy import signal
46
+ import warnings
47
+ warnings.filterwarnings("ignore")
48
+ # %matplotlib inline
49
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
50
+ detrended = signal.detrend(df.EXINUS.values)
51
+ plt.figure(figsize=(15,6))
52
+ plt.plot(detrended)
53
+ plt.xlabel('EXINUS')
54
+ plt.ylabel('Frequency')
55
+ plt.title('Detrending using Scipy Signal', fontsize=16)
56
+ plt.show()
57
+
58
+ """**Detrending using HP Filter**"""
59
+
60
+ # Commented out IPython magic to ensure Python compatibility.
61
+ import pandas as pd
62
+ import matplotlib.pyplot as plt
63
+ from statsmodels.tsa.filters.hp_filter import hpfilter
64
+ import warnings
65
+ warnings.filterwarnings("ignore")
66
+ # %matplotlib inline
67
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
68
+ EXINUS_cycle,EXINUS_trend = hpfilter(df['EXINUS'], lamb=1600)
69
+ df['trend'] = EXINUS_trend
70
+ detrended = df.EXINUS - df['trend']
71
+ plt.figure(figsize=(15,6))
72
+ plt.plot(detrended)
73
+ plt.title('Detrending using HP Filter', fontsize=16)
74
+ plt.xlabel('Year')
75
+ plt.ylabel('EXINUS exchange rate')
76
+ plt.show()
77
+
78
+ """## Seasonality
79
+
80
+ Methods can be used to detect seasonality:
81
+ a. Multiple box plots
82
+ b. Autocorrelation plots
83
+
84
+ A. **Multi Month-wise Box Plot**
85
+ """
86
+
87
+ # Commented out IPython magic to ensure Python compatibility.
88
+ import pandas as pd
89
+ import seaborn as sns
90
+ import matplotlib.pyplot as plt
91
+ from statsmodels.tsa.filters.hp_filter import hpfilter
92
+ import warnings
93
+ warnings.filterwarnings("ignore")
94
+ # %matplotlib inline
95
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',parse_dates=True)
96
+ df['month'] = df['observation_date'].dt.strftime('%b')
97
+ df['year'] = [d.year for d in df.observation_date]
98
+ df['month'] = [d.strftime('%b') for d in df.observation_date]
99
+ years = df['year'].unique()
100
+ plt.figure(figsize=(15,6))
101
+ sns.boxplot(x='month', y='EXINUS', data=df).set_title("Multi Month-wise Box Plot")
102
+ plt.show()
103
+
104
+ """B. **Autocorrelation plot for seasonality**"""
105
+
106
+ # Commented out IPython magic to ensure Python compatibility.
107
+ from pandas.plotting import autocorrelation_plot
108
+ import pandas as pd
109
+ import matplotlib.pyplot as plt
110
+ # %matplotlib inline
111
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
112
+ #plt.rcParams.update({'figure.figsize':(15,6), 'figure.dpi':220})
113
+ autocorrelation_plot(df.EXINUS.tolist())
114
+
115
+ """**Deseasoning Time series**"""
116
+
117
+ # Commented out IPython magic to ensure Python compatibility.
118
+ import pandas as pd
119
+ import matplotlib.pyplot as plt
120
+ from statsmodels.tsa.seasonal import seasonal_decompose
121
+ import warnings
122
+ warnings.filterwarnings("ignore")
123
+ # %matplotlib inline
124
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
125
+ result_mul = seasonal_decompose(df['EXINUS'], model='multiplicative', extrapolate_trend='freq')
126
+ deseason = df['EXINUS'] / result_mul.seasonal  # multiplicative model (observed = trend * seasonal * resid): divide the seasonal factor out
127
+ plt.figure(figsize=(15,6))
128
+ plt.plot(deseason)
129
+ plt.title('Deseasoning using seasonal_decompose', fontsize=16)
130
+ plt.xlabel('Year')
131
+ plt.ylabel('EXINUS exchange rate')
132
+ plt.show()
133
+
134
+ """**Detecting cyclical variation**"""
135
+
136
+ # Commented out IPython magic to ensure Python compatibility.
137
+ from statsmodels.tsa.filters.hp_filter import hpfilter
138
+ import pandas as pd
139
+ import matplotlib.pyplot as plt
140
+ import warnings
141
+ warnings.filterwarnings("ignore")
142
+ # %matplotlib inline
143
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
144
+ EXINUS_cycle,EXINUS_trend = hpfilter(df['EXINUS'], lamb=1600)
145
+ df['cycle'] =EXINUS_cycle
146
+ df['trend'] =EXINUS_trend
147
+ df[['cycle']].plot(figsize=(15,6)).autoscale(axis='x',tight=True)
148
+ plt.title('Extracting Cyclic Variations', fontsize=16)
149
+ plt.xlabel('Year')
150
+ plt.ylabel('EXINUS exchange rate')
151
+ plt.show()
152
+
153
+ """**Decompose Time series**"""
154
+
155
+ # Commented out IPython magic to ensure Python compatibility.
156
+ from statsmodels.tsa.seasonal import seasonal_decompose
157
+ import pandas as pd
158
+ import matplotlib.pyplot as plt
159
+ import warnings
160
+ warnings.filterwarnings("ignore")
161
+ # %matplotlib inline
162
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',
163
+ index_col=0,parse_dates=True)
164
+ result = seasonal_decompose(df['EXINUS'], model='add')
165
+ result.plot();
166
+ result = seasonal_decompose(df['EXINUS'], model='mul')
167
+ result.plot();