myawesomepkg 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,148 @@
+ Practical No 1: Aim: Handling time series data
+
+ A. Load and Explore Time Series Data
+
+ from pandas import read_csv
+ series = read_csv('/content/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True)
+ print(type(series))
+ print(series.head())
+
+ You can use the head() function to peek at the first 5 records, or pass a count to view more:
+
+ print(series.head(10))
+
+ Number of Observations
+
+ print(series.size)
+
+ Querying By Time
+
+ print(series.loc["1959-01"])
+
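+ Because the index is parsed as dates, you can also slice an explicit date range with .loc (the exact dates here are just for illustration):
+
+ print(series.loc["1959-01-01":"1959-01-10"])
+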
+ The describe() function creates a summary of the loaded time series, including the count, mean, standard deviation, minimum, quartiles, and maximum of the observations
+
+ print(series.describe())
+
+ B. Data Visualization
+
+ Minimum Daily Temperatures Dataset
+
+ from pandas import read_csv
+ from matplotlib import pyplot
+ series = read_csv('daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True)
+ print(series.head())
+ series = series.squeeze()
+ type(series)
+ print(series.describe())
+
+ Line Plot
+
+ series.plot()
+ pyplot.show()
+
+ *********
+
+ series.plot(style='k.')
+ pyplot.show()
+
+ *********
+
+ series.plot(style='k--')
+ pyplot.show()
+
+ A Grouper allows the user to specify a groupby instruction for an object.
+ The squeeze() method converts a single-column DataFrame into a Series.
+
+ from pandas import read_csv
+ from pandas import DataFrame
+ from pandas import Grouper
+ from matplotlib import pyplot
+ series = read_csv('/content/daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True)
+ #print(series.head())
+ series = series.squeeze()
+ #print(series.head())
+ groups = series.groupby(Grouper(freq='A'))  # group observations by year
+ #print(groups)
+ years = DataFrame()
+ #print(years)
+ for name, group in groups:
+     years[name.year] = group.values
+ print(years)
+ years.plot(subplots=True, legend=False)
+ pyplot.show()
+
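+ The same by-year view can also be produced with resample(), which is often simpler when you want one aggregate per period rather than the raw values; a minimal sketch:
+
+ print(series.resample('A').mean())  # mean temperature per year
+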
+ Histogram and Density Plots
+
+ series.hist()
+ pyplot.show()
+
+ Generate a Kernel Density Estimate plot using Gaussian kernels.
+
+ series.plot(kind='kde')
+ pyplot.show()
+
+ years.boxplot()  # box-and-whisker of the yearly groups created above
+ pyplot.show()
+
+ Box and Whisker Plots by Interval
+
+ from pandas import read_csv
+ from pandas import DataFrame
+ from pandas import Grouper
+ from matplotlib import pyplot
+ series = read_csv('daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True)
+ series = series.squeeze()
+ groups = series.groupby(Grouper(freq='A'))
+ years = DataFrame()
+ for name, group in groups:
+     years[name.year] = group.values
+ years.boxplot()
+ pyplot.show()
+
+ Heat Maps
+
+ from pandas import read_csv
+ from pandas import DataFrame
+ from pandas import Grouper
+ from matplotlib import pyplot
+ series = read_csv('daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True)
+ series = series.squeeze()
+ groups = series.groupby(Grouper(freq='A'))
+ years = DataFrame()
+ for name, group in groups:
+     years[name.year] = group.values
+ years = years.T  # transpose so each row is one year
+ print(years)
+ pyplot.matshow(years, interpolation=None, aspect='auto')
+ pyplot.show()
+
+ Lag Scatter Plots
+
+ from pandas.plotting import lag_plot
+ lag_plot(series)
+ pyplot.show()
+
+ Autocorrelation Plots
+
+ from pandas.plotting import autocorrelation_plot
+ autocorrelation_plot(series)
+ pyplot.show()
+
@@ -0,0 +1,115 @@
+ Practical No 2 Aim: Implementing time series components
+ Seasonality
+ Trend
+ Pattern
+ Cyclic
+
+ Draw random samples from a normal (Gaussian) distribution.
+ Upward, downward, horizontal, and non-linear trends
+
+ import numpy as np
+ import matplotlib.pyplot as plt
+
+ # Upward Trend
+ t = np.arange(0, 10, 0.1)
+ data = t + np.random.normal(0, 0.5, len(t))
+ plt.plot(t, data, label='Upward Trend')
+
+ # Downward Trend
+ t = np.arange(0, 10, 0.1)
+ data = -t + np.random.normal(0, 0.5, len(t))
+ plt.plot(t, data, label='Downward Trend')
+
+ # Horizontal Trend
+ t = np.arange(0, 10, 0.1)
+ data = np.zeros(len(t)) + np.random.normal(0, 0.5, len(t))
+ plt.plot(t, data, label='Horizontal Trend')
+
+ # Non-linear Trend
+ t = np.arange(0, 10, 0.1)
+ data = t**2 + np.random.normal(0, 0.5, len(t))
+ plt.plot(t, data, label='Non-linear Trend')
+
+ plt.legend()
+ plt.show()
+
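+ Since each series above is a deterministic trend plus Gaussian noise, the trend can be recovered with a least-squares fit; a minimal sketch using numpy.polyfit on a fresh upward-trend sample:
+
+ t = np.arange(0, 10, 0.1)
+ data = t + np.random.normal(0, 0.5, len(t))
+ slope, intercept = np.polyfit(t, data, 1)  # fit a degree-1 polynomial
+ print('estimated slope=%.2f, intercept=%.2f' % (slope, intercept))
+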
+ Weekly, monthly, and yearly seasonality
+
+ import numpy as np
+ import matplotlib.pyplot as plt
+
+ # generate sample data with different types of seasonality
+ np.random.seed(1)
+ time = np.arange(0, 366)
+
+ # weekly seasonality
+ weekly_seasonality = np.sin(2 * np.pi * time / 7)
+ weekly_data = 5 + weekly_seasonality
+
+ # monthly seasonality
+ monthly_seasonality = np.sin(2 * np.pi * time / 30)
+ monthly_data = 5 + monthly_seasonality
+
+ # annual seasonality
+ annual_seasonality = np.sin(2 * np.pi * time / 365)
+ annual_data = 5 + annual_seasonality
+
+ # plot the data
+ plt.figure(figsize=(12, 8))
+ plt.plot(time, weekly_data, label='Weekly Seasonality')
+ plt.plot(time, monthly_data, label='Monthly Seasonality')
+ plt.plot(time, annual_data, label='Annual Seasonality')
+ plt.legend(loc='upper left')
+ plt.show()
+
+ Cyclic time series data
+
+ import numpy as np
+ import matplotlib.pyplot as plt
+
+ # Generate sample data with cyclic patterns
+ np.random.seed(1)
+ time = np.array([0, 30, 60, 90, 120,
+                  150, 180, 210, 240,
+                  270, 300, 330])
+ data = 10 * np.sin(2 * np.pi * time / 50) + 20 * np.sin(2 * np.pi * time / 100)
+
+ # Plot the data
+ plt.figure(figsize=(12, 8))
+ plt.plot(time, data, label='Cyclic Data')
+ plt.legend(loc='upper left')
+ plt.xlabel('Time (days)')
+ plt.ylabel('Value')
+ plt.title('Cyclic Time Series Data')
+ plt.show()
+
+ Original data and data with irregularities
+
+ import numpy as np
+ import matplotlib.pyplot as plt
+
+ # Generate sample time series data
+ np.random.seed(1)
+ time = np.arange(0, 100)
+ #data = 5 * np.sin(2 * np.pi * time / 20) + 2 * time
+ data = np.sin(2 * np.pi * time / 30) + time
+
+ # Introduce irregularities by adding random noise
+ irregularities = np.random.normal(0, 5, len(data))
+ irregular_data = data + irregularities
+
+ # Plot the original data and the data with irregularities
+ plt.figure(figsize=(12, 8))
+ plt.plot(time, data, label='Original Data')
+ plt.plot(time, irregular_data, label='Data with Irregularities')
+ plt.legend(loc='upper left')
+ plt.show()
+
@@ -0,0 +1,168 @@
+ Practical No 3:
+ Aim: Detrending and deseasonalizing a time series, detecting cyclic variations, and decomposing a time series.
+
+ Trend
+
+ import pandas as pd
+ %matplotlib inline
+ from statsmodels.tsa.filters.hp_filter import hpfilter
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls', index_col=0, parse_dates=True)
+ EXINUS_cycle, EXINUS_trend = hpfilter(df['EXINUS'], lamb=1600)
+ EXINUS_trend.plot(figsize=(15,6)).autoscale(axis='x', tight=True)
+
+ Detrending using Differencing
+
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import warnings
+ warnings.filterwarnings("ignore")
+ %matplotlib inline
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls', index_col=0, parse_dates=True)
+ diff = df.EXINUS.diff()
+ plt.figure(figsize=(15,6))
+ plt.plot(diff)
+ plt.title('Detrending using Differencing', fontsize=16)
+ plt.xlabel('Year')
+ plt.ylabel('EXINUS exchange rate')
+ plt.show()
+
+ Detrending using Scipy Signal
+
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ from scipy import signal
+ import warnings
+ warnings.filterwarnings("ignore")
+ %matplotlib inline
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls', index_col=0, parse_dates=True)
+ detrended = signal.detrend(df.EXINUS.values)
+ plt.figure(figsize=(15,6))
+ plt.plot(detrended)
+ plt.xlabel('Observation')
+ plt.ylabel('EXINUS exchange rate (detrended)')
+ plt.title('Detrending using Scipy Signal', fontsize=16)
+ plt.show()
+
+ Detrending using HP Filter
+
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ from statsmodels.tsa.filters.hp_filter import hpfilter
+ import warnings
+ warnings.filterwarnings("ignore")
+ %matplotlib inline
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls', index_col=0, parse_dates=True)
+ EXINUS_cycle, EXINUS_trend = hpfilter(df['EXINUS'], lamb=1600)
+ df['trend'] = EXINUS_trend
+ detrended = df.EXINUS - df['trend']
+ plt.figure(figsize=(15,6))
+ plt.plot(detrended)
+ plt.title('Detrending using HP Filter', fontsize=16)
+ plt.xlabel('Year')
+ plt.ylabel('EXINUS exchange rate')
+ plt.show()
+
+ Seasonality
+
+ A. Multi Month-wise Box Plot
+
+ import pandas as pd
+ import seaborn as sns
+ import matplotlib.pyplot as plt
+ from statsmodels.tsa.filters.hp_filter import hpfilter
+ import warnings
+ warnings.filterwarnings("ignore")
+ %matplotlib inline
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls', parse_dates=True)
+ df['year'] = [d.year for d in df.observation_date]
+ df['month'] = [d.strftime('%b') for d in df.observation_date]
+ years = df['year'].unique()
+ plt.figure(figsize=(15,6))
+ sns.boxplot(x='month', y='EXINUS', data=df).set_title("Multi Month-wise Box Plot")
+ plt.show()
+
+ B. Autocorrelation plot for seasonality
+
+ from pandas.plotting import autocorrelation_plot
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ %matplotlib inline
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls', index_col=0, parse_dates=True)
+ #plt.rcParams.update({'figure.figsize':(15,6), 'figure.dpi':220})
+ autocorrelation_plot(df.EXINUS.tolist())
+
+ Deseasoning Time Series
+
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ from statsmodels.tsa.seasonal import seasonal_decompose
+ import warnings
+ warnings.filterwarnings("ignore")
+ %matplotlib inline
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls', index_col=0, parse_dates=True)
+ result_mul = seasonal_decompose(df['EXINUS'], model='multiplicative', extrapolate_trend='freq')
+ deseason = df['EXINUS'] - result_mul.seasonal
+ plt.figure(figsize=(15,6))
+ plt.plot(deseason)
+ plt.title('Deseasoning using seasonal_decompose', fontsize=16)
+ plt.xlabel('Year')
+ plt.ylabel('EXINUS exchange rate')
+ plt.show()
+
+ Detecting cyclical variation
+
+ from statsmodels.tsa.filters.hp_filter import hpfilter
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import warnings
+ warnings.filterwarnings("ignore")
+ %matplotlib inline
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls', index_col=0, parse_dates=True)
+ EXINUS_cycle, EXINUS_trend = hpfilter(df['EXINUS'], lamb=1600)
+ df['cycle'] = EXINUS_cycle
+ df['trend'] = EXINUS_trend
+ df[['cycle']].plot(figsize=(15,6)).autoscale(axis='x', tight=True)
+ plt.title('Extracting Cyclic Variations', fontsize=16)
+ plt.xlabel('Year')
+ plt.ylabel('EXINUS exchange rate')
+ plt.show()
+
+ Decompose Time Series
+
+ from statsmodels.tsa.seasonal import seasonal_decompose
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ import warnings
+ warnings.filterwarnings("ignore")
+ %matplotlib inline
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',
+                    index_col=0, parse_dates=True)
+ result = seasonal_decompose(df['EXINUS'], model='add')
+ result.plot();
+ result = seasonal_decompose(df['EXINUS'], model='mul')
+ result.plot();
+
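+ Each decomposition result also exposes its components directly (trend, seasonal, and resid on statsmodels' DecomposeResult), which is handy when you need them as series rather than a plot; a small sketch:
+
+ print(result.trend.head())     # extracted trend component
+ print(result.seasonal.head())  # repeating seasonal component
+ print(result.resid.head())     # residual left after trend and seasonality
+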
@@ -0,0 +1,233 @@
+ Practical No 4 A
+ Aim: Working with stationary and non-stationary time series
+
+ Stationary Time Series
+
+ # load time series data
+ from pandas import read_csv
+ from matplotlib import pyplot
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True,
+                   squeeze=True)
+ series.plot()
+ pyplot.show()
+
+ *********
+
+ Non-Stationary Time Series
+
+ # load time series data
+ from pandas import read_csv
+ from matplotlib import pyplot
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/AirPassengers.csv', header=0, index_col=0, parse_dates=True,
+                   squeeze=True)
+ series.plot()
+ pyplot.show()
+
+ *********
+
+ Summary Statistics: You can review the summary statistics for your data for seasons or random partitions and check for obvious or significant differences.
+
+ # plot a histogram of a time series
+ from pandas import read_csv
+ from matplotlib import pyplot
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True,
+                   squeeze=True)
+ series.hist()
+ pyplot.show()
+
+ *********
+
+ We can split the time series into two contiguous sequences, then calculate the mean and variance of each group of numbers and compare the values.
+
+ Part 1
+
+ # calculate statistics of partitioned time series data
+ from pandas import read_csv
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True, squeeze=True)
+ X = series.values
+ split = int(len(X) / 2)
+ X1, X2 = X[0:split], X[split:]
+ mean1, mean2 = X1.mean(), X2.mean()
+ var1, var2 = X1.var(), X2.var()
+ print('mean1=%f, mean2=%f' % (mean1, mean2))
+ print('variance1=%f, variance2=%f' % (var1, var2))
+
+ Part 2
+
+ # calculate statistics of partitioned time series data
+ from pandas import read_csv
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/AirPassengers.csv', header=0, index_col=0, parse_dates=True,
+                   squeeze=True)
+ X = series.values
+ split = int(len(X) / 2)
+ X1, X2 = X[0:split], X[split:]
+ mean1, mean2 = X1.mean(), X2.mean()
+ var1, var2 = X1.var(), X2.var()
+ print('mean1=%f, mean2=%f' % (mean1, mean2))
+ print('variance1=%f, variance2=%f' % (var1, var2))
+
+ *********
+
+ Statistical Tests: You can use statistical tests to check if the expectations of stationarity are met or have been violated.
+
+ # calculate stationarity test of time series data
+ from pandas import read_csv
+ from statsmodels.tsa.stattools import adfuller
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True,
+                   squeeze=True)
+ X = series.values
+ result = adfuller(X)
+ print('ADF Statistic: %f' % result[0])
+ print('p-value: %f' % result[1])
+ print('Critical Values:')
+ for key, value in result[4].items():
+     print('\t%s: %.3f' % (key, value))
+
+ *********
+
+ #Importing the libraries:
+
+ from statsmodels.tsa.stattools import adfuller
+ import pandas as pd
+ import numpy as np
+
+ #Reading the airline-passengers data
+
+ data = pd.read_csv('/content/drive/MyDrive/MScDS TSA/AirPassengers.csv', index_col='Month')
+
+ #Checking for some values of the data.
+
+ data.head()
+
+ *********
+
+ #Plotting the data.
+
+ data.plot(figsize=(14,8), title='data series')
+
+ #Taking out the passengers number as a series.
+
+ series = data['#Passengers'].values
+ #print(series)
+
+ *********
+
+ #Performing the ADF test on the series:
+
+ # ADF Test
+ result = adfuller(series, autolag='AIC')
+
+ #Extracting the values from the results:
+
+ print('ADF Statistic: %f' % result[0])
+ print('p-value: %f' % result[1])
+ print('Critical Values:')
+ for key, value in result[4].items():
+     print('\t%s: %.3f' % (key, value))
+ if result[0] < result[4]["5%"]:
+     print("Reject Ho - Time Series is Stationary")
+ else:
+     print("Failed to Reject Ho - Time Series is Non-Stationary")
+
+ The test statistic is positive, meaning we are much less likely to reject the null hypothesis (it looks non-stationary). Comparing the test statistic to the critical values, we fail to reject the null hypothesis that the time series is non-stationary and has time-dependent structure.
+
+ #Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test:
+
+ #Importing the libraries:
+
+ from statsmodels.tsa.stattools import kpss
+ import pandas as pd
+ import numpy as np
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ result_kpss_ct = kpss(series, regression="ct")
+ print('Test Statistic: %f' % result_kpss_ct[0])
+ print('p-value: %f' % result_kpss_ct[1])
+ print('Critical values:')
+ for key, value in result_kpss_ct[3].items():
+     print('\t%s: %.3f' % (key, value))
+
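+ Note that KPSS reverses the hypotheses relative to ADF: its null is (trend-)stationarity, so a small p-value now argues against stationarity. A minimal sketch of reading the result, using the conventional 0.05 threshold:
+
+ if result_kpss_ct[1] < 0.05:
+     print("Reject Ho - Time Series is not trend-stationary")
+ else:
+     print("Failed to Reject Ho - Time Series looks trend-stationary")
+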
+ *********
+
+ #Loading the data.
+
+ path = '/content/daily-min-temperatures.csv'
+ data = pd.read_csv(path, index_col='Date')
+
+ #Checking for some head values of the data:
+
+ data.head()
+
+ *********
+
+ #Plotting the data.
+
+ data.plot(figsize=(14,8), title='temperature data series')
+
+ *********
+
+ #Extracting temperature in a series.
+
+ series = data['Temp'].values
+ series
+
+ *********
+
+ #Performing ADF test.
+
+ result = adfuller(series, autolag='AIC')
+
+ #Checking the results:
+
+ print('ADF Statistic: %f' % result[0])
+ print('p-value: %f' % result[1])
+ print('Critical Values:')
+ for key, value in result[4].items():
+     print('\t%s: %.3f' % (key, value))
+ if result[0] < result[4]["5%"]:
+     print("Reject Ho - Time Series is Stationary")
+ else:
+     print("Failed to Reject Ho - Time Series is Non-Stationary")
+
@@ -0,0 +1,137 @@
+ Practical No 4 B
+
+ import pandas as pd
+ import seaborn as sns
+ from statsmodels.tsa.stattools import adfuller
+
+ def test_stationarity(dataFrame, var):
+     # rolling statistics: a stationary series should have a roughly flat rolling mean and std
+     dataFrame['rollMean'] = dataFrame[var].rolling(window=12).mean()
+     dataFrame['rollStd'] = dataFrame[var].rolling(window=12).std()
+
+     # ADF test on the chosen column
+     adfTest = adfuller(dataFrame[var], autolag='AIC')
+     stats = pd.Series(adfTest[0:4], index=['Test Statistic', 'p-value', '#lags used', 'number of observations used'])
+     print(stats)
+
+     for key, value in adfTest[4].items():
+         print('\t%s: %.3f' % (key, value))
+
+     # plot the series together with its rolling mean and std
+     sns.lineplot(data=dataFrame, x=dataFrame.index, y=var)
+     sns.lineplot(data=dataFrame, x=dataFrame.index, y='rollMean')
+     sns.lineplot(data=dataFrame, x=dataFrame.index, y='rollStd')
+
+ *********
+
+ import pandas as pd
+ import numpy as np
+
+ #Reading the airline-passengers data
+
+ data = pd.read_csv('/content/drive/MyDrive/MScDS TSA/AirPassengers.csv', index_col='Month')
+
+ #Checking for some values of the data.
+
+ data.head()
+
+ *********
+
+ air_df = data[['Passengers']]
+ air_df.head()
+
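+ Before applying any transform, it can be worth running the helper on the raw series to confirm the baseline is non-stationary; a small sketch (assuming the column in this copy of AirPassengers.csv really is named Passengers, and passing a copy so the helper's added columns do not touch air_df):
+
+ test_stationarity(air_df.copy(), 'Passengers')
+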
+ *********
+
+ air_df['shift'] = air_df.Passengers.shift()
+ air_df['shiftDiff'] = air_df.Passengers - air_df['shift']
+ air_df.head()
+
+ *********
+
+ test_stationarity(air_df.dropna(), 'shiftDiff')
+
+ *********
+
+ log_df = air_df[['Passengers']]
+ log_df['log'] = np.log(log_df['Passengers'])
+ log_df.head()
+
+ *********
+
+ test_stationarity(log_df, 'log')
+
+ *********
+
+ sqrt_df = air_df[['Passengers']]
+ sqrt_df['sqrt'] = np.sqrt(air_df['Passengers'])
+ sqrt_df.head()
+
+ *********
+
+ test_stationarity(sqrt_df, 'sqrt')
+
+ *********
+
+ cbrt_df = air_df[['Passengers']]
+ cbrt_df['cbrt'] = np.cbrt(air_df['Passengers'])
+ cbrt_df.head()
+
+ *********
+
+ test_stationarity(cbrt_df, 'cbrt')
+
+ *********
+
+ log_df2 = log_df[['Passengers', 'log']]
+ log_df2['log_sqrt'] = np.sqrt(log_df['log'])
+ log_df2.head()
+
+ *********
+
+ test_stationarity(log_df2, 'log_sqrt')
+
+ *********
+
+ log_df2 = log_df[['Passengers', 'log']]
+ log_df2['log_sqrt'] = np.sqrt(log_df['log'])
+ log_df2['logShiftDiff'] = log_df2['log_sqrt'] - log_df2['log_sqrt'].shift()
+ log_df2.head()
+
+ *********
+
+ test_stationarity(log_df2.dropna(), 'logShiftDiff')
+
@@ -0,0 +1,52 @@
+ Practical No 5
+ Aim: Implementing autocorrelation and partial autocorrelation on time series
+
+ # ACF plot of time series
+ from pandas import read_csv
+ from matplotlib import pyplot
+ #from statsmodels.graphics.tsaplots import plot_acf
+ from pandas.plotting import autocorrelation_plot
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True, squeeze=True)
+ #plot_acf(series)
+ autocorrelation_plot(series)
+ pyplot.show()
+
+ *********
+
+ # zoomed-in ACF plot of time series
+ from pandas import read_csv
+ from matplotlib import pyplot
+ from statsmodels.graphics.tsaplots import plot_acf
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True, squeeze=True)
+ plot_acf(series, lags=50)
+ pyplot.show()
+
+ *********
+
+ # PACF plot of time series
+ from pandas import read_csv
+ from matplotlib import pyplot
+ from statsmodels.graphics.tsaplots import plot_pacf
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv', header=0, index_col=0,
+                   parse_dates=True, squeeze=True)
+ plot_pacf(series, lags=50)
+ pyplot.show()
+
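+ Beyond the plots, the underlying correlation values can be computed directly; a minimal sketch using statsmodels' acf function on the same series:
+
+ from statsmodels.tsa.stattools import acf
+ acf_values = acf(series, nlags=50)  # lag-0 through lag-50 autocorrelations
+ print(acf_values[:10])
+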
@@ -0,0 +1,29 @@
+ Practical No 6
+ Aim: Perform autoregression on time series data
+
+ # create and evaluate a static autoregressive model
+ from pandas import read_csv
+ from matplotlib import pyplot
+ from statsmodels.tsa.ar_model import AutoReg
+ from sklearn.metrics import mean_squared_error
+ from math import sqrt
+ # load dataset
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True, squeeze=True)
+ # split dataset: hold out the last 7 observations for testing
+ X = series.values
+ train, test = X[1:len(X)-7], X[len(X)-7:]
+ # train autoregression with 30 lags
+ model = AutoReg(train, lags=30)
+ model_fit = model.fit()
+ print('Lag: %s' % model_fit.ar_lags)
+ print('Coefficients: %s' % model_fit.params)
+ # make predictions
+ predictions = model_fit.predict(start=len(train), end=len(train)+len(test)-1, dynamic=False)
+ for i in range(len(predictions)):
+     print('predicted=%f, expected=%f' % (predictions[i], test[i]))
+ rmse = sqrt(mean_squared_error(test, predictions))
+ print('Test RMSE: %.3f' % rmse)
+ # plot results
+ pyplot.plot(test)
+ pyplot.plot(predictions, color='red')
+ pyplot.show()
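+
+ For intuition, an AR(p) forecast is just an intercept plus a weighted sum of the previous p observations. A small sketch reproducing one step by hand from the fit above (params[0] is the intercept, the remaining entries are the lag-1..lag-p coefficients):
+
+ import numpy as np
+ coef = model_fit.params
+ p = len(model_fit.ar_lags)
+ recent = train[-p:]                              # last p observations, oldest first
+ yhat = coef[0] + np.dot(coef[1:], recent[::-1])  # lag-1 coefficient multiplies the newest value
+ print('manual one-step forecast: %f' % yhat)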
@@ -0,0 +1,67 @@
+ Practical No 7
+ Aim: Forecasting using an MA model.
+
+ The MA idea is implemented here by modelling the residual errors of a persistence forecast with an autoregression, then using the predicted error to correct each forecast.
+
+ # correct forecasts with a model of forecast residual errors
+ from pandas import read_csv
+ from pandas import DataFrame
+ from pandas import concat
+ from statsmodels.tsa.ar_model import AutoReg
+ from matplotlib import pyplot
+ from sklearn.metrics import mean_squared_error
+ from math import sqrt
+ # load data
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births-CA.csv', header=0, index_col=0, parse_dates=True, squeeze=True)
+ # create lagged dataset
+ values = DataFrame(series.values)
+ dataframe = concat([values.shift(1), values], axis=1)
+ dataframe.columns = ['t', 't+1']
+ print(dataframe)
+ X = dataframe.values
+
+ *********
+
+ # split into train and test sets
+ X = dataframe.values
+ train_size = int(len(X) * 0.66)
+ train, test = X[1:train_size], X[train_size:]
+ train_X, train_y = train[:,0], train[:,1]
+ test_X, test_y = test[:,0], test[:,1]
+ # persistence model on training set
+ train_pred = [x for x in train_X]
+ # calculate residuals
+ train_resid = [train_y[i] - train_pred[i] for i in range(len(train_pred))]
+ # model the training set residuals
+ model = AutoReg(train_resid, lags=20)
+ model_fit = model.fit()
+ window = len(model_fit.ar_lags)
+ coef = model_fit.params
+ # walk forward over time steps in test
+ history = train_resid[len(train_resid)-window:]
+ history = [history[i] for i in range(len(history))]
+ predictions = list()
+ for t in range(len(test_y)):
+     # persistence forecast
+     yhat = test_X[t]
+     error = test_y[t] - yhat
+     # predict the residual error from the last `window` errors
+     length = len(history)
+     lag = [history[i] for i in range(length-window, length)]
+     pred_error = coef[0]
+     for d in range(window):
+         pred_error += coef[d+1] * lag[window-d-1]
+     # correct the prediction
+     yhat = yhat + pred_error
+     predictions.append(yhat)
+     history.append(error)
+     print('predicted=%f, expected=%f' % (yhat, test_y[t]))
+ # error
+ rmse = sqrt(mean_squared_error(test_y, predictions))
+ print('Test RMSE: %.3f' % rmse)
+ # plot corrected predictions against actuals
+ pyplot.plot(test_y)
+ pyplot.plot(predictions, color='red')
+ pyplot.show()
+
@@ -0,0 +1,108 @@
+ Practical No 8
+ Aim: Forecasting using ARIMA model -- TEMPERATURE
+ Time Series Forecasting With ARIMA Model in Python for Temperature Prediction.
+
+ 1) Reading Time Series Data in Python using Pandas library
+
+ import pandas as pd
+ df = pd.read_csv('/content/drive/MyDrive/MScDS TSA/MaunaLoaDailyTemps.csv', index_col='DATE', parse_dates=True)
+ df = df.dropna()
+ print('Shape of data', df.shape)
+ df.head()
+
+ *********
+
+ df['AvgTemp'].plot(figsize=(12,5))
+
+ *********
+
+ 2) Checking for stationarity of time series model
+
+ from statsmodels.tsa.stattools import adfuller
+ def adf_test(dataset):
+     dftest = adfuller(dataset, autolag='AIC')
+     print("1. ADF : ", dftest[0])
+     print("2. P-Value : ", dftest[1])
+     print("3. Num Of Lags : ", dftest[2])
+     print("4. Num Of Observations Used For ADF Regression:", dftest[3])
+     print("5. Critical Values :")
+     for key, val in dftest[4].items():
+         print("\t", key, ": ", val)
+ adf_test(df['AvgTemp'])
+
+ *********
+
+ 3) Auto ARIMA function to select the order of the ARIMA model
+
+ pip install pmdarima
+
+ from pmdarima import auto_arima
+ import warnings
+ warnings.filterwarnings("ignore")
+ stepwise_fit = auto_arima(df['AvgTemp'], trace=True, suppress_warnings=True)
+ stepwise_fit.summary()
+
+ *********
+
+ 4) Split your dataset
+
+ print(df.shape)
+ train = df.iloc[:-30]
+ test = df.iloc[-30:]
+ print(train.shape, test.shape)
+
+ from statsmodels.tsa.arima.model import ARIMA
+ model = ARIMA(train['AvgTemp'], order=(1,0,5))
+ model = model.fit()
+ model.summary()
+
+ *********
+
+ 5) Check how good your model is
+
+ start = len(train)
+ end = len(train) + len(test) - 1
+ pred = model.predict(start=start, end=end, typ='levels').rename('ARIMA Predictions')
+ print(pred)
+ pred.index = df.index[start:end+1]
+ pred.plot(legend=True)
+ test['AvgTemp'].plot(legend=True)
+
+ *********
+
+ 6) Check your accuracy metric
+
+ from sklearn.metrics import mean_squared_error
+ from math import sqrt
+ test['AvgTemp'].mean()
+ rmse = sqrt(mean_squared_error(pred, test['AvgTemp']))
+ print(rmse)
+
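+ As a follow-up, the same order can be reused to forecast beyond the observed data; a sketch that refits on the full series and predicts the next 30 days (the (1,0,5) order is carried over from above rather than re-selected, and a daily index without an explicit frequency may emit a warning):
+
+ model2 = ARIMA(df['AvgTemp'], order=(1,0,5))
+ model2 = model2.fit()
+ future = model2.predict(start=len(df), end=len(df)+29).rename('ARIMA Forecast')
+ print(future)
+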
File without changes
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: myawesomepkg
- Version: 0.1.2
+ Version: 0.1.3
  Summary: A simple greeting library
  Author: Your Name
  Requires-Python: >=3.6
@@ -0,0 +1,17 @@
+ myawesomepkg/__init__.py,sha256=gNi6noitr9U8Cfc2UcldtL4tyZk6QHS6MU8OKJOElCA,29
+ myawesomepkg/core.py,sha256=BrAMNx-AdBpoqCAJ_In7Z5ZJC3AZaseEg79JUzs16gs,52
+ myawesomepkg/d.py,sha256=9MYJrjyoIJxsjdkXwUNzEbTHIVTyYd8M9OsBJ4bLRXE,729
+ myawesomepkg/TSAPY/Practical No 1.py,sha256=gqBPwTi8BuG3D1CnFAzjPeyey5iEhDryoYWq1Wxc218,3140
+ myawesomepkg/TSAPY/Practical No 2.py,sha256=MF4a-5P_YX86uRPQPYhq3XxbBDJihFkuljAV2wN4qPc,2897
+ myawesomepkg/TSAPY/Practical No 3.py,sha256=x9mKHk0r9F_08geny0DsWU8VZqLDr0RjhxqAaAEaJuM,4994
+ myawesomepkg/TSAPY/Practical No 4 A.py,sha256=Mhdni1p1TPNSrK4SebO4vomVpJogmydFIsxKaMNAxwE,5575
+ myawesomepkg/TSAPY/Practical No 4 B.py,sha256=Nm9IDkRsyZkHzTNgkbaQjGX36kQyMqF6KPSxlIA7bho,2211
+ myawesomepkg/TSAPY/Practical No 5.py,sha256=UKIMzwpI2AAgQ7AdsGCMk1yjUSHna9fAx-rR-kI6N8k,1211
+ myawesomepkg/TSAPY/Practical No 6.py,sha256=SR3Z_D83Mj6xZu1_6aMWrLDBebcvLaJl4vWXHw2lTx0,1061
+ myawesomepkg/TSAPY/Practical No 7.py,sha256=oOokK-GegBum3v884JtbgBjqZJRKCngOuo2u7qopz1Q,2060
+ myawesomepkg/TSAPY/Practical No 8.py,sha256=Qmm--XEXDrsOi8z7NyfwU-2ubjXkYvxf_L--Z7CMjIA,2070
+ myawesomepkg/TSAPY/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ myawesomepkg-0.1.3.dist-info/METADATA,sha256=MU2ar_3CJAhFsEYxjPnvyg_0O1nqdIoB1FFT3BSbmsE,140
+ myawesomepkg-0.1.3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+ myawesomepkg-0.1.3.dist-info/top_level.txt,sha256=Pngzshta5k3nST58NluFg5L7yoZth2MPR0huoroI7ao,13
+ myawesomepkg-0.1.3.dist-info/RECORD,,
@@ -1,7 +0,0 @@
- myawesomepkg/__init__.py,sha256=gNi6noitr9U8Cfc2UcldtL4tyZk6QHS6MU8OKJOElCA,29
- myawesomepkg/core.py,sha256=BrAMNx-AdBpoqCAJ_In7Z5ZJC3AZaseEg79JUzs16gs,52
- myawesomepkg/d.py,sha256=9MYJrjyoIJxsjdkXwUNzEbTHIVTyYd8M9OsBJ4bLRXE,729
- myawesomepkg-0.1.2.dist-info/METADATA,sha256=DB4TiUNmfQyWXgRIUD9uKr0b2wAvYfZvyB8bzDNnmzY,140
- myawesomepkg-0.1.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- myawesomepkg-0.1.2.dist-info/top_level.txt,sha256=Pngzshta5k3nST58NluFg5L7yoZth2MPR0huoroI7ao,13
- myawesomepkg-0.1.2.dist-info/RECORD,,