myawesomepkg 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,148 @@
1
+ Practical No 1: Aim: Handling timeseries data
2
+
3
+ A. Load and Explore Time Series Data
4
+
5
+ from pandas import read_csv
6
+ series = read_csv('/content/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True)
7
+ print(type(series))
8
+ print(series.head())
9
+
10
+ You can use the head() function to peek at the first 5 records
11
+
12
+ print(series.head(10))
13
+
14
+
15
+ Number of Observations
16
+
17
+ print(series.size)
18
+
19
+
20
+ Querying By Time
21
+
22
+ print(series.loc["1959-01"])
23
+
24
+
25
+ The describe() function creates a summary of the loaded time series including the count, mean, standard deviation, minimum, quartiles (25%, 50%/median, 75%), and maximum of the observations
26
+
27
+ print(series.describe())
28
+
29
+
30
+
31
+ """B. Data Visualization"""
32
+
33
+
34
+ Minimum Daily Temperatures Dataset
35
+
36
+ from pandas import read_csv
37
+ from matplotlib import pyplot
38
+ series = read_csv('daily-min-temperatures.csv', header=0, index_col=0,parse_dates=True)
39
+ print(series.head())
40
+ series=series.squeeze()
41
+ type(series)
42
+ print(series.describe())
43
+
44
+
45
+ Line Plot
46
+
47
+ series.plot()
48
+ pyplot.show()
49
+
50
+ &&&
51
+
52
+ series.plot(style='k.')
53
+ pyplot.show()
54
+
55
+ &&&
56
+
57
+ series.plot(style='k--')
58
+ pyplot.show()
59
+
60
+
61
+ A Grouper allows the user to specify a groupby instruction for an object.
62
+ The squeeze() method converts a single column DataFrame into a Series.
63
+
64
+
65
+ from pandas import read_csv
66
+ from pandas import DataFrame
67
+ from pandas import Grouper
68
+ from matplotlib import pyplot
69
+ series = read_csv('/content/daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True)
70
+ #print(series.head())
71
+
72
+ series=series.squeeze()
73
+ #print(series.head())
74
+ groups = series.groupby(Grouper(freq='A'))
75
+ #print(groups)
76
+ years = DataFrame()
77
+ #print(years)
78
+ for name, group in groups:
79
+ years[name.year] = group.values
80
+ print(years)
81
+ years.plot(subplots=True, legend=False)
82
+ pyplot.show()
83
+
84
+
85
+ Histogram and Density Plots
86
+
87
+ series.hist()
88
+ pyplot.show()
89
+
90
+
91
+ Generate Kernel Density Estimate plot using Gaussian kernels.
92
+
93
+ series.plot(kind='kde')
94
+ pyplot.show()
95
+
96
+
97
+ years.boxplot()
98
+ pyplot.show()
99
+
100
+
101
+ Box and Whisker Plots by Interval
102
+
103
+ from pandas import read_csv
104
+ from pandas import DataFrame
105
+ from pandas import Grouper
106
+ from matplotlib import pyplot
107
+ series = read_csv('daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True)
108
+ series=series.squeeze()
109
+ groups = series.groupby(Grouper(freq='A'))
110
+ years = DataFrame()
111
+ for name, group in groups:
112
+ years[name.year] = group.values
113
+ years.boxplot()
114
+ pyplot.show()
115
+
116
+
117
+ Heat Maps
118
+ from pandas import read_csv
119
+ from pandas import DataFrame
120
+ from pandas import Grouper
121
+ from matplotlib import pyplot
122
+ series = read_csv('daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True)
123
+ series=series.squeeze()
124
+ groups = series.groupby(Grouper(freq='A'))
125
+ years = DataFrame()
126
+ for name, group in groups:
127
+ years[name.year] = group.values
128
+ years = years.T
129
+ print(years)
130
+ pyplot.matshow(years, interpolation=None, aspect='auto')
131
+ pyplot.show()
132
+
133
+
134
+ Lag Scatter Plots
135
+
136
+ from pandas.plotting import lag_plot
137
+ lag_plot(series)
138
+ pyplot.show()
139
+
140
+
141
+ Autocorrelation Plots
142
+
143
+ from pandas.plotting import autocorrelation_plot
144
+ autocorrelation_plot(series)
145
+ pyplot.show()
146
+
147
+
148
+
@@ -0,0 +1,115 @@
1
+ Practical No 2 Aim: Implementing timeseries components
2
+ Seasonality
3
+ Trend
4
+ Pattern
5
+ Cyclic
6
+
7
+
8
+ Draw random samples from a normal (Gaussian) distribution.
9
+ upward, downward, horizontal and non-linear trends
10
+
11
+ import numpy as np
12
+ import matplotlib.pyplot as plt
13
+
14
+ # Upward Trend
15
+ t = np.arange(0, 10, 0.1)
16
+ data = t + np.random.normal(0, 0.5, len(t))
17
+ plt.plot(t, data, label='Upward Trend')
18
+
19
+ # Downward Trend
20
+ t = np.arange(0, 10, 0.1)
21
+ data = -t + np.random.normal(0, 0.5, len(t))
22
+ plt.plot(t, data, label='Downward Trend')
23
+
24
+ # Horizontal Trend
25
+ t = np.arange(0, 10, 0.1)
26
+ data = np.zeros(len(t)) + np.random.normal(0, 0.5, len(t))
27
+ plt.plot(t, data, label='Horizontal Trend')
28
+
29
+ # Non-linear Trend
30
+ t = np.arange(0, 10, 0.1)
31
+ data = t**2 + np.random.normal(0, 0.5, len(t))
32
+ plt.plot(t, data, label='Non-linear Trend')
33
+
34
+ plt.legend()
35
+ plt.show()
36
+
37
+
38
+
39
+ weekly monthly yearly seasonality
40
+
41
+ import numpy as np
42
+ import matplotlib.pyplot as plt
43
+
44
+ # generate sample data with different types of seasonality
45
+ np.random.seed(1)
46
+ time = np.arange(0, 366)
47
+
48
+ # weekly seasonality
49
+ weekly_seasonality = np.sin(2 * np.pi * time / 7)
50
+ weekly_data = 5 + weekly_seasonality
51
+
52
+ # monthly seasonality
53
+ monthly_seasonality = np.sin(2 * np.pi * time / 30)
54
+ monthly_data = 5 + monthly_seasonality
55
+
56
+ # annual seasonality
57
+ annual_seasonality = np.sin(2 * np.pi * time / 365)
58
+ annual_data = 5 + annual_seasonality
59
+
60
+ # plot the data
61
+ plt.figure(figsize=(12, 8))
62
+ plt.plot(time, weekly_data,label='Weekly Seasonality')
63
+ plt.plot(time, monthly_data,label='Monthly Seasonality')
64
+ plt.plot(time, annual_data,label='Annual Seasonality')
65
+ plt.legend(loc='upper left')
66
+ plt.show()
67
+
68
+
69
+
70
+ cyclic time series data
71
+
72
+ import numpy as np
73
+ import matplotlib.pyplot as plt
74
+
75
+ # Generate sample data with cyclic patterns
76
+ np.random.seed(1)
77
+ time = np.array([0, 30, 60, 90, 120,
78
+ 150, 180, 210, 240,
79
+ 270, 300, 330])
80
+ data = 10 * np.sin(2 * np.pi * time / 50) + 20 * np.sin(2 * np.pi * time / 100)
81
+
82
+ # Plot the data
83
+ plt.figure(figsize=(12, 8))
84
+ plt.plot(time, data, label='Cyclic Data')
85
+ plt.legend(loc='upper left')
86
+ plt.xlabel('Time (days)')
87
+ plt.ylabel('Value')
88
+ plt.title('Cyclic Time Series Data')
89
+ plt.show()
90
+
91
+
92
+
93
+ original data and data with irregularity
94
+
95
+ import numpy as np
96
+ import matplotlib.pyplot as plt
97
+
98
+ # Generate sample time series data
99
+ np.random.seed(1)
100
+ time = np.arange(0, 100)
101
+ #data = 5 * np.sin(2 * np.pi * time / 20) + 2 * time
102
+ data=np.sin(2 * np.pi * time / 30)+time
103
+
104
+ # Introduce irregularities by adding random noise
105
+ irregularities = np.random.normal(0, 5, len(data))
106
+ irregular_data = data + irregularities
107
+
108
+ # Plot the original data and the data with irregularities
109
+ plt.figure(figsize=(12, 8))
110
+ plt.plot(time, data, label='Original Data')
111
+ plt.plot(time, irregular_data,label='Data with Irregularities')
112
+ plt.legend(loc='upper left')
113
+ plt.show()
114
+
115
+
@@ -0,0 +1,168 @@
1
+ Practical No 3:
2
+ Aim: Detrending, deseasonalizing timeseries, detecting Cyclic variations and decomposing Time Series.
3
+
4
+
5
+ Trend
6
+
7
+ import pandas as pd
8
+ %matplotlib inline
9
+ from statsmodels.tsa.filters.hp_filter import hpfilter
10
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
11
+ EXINUS_cycle,EXINUS_trend = hpfilter(df['EXINUS'], lamb=1600)
12
+ EXINUS_trend.plot(figsize=(15,6)).autoscale(axis='x',tight=True)
13
+
14
+
15
+
16
+ Detrending using Differencing
17
+
18
+ import pandas as pd
19
+ import matplotlib.pyplot as plt
20
+ import warnings
21
+ warnings.filterwarnings("ignore")
22
+ %matplotlib inline
23
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
24
+ diff = df.EXINUS.diff()
25
+ plt.figure(figsize=(15,6))
26
+ plt.plot(diff)
27
+ plt.title('Detrending using Differencing', fontsize=16)
28
+ plt.xlabel('Year')
29
+ plt.ylabel('EXINUS exchange rate')
30
+ plt.show()
31
+
32
+
33
+ Detrending using Scipy Signal*
34
+
35
+ import pandas as pd
36
+ import matplotlib.pyplot as plt
37
+ from scipy import signal
38
+ import warnings
39
+ warnings.filterwarnings("ignore")
40
+ %matplotlib inline
41
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
42
+ detrended = signal.detrend(df.EXINUS.values)
43
+ plt.figure(figsize=(15,6))
44
+ plt.plot(detrended)
45
+ plt.xlabel('EXINUS')
46
+ plt.ylabel('Frequency')
47
+ plt.title('Detrending using Scipy Signal', fontsize=16)
48
+ plt.show()
49
+
50
+
51
+
52
+ Detrending using HP Filter
53
+
54
+
55
+ import pandas as pd
56
+ import matplotlib.pyplot as plt
57
+ from statsmodels.tsa.filters.hp_filter import hpfilter
58
+ import warnings
59
+ warnings.filterwarnings("ignore")
60
+ %matplotlib inline
61
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
62
+ EXINUS_cycle,EXINUS_trend = hpfilter(df['EXINUS'], lamb=1600)
63
+ df['trend'] = EXINUS_trend
64
+ detrended = df.EXINUS - df['trend']
65
+ plt.figure(figsize=(15,6))
66
+ plt.plot(detrended)
67
+ plt.title('Detrending using HP Filter', fontsize=16)
68
+ plt.xlabel('Year')
69
+ plt.ylabel('EXINUS exchange rate')
70
+ plt.show()
71
+
72
+
73
+
74
+ Seasonality
75
+
76
+ A. Multi Month-wise Box Plot
77
+
78
+ import pandas as pd
79
+ import seaborn as sns
80
+ import matplotlib.pyplot as plt
81
+ from statsmodels.tsa.filters.hp_filter import hpfilter
82
+ import warnings
83
+ warnings.filterwarnings("ignore")
84
+ %matplotlib inline
85
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',parse_dates=True)
86
+ df['month'] = df['observation_date'].dt.strftime('%b')
87
+ df['year'] = [d.year for d in df.observation_date]
88
+ df['month'] = [d.strftime('%b') for d in df.observation_date]
89
+ years = df['year'].unique()
90
+ plt.figure(figsize=(15,6))
91
+ sns.boxplot(x='month', y='EXINUS', data=df).set_title("Multi Month-wise Box Plot")
92
+ plt.show()
93
+
94
+
95
+ B. Autocorrelation plot for seasonality
96
+
97
+ from pandas.plotting import autocorrelation_plot
98
+ import pandas as pd
99
+ import matplotlib.pyplot as plt
100
+ %matplotlib inline
101
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
102
+ #plt.rcParams.update({'figure.figsize':(15,6), 'figure.dpi':220})
103
+ autocorrelation_plot(df.EXINUS.tolist())
104
+
105
+
106
+ Deseasoning Time series
107
+
108
+
109
+ import pandas as pd
110
+ import matplotlib.pyplot as plt
111
+ from statsmodels.tsa.seasonal import seasonal_decompose
112
+ import warnings
113
+ warnings.filterwarnings("ignore")
114
+ %matplotlib inline
115
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
116
+ result_mul = seasonal_decompose(df['EXINUS'], model='multiplicative', extrapolate_trend='freq')
117
+ deseason = df['EXINUS'] - result_mul.seasonal
118
+ plt.figure(figsize=(15,6))
119
+ plt.plot(deseason)
120
+ plt.title('Deseasoning using seasonal_decompose', fontsize=16)
121
+ plt.xlabel('Year')
122
+ plt.ylabel('EXINUS exchange rate')
123
+ plt.show()
124
+
125
+
126
+ Detecting cyclical variation
127
+
128
+ from statsmodels.tsa.filters.hp_filter import hpfilter
129
+ import pandas as pd
130
+ import matplotlib.pyplot as plt
131
+ import warnings
132
+ warnings.filterwarnings("ignore")
133
+ %matplotlib inline
134
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
135
+ EXINUS_cycle,EXINUS_trend = hpfilter(df['EXINUS'], lamb=1600)
136
+ df['cycle'] =EXINUS_cycle
137
+ df['trend'] =EXINUS_trend
138
+ df[['cycle']].plot(figsize=(15,6)).autoscale(axis='x',tight=True)
139
+ plt.title('Extracting Cyclic Variations', fontsize=16)
140
+ plt.xlabel('Year')
141
+ plt.ylabel('EXINUS exchange rate')
142
+ plt.show()
143
+
144
+
145
+
146
+ Decompose Time series
147
+
148
+ from statsmodels.tsa.seasonal import seasonal_decompose
149
+ import pandas as pd
150
+ import matplotlib.pyplot as plt
151
+ import warnings
152
+ warnings.filterwarnings("ignore")
153
+ %matplotlib inline
154
+ df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',
155
+ index_col=0,parse_dates=True)
156
+ result = seasonal_decompose(df['EXINUS'], model='add')
157
+ result.plot();
158
+ result = seasonal_decompose(df['EXINUS'], model='mul')
159
+ result.plot();
160
+
161
+
162
+
163
+
164
+
165
+
166
+
167
+
168
+
@@ -0,0 +1,233 @@
1
+ Practical no 4
2
+ Aim: Working with stationary and non-stationary time series
3
+
4
+
5
+ Stationary Time Series
6
+
7
+ # load time series data
8
+ from pandas import read_csv
9
+ from matplotlib import pyplot
10
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True,
11
+ squeeze=True)
12
+ series.plot()
13
+ pyplot.show()
14
+
15
+
16
+
17
+ *********
18
+
19
+
20
+
21
+ Non-Stationary Time Series
22
+
23
+
24
+ # load time series data
25
+ from pandas import read_csv
26
+ from matplotlib import pyplot
27
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/AirPassengers.csv', header=0, index_col=0, parse_dates=True,
28
+ squeeze=True)
29
+ series.plot()
30
+ pyplot.show()
31
+
32
+
33
+
34
+ **********
35
+
36
+
37
+
38
+ Summary Statistics: You can review the summary statistics for your data for seasons or random partitions and check for obvious or significant differences
39
+
40
+ # plot a histogram of a time series
41
+ from pandas import read_csv
42
+ from matplotlib import pyplot
43
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True,
44
+ squeeze=True)
45
+ series.hist()
46
+ pyplot.show()
47
+
48
+
49
+
50
+ ***********
51
+
52
+
53
+ We can split the time series into two contiguous sequences, then calculate the mean and variance of each group of numbers and compare the values.
54
+
55
+ PART1ST
56
+
57
+ # calculate statistics of partitioned time series data
58
+ from pandas import read_csv
59
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True,squeeze=True)
60
+
61
+ X = series.values
62
+ split = int(len(X) / 2)
63
+ X1, X2 = X[0:split], X[split:]
64
+ mean1, mean2 = X1.mean(), X2.mean()
65
+ var1, var2 = X1.var(), X2.var()
66
+ print('mean1=%f, mean2=%f' % (mean1, mean2))
67
+ print('variance1=%f, variance2=%f' % (var1, var2))
68
+
69
+
70
+ PART 2ND
71
+
72
+ # calculate statistics of partitioned time series data
73
+ from pandas import read_csv
74
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/AirPassengers.csv', header=0, index_col=0, parse_dates=True,
75
+ squeeze=True)
76
+ X = series.values
77
+ split = int(len(X) / 2)
78
+ X1, X2 = X[0:split], X[split:]
79
+ mean1, mean2 = X1.mean(), X2.mean()
80
+ var1, var2 = X1.var(), X2.var()
81
+ print('mean1=%f, mean2=%f' % (mean1, mean2))
82
+ print('variance1=%f, variance2=%f' % (var1, var2))
83
+
84
+
85
+
86
+ ***********
87
+
88
+
89
+ C] Statistical Tests: You can use statistical tests to check if the expectations of stationarity are met or have been violated
90
+
91
+ # calculate stationarity test of time series data
92
+ from pandas import read_csv
93
+ from statsmodels.tsa.stattools import adfuller
94
+ series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True,
95
+ squeeze=True)
96
+ X = series.values
97
+ result = adfuller(X)
98
+ print('ADF Statistic: %f' % result[0])
99
+ print('p-value: %f' % result[1])
100
+ print('Critical Values:')
101
+ for key, value in result[4].items():
102
+ print('\t%s: %.3f' % (key, value))
103
+
104
+
105
+ **************
106
+
107
+
108
+ #Importing the libraries:
109
+
110
+ from statsmodels.tsa.stattools import adfuller
111
+ import pandas as pd
112
+ import numpy as np
113
+
114
+ #Reading the airline-passengers data
115
+
116
+ data = pd.read_csv('/content/drive/MyDrive/MScDS TSA/AirPassengers.csv', index_col='Month')
117
+
118
+ #Checking for some values of the data.
119
+
120
+ data.head()
121
+
122
+
123
+ ************
124
+
125
+
126
+ #Plotting the data.
127
+
128
+ data.plot(figsize=(14,8), title='data series')
129
+
130
+ #Taking out the passengers number as a series.
131
+
132
+ series = data['#Passengers'].values
133
+ #print(series)
134
+
135
+
136
+ ***********
137
+
138
+
139
+
140
+ #Performing the ADF test on the series:
141
+
142
+ # ADF Test
143
+ result = adfuller(series, autolag='AIC')
144
+ #Extracting the values from the results:
145
+
146
+ print('ADF Statistic: %f' % result[0])
147
+
148
+ print('p-value: %f' % result[1])
149
+
150
+ print('Critical Values:')
151
+
152
+ for key, value in result[4].items():
153
+ print('\t%s: %.3f' % (key, value))
154
+ if result[0] < result[4]["5%"]:
155
+ print ("Reject Ho - Time Series is Stationary")
156
+ else:
157
+ print ("Failed to Reject Ho - Time Series is Non-Stationary")
158
+
159
+
160
+
161
+
162
+ The test statistic is positive, meaning we are much less likely to reject the null hypothesis (it looks non-stationary). Comparing the test statistic to the critical values, it looks like we would have to fail to reject the null hypothesis that the time series is non-stationary and does have time-dependent structure.
163
+
164
+
165
+ #Kwiatkowski Phillips Schmidt Shin (KPSS) test:
166
+
167
+ #Importing the libraries:
168
+
169
+ from statsmodels.tsa.stattools import kpss
170
+ import pandas as pd
171
+ import numpy as np
172
+ import warnings
173
+ warnings.filterwarnings("ignore")
174
+
175
+ result_kpss_ct=kpss(series,regression="ct")
176
+ print('Test Statistic: %f' %result_kpss_ct[0])
177
+ print('p-value: %f' %result_kpss_ct[1])
178
+ print('Critical values:')
179
+ for key, value in result_kpss_ct[3].items():
180
+ print('\t%s: %.3f' %(key, value))
181
+
182
+
183
+
184
+ **********
185
+
186
+ #Loading the data.
187
+
188
+ path = '/content/daily-min-temperatures.csv'
189
+ data = pd.read_csv(path, index_col='Date')
190
+
191
+ #Checking for some head values of the data:
192
+
193
+ data.head()
194
+
195
+
196
+
197
+ **********
198
+
199
+ #Plotting the data.
200
+
201
+ data.plot(figsize=(14,8), title='temperature data series')
202
+
203
+
204
+ **********
205
+
206
+
207
+ #Extracting temperature in a series.
208
+
209
+ series = data['Temp'].values
210
+ series
211
+
212
+ ***********
213
+
214
+
215
+ #Performing ADF test.
216
+
217
+ result = adfuller(series, autolag='AIC')
218
+
219
+ #Checking the results:
220
+
221
+ print('ADF Statistic: %f' % result[0])
222
+
223
+ print('p-value: %f' % result[1])
224
+
225
+ print('Critical Values:')
226
+
227
+ for key, value in result[4].items():
228
+ print('\t%s: %.3f' % (key, value))
229
+ if result[0] > result[4]["5%"]:
230
+ print ("Reject Ho - Time Series is Stationary")
231
+ else:
232
+ print ("Failed to Reject Ho - Time Series is Stationary")
233
+