myawesomepkg 0.1.3__tar.gz → 0.1.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- myawesomepkg-0.1.4/PKG-INFO +11 -0
- myawesomepkg-0.1.4/myawesomepkg/TSAPY1/practical_no_3.py +167 -0
- myawesomepkg-0.1.4/myawesomepkg/TSAPY1/practical_no_4.py +215 -0
- myawesomepkg-0.1.4/myawesomepkg/TSAPY1/practical_no_4b.py +78 -0
- myawesomepkg-0.1.4/myawesomepkg/TSAPY1/practical_no_5_ac_and_pca.py +39 -0
- myawesomepkg-0.1.4/myawesomepkg/TSAPY1/practical_no_6.py +37 -0
- myawesomepkg-0.1.4/myawesomepkg/TSAPY1/practical_no_7.py +69 -0
- myawesomepkg-0.1.4/myawesomepkg/TSAPY1/practical_no_8.py +79 -0
- myawesomepkg-0.1.4/myawesomepkg/TSAPY1/tsa_practical_no_1.py +287 -0
- myawesomepkg-0.1.4/myawesomepkg/TSAPY1/tsa_practical_no_2.py +121 -0
- myawesomepkg-0.1.4/myawesomepkg.egg-info/PKG-INFO +11 -0
- myawesomepkg-0.1.4/myawesomepkg.egg-info/SOURCES.txt +26 -0
- myawesomepkg-0.1.4/setup.py +18 -0
- myawesomepkg-0.1.3/PKG-INFO +0 -6
- myawesomepkg-0.1.3/myawesomepkg/d.py +0 -36
- myawesomepkg-0.1.3/myawesomepkg.egg-info/PKG-INFO +0 -6
- myawesomepkg-0.1.3/myawesomepkg.egg-info/SOURCES.txt +0 -18
- myawesomepkg-0.1.3/setup.py +0 -11
- {myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 1.py +0 -0
- {myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 2.py +0 -0
- {myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 3.py +0 -0
- {myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 4 A.py +0 -0
- {myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 4 B.py +0 -0
- {myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 5.py +0 -0
- {myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 6.py +0 -0
- {myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 7.py +0 -0
- {myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 8.py +0 -0
- {myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/__init__.py +0 -0
- {myawesomepkg-0.1.3 → myawesomepkg-0.1.4}/myawesomepkg/__init__.py +0 -0
- {myawesomepkg-0.1.3 → myawesomepkg-0.1.4}/myawesomepkg/core.py +0 -0
- {myawesomepkg-0.1.3 → myawesomepkg-0.1.4}/myawesomepkg.egg-info/dependency_links.txt +0 -0
- {myawesomepkg-0.1.3 → myawesomepkg-0.1.4}/myawesomepkg.egg-info/top_level.txt +0 -0
- {myawesomepkg-0.1.3 → myawesomepkg-0.1.4}/setup.cfg +0 -0
myawesomepkg-0.1.4/PKG-INFO
ADDED
@@ -0,0 +1,11 @@
+Metadata-Version: 2.1
+Name: myawesomepkg
+Version: 0.1.4
+Summary: A simple greeting library
+Author: Your Name
+Author-email: your.email@example.com
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.6
+Description-Content-Type: text/markdown
@@ -0,0 +1,167 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
"""Practical No 3.ipynb
|
3
|
+
|
4
|
+
Automatically generated by Colab.
|
5
|
+
|
6
|
+
Original file is located at
|
7
|
+
https://colab.research.google.com/drive/1aCeoKiFV3QbdraoAWb562bOjZDdBw0be
|
8
|
+
|
9
|
+
## **Practical No 3:**
|
10
|
+
# Aim: Detrending, deseasonalizing timeseries, detecting Cyclic variations and decomposing Time Series.
|
11
|
+
|
12
|
+
## Trend
|
13
|
+
"""
|
14
|
+
|
15
|
+
# Commented out IPython magic to ensure Python compatibility.
|
16
|
+
import pandas as pd
|
17
|
+
# %matplotlib inline
|
18
|
+
from statsmodels.tsa.filters.hp_filter import hpfilter
|
19
|
+
df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
|
20
|
+
EXINUS_cycle,EXINUS_trend = hpfilter(df['EXINUS'], lamb=1600)
|
21
|
+
EXINUS_trend.plot(figsize=(15,6)).autoscale(axis='x',tight=True)
|
22
|
+
|
23
|
+
"""**Detrending using Differencing**"""
|
24
|
+
|
25
|
+
# Commented out IPython magic to ensure Python compatibility.
|
26
|
+
import pandas as pd
|
27
|
+
import matplotlib.pyplot as plt
|
28
|
+
import warnings
|
29
|
+
warnings.filterwarnings("ignore")
|
30
|
+
# %matplotlib inline
|
31
|
+
df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
|
32
|
+
diff = df.EXINUS.diff()
|
33
|
+
plt.figure(figsize=(15,6))
|
34
|
+
plt.plot(diff)
|
35
|
+
plt.title('Detrending using Differencing', fontsize=16)
|
36
|
+
plt.xlabel('Year')
|
37
|
+
plt.ylabel('EXINUS exchange rate')
|
38
|
+
plt.show()
|
39
|
+
|
40
|
+
"""** **bold text**Detrending using Scipy Signal**"""
|
41
|
+
|
42
|
+
# Commented out IPython magic to ensure Python compatibility.
|
43
|
+
import pandas as pd
|
44
|
+
import matplotlib.pyplot as plt
|
45
|
+
from scipy import signal
|
46
|
+
import warnings
|
47
|
+
warnings.filterwarnings("ignore")
|
48
|
+
# %matplotlib inline
|
49
|
+
df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
|
50
|
+
detrended = signal.detrend(df.EXINUS.values)
|
51
|
+
plt.figure(figsize=(15,6))
|
52
|
+
plt.plot(detrended)
|
53
|
+
plt.xlabel('EXINUS')
|
54
|
+
plt.ylabel('Frequency')
|
55
|
+
plt.title('Detrending using Scipy Signal', fontsize=16)
|
56
|
+
plt.show()
|
57
|
+
|
58
|
+
"""**Detrending using HP Filter**"""
|
59
|
+
|
60
|
+
# Commented out IPython magic to ensure Python compatibility.
|
61
|
+
import pandas as pd
|
62
|
+
import matplotlib.pyplot as plt
|
63
|
+
from statsmodels.tsa.filters.hp_filter import hpfilter
|
64
|
+
import warnings
|
65
|
+
warnings.filterwarnings("ignore")
|
66
|
+
# %matplotlib inline
|
67
|
+
df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
|
68
|
+
EXINUS_cycle,EXINUS_trend = hpfilter(df['EXINUS'], lamb=1600)
|
69
|
+
df['trend'] = EXINUS_trend
|
70
|
+
detrended = df.EXINUS - df['trend']
|
71
|
+
plt.figure(figsize=(15,6))
|
72
|
+
plt.plot(detrended)
|
73
|
+
plt.title('Detrending using HP Filter', fontsize=16)
|
74
|
+
plt.xlabel('Year')
|
75
|
+
plt.ylabel('EXINUS exchange rate')
|
76
|
+
plt.show()
|
77
|
+
|
78
|
+
"""## Seasonality
|
79
|
+
|
80
|
+
Methods can be used to detect seasonality:
|
81
|
+
a. Multiple box plots
|
82
|
+
b. Autocorrelation plots
|
83
|
+
|
84
|
+
A. **Multi Month-wise Box Plot**
|
85
|
+
"""
|
86
|
+
|
87
|
+
# Commented out IPython magic to ensure Python compatibility.
|
88
|
+
import pandas as pd
|
89
|
+
import seaborn as sns
|
90
|
+
import matplotlib.pyplot as plt
|
91
|
+
from statsmodels.tsa.filters.hp_filter import hpfilter
|
92
|
+
import warnings
|
93
|
+
warnings.filterwarnings("ignore")
|
94
|
+
# %matplotlib inline
|
95
|
+
df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',parse_dates=True)
|
96
|
+
df['month'] = df['observation_date'].dt.strftime('%b')
|
97
|
+
df['year'] = [d.year for d in df.observation_date]
|
98
|
+
df['month'] = [d.strftime('%b') for d in df.observation_date]
|
99
|
+
years = df['year'].unique()
|
100
|
+
plt.figure(figsize=(15,6))
|
101
|
+
sns.boxplot(x='month', y='EXINUS', data=df).set_title("Multi Month-wise Box Plot")
|
102
|
+
plt.show()
|
103
|
+
|
104
|
+
"""B. **Autocorrelation plot for seasonality**"""
|
105
|
+
|
106
|
+
# Commented out IPython magic to ensure Python compatibility.
|
107
|
+
from pandas.plotting import autocorrelation_plot
|
108
|
+
import pandas as pd
|
109
|
+
import matplotlib.pyplot as plt
|
110
|
+
# %matplotlib inline
|
111
|
+
df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
|
112
|
+
#plt.rcParams.update({'figure.figsize':(15,6), 'figure.dpi':220})
|
113
|
+
autocorrelation_plot(df.EXINUS.tolist())
|
114
|
+
|
115
|
+
"""**Deseasoning Time series**"""
|
116
|
+
|
117
|
+
# Commented out IPython magic to ensure Python compatibility.
|
118
|
+
import pandas as pd
|
119
|
+
import matplotlib.pyplot as plt
|
120
|
+
from statsmodels.tsa.seasonal import seasonal_decompose
|
121
|
+
import warnings
|
122
|
+
warnings.filterwarnings("ignore")
|
123
|
+
# %matplotlib inline
|
124
|
+
df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
|
125
|
+
result_mul = seasonal_decompose(df['EXINUS'], model='multiplicative', extrapolate_trend='freq')
|
126
|
+
deseason = df['EXINUS'] - result_mul.seasonal
|
127
|
+
plt.figure(figsize=(15,6))
|
128
|
+
plt.plot(deseason)
|
129
|
+
plt.title('Deseasoning using seasonal_decompose', fontsize=16)
|
130
|
+
plt.xlabel('Year')
|
131
|
+
plt.ylabel('EXINUS exchange rate')
|
132
|
+
plt.show()
|
133
|
+
|
134
|
+
"""**Detecting cyclical variation**"""
|
135
|
+
|
136
|
+
# Commented out IPython magic to ensure Python compatibility.
|
137
|
+
from statsmodels.tsa.filters.hp_filter import hpfilter
|
138
|
+
import pandas as pd
|
139
|
+
import matplotlib.pyplot as plt
|
140
|
+
import warnings
|
141
|
+
warnings.filterwarnings("ignore")
|
142
|
+
# %matplotlib inline
|
143
|
+
df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',index_col=0,parse_dates=True)
|
144
|
+
EXINUS_cycle,EXINUS_trend = hpfilter(df['EXINUS'], lamb=1600)
|
145
|
+
df['cycle'] =EXINUS_cycle
|
146
|
+
df['trend'] =EXINUS_trend
|
147
|
+
df[['cycle']].plot(figsize=(15,6)).autoscale(axis='x',tight=True)
|
148
|
+
plt.title('Extracting Cyclic Variations', fontsize=16)
|
149
|
+
plt.xlabel('Year')
|
150
|
+
plt.ylabel('EXINUS exchange rate')
|
151
|
+
plt.show()
|
152
|
+
|
153
|
+
"""**Decompose Time series**"""
|
154
|
+
|
155
|
+
# Commented out IPython magic to ensure Python compatibility.
|
156
|
+
from statsmodels.tsa.seasonal import seasonal_decompose
|
157
|
+
import pandas as pd
|
158
|
+
import matplotlib.pyplot as plt
|
159
|
+
import warnings
|
160
|
+
warnings.filterwarnings("ignore")
|
161
|
+
# %matplotlib inline
|
162
|
+
df = pd.read_excel(r'/content/drive/MyDrive/MScDS TSA/India_Exchange_Rate_Dataset.xls',
|
163
|
+
index_col=0,parse_dates=True)
|
164
|
+
result = seasonal_decompose(df['EXINUS'], model='add')
|
165
|
+
result.plot();
|
166
|
+
result = seasonal_decompose(df['EXINUS'], model='mul')
|
167
|
+
result.plot();
|
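The practical_no_3.py scripts above all read an exchange-rate workbook from a private Drive path, so they will not run outside the author's Colab. A minimal self-contained sketch of the same detrend/deseasonalize flow on a synthetic monthly series (the series name and the lamb value below are illustrative assumptions, not part of the package):

import numpy as np
import pandas as pd
from statsmodels.tsa.filters.hp_filter import hpfilter
from statsmodels.tsa.seasonal import seasonal_decompose

# synthetic monthly series: trend + yearly seasonality + noise
idx = pd.date_range('2000-01-01', periods=120, freq='MS')
rng = np.random.default_rng(0)
y = pd.Series(0.5 * np.arange(120)
              + 5 * np.sin(2 * np.pi * np.arange(120) / 12)
              + rng.normal(0, 1, 120), index=idx, name='value')

cycle, trend = hpfilter(y, lamb=129600)   # 129600 is the conventional lambda for monthly data
detrended = y - trend                     # same idea as the HP-filter detrending above

result = seasonal_decompose(y, model='additive')
deseasoned = y - result.seasonal          # same idea as the deseasoning step above
print(deseasoned.head())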
myawesomepkg-0.1.4/myawesomepkg/TSAPY1/practical_no_4.py
ADDED
@@ -0,0 +1,215 @@
+# -*- coding: utf-8 -*-
+"""Practical No 4.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1DnqwmMcr1ypxLue43og-x_3DEDaI1g7Q
+
+# Aim: Working with stationary and non-stationary time series
+
+# Checks for Stationarity
+There are many methods to check whether a time series (direct observations, residuals, otherwise)
+is stationary or non-stationary.
+
+**A] Look at Plots:** You can review a time series plot of your data and visually check if there are any obvious trends or seasonality.
+
+**Stationary Time Series**
+"""
+
+# load time series data
+from pandas import read_csv
+from matplotlib import pyplot
+series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True,
+                  squeeze=True)
+series.plot()
+pyplot.show()
+
+"""**Non-Stationary Time Series**"""
+
+# load time series data
+from pandas import read_csv
+from matplotlib import pyplot
+series = read_csv('/content/drive/MyDrive/MScDS TSA/AirPassengers.csv', header=0, index_col=0, parse_dates=True,
+                  squeeze=True)
+series.plot()
+pyplot.show()
+
+"""**B] Summary Statistics:** You can review the summary statistics for your data for seasons or random partitions and check for obvious or significant differences.
+"""
+
+# plot a histogram of a time series
+from pandas import read_csv
+from matplotlib import pyplot
+series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True,
+                  squeeze=True)
+series.hist()
+pyplot.show()
+
+"""We clearly see the bell curve-like shape of the Gaussian distribution, perhaps with a longer right tail.
+
+We can split the time series into two contiguous sequences, then calculate the mean and variance of each group of numbers and compare the values.
+"""
+
+# calculate statistics of partitioned time series data
+from pandas import read_csv
+series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True,squeeze=True)
+
+X = series.values
+split = int(len(X) / 2)
+X1, X2 = X[0:split], X[split:]
+mean1, mean2 = X1.mean(), X2.mean()
+var1, var2 = X1.var(), X2.var()
+print('mean1=%f, mean2=%f' % (mean1, mean2))
+print('variance1=%f, variance2=%f' % (var1, var2))
+
+"""This example shows that the mean and variance values are different, but in the
+same ballpark (range).
+"""
+
+# calculate statistics of partitioned time series data
+from pandas import read_csv
+series = read_csv('/content/drive/MyDrive/MScDS TSA/AirPassengers.csv', header=0, index_col=0, parse_dates=True,
+                  squeeze=True)
+X = series.values
+split = int(len(X) / 2)
+X1, X2 = X[0:split], X[split:]
+mean1, mean2 = X1.mean(), X2.mean()
+var1, var2 = X1.var(), X2.var()
+print('mean1=%f, mean2=%f' % (mean1, mean2))
+print('variance1=%f, variance2=%f' % (var1, var2))
+
+"""In this example, we can see that the mean and variance look very different. We have a
+non-stationary time series.
+
+**C] Statistical Tests:** You can use statistical tests to check if the expectations of stationarity are met or have been violated.
+
+Calculating the Augmented Dickey-Fuller test on the Daily Female Births dataset.
+"""
+
+# calculate stationarity test of time series data
+from pandas import read_csv
+from statsmodels.tsa.stattools import adfuller
+series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True,
+                  squeeze=True)
+X = series.values
+result = adfuller(X)
+print('ADF Statistic: %f' % result[0])
+print('p-value: %f' % result[1])
+print('Critical Values:')
+for key, value in result[4].items():
+    print('\t%s: %.3f' % (key, value))
+
+"""Running the example prints the test statistic value of -4. The more negative this statistic,
+the more likely we are to reject the null hypothesis (we have a stationary dataset). As part of
+the output, we get a look-up table to help determine the ADF statistic. We can see that our
+statistic value of -4 is less than the value of -3.449 at 1%.
+This suggests that we can reject the null hypothesis with a significance level of less than 1%
+(i.e. a low probability that the result is a statistical fluke). Rejecting the null hypothesis means
+that the process has no unit root, and in turn that the time series is stationary or does not have
+time-dependent structure.
+"""
+
+#Importing the libraries:
+
+from statsmodels.tsa.stattools import adfuller
+import pandas as pd
+import numpy as np
+
+#Reading the airline-passengers data
+
+data = pd.read_csv('/content/drive/MyDrive/MScDS TSA/AirPassengers.csv', index_col='Month')
+
+#Checking for some values of the data.
+
+data.head()
+
+#Plotting the data.
+
+data.plot(figsize=(14,8), title='data series')
+
+#Taking out the passengers number as a series.
+
+series = data['#Passengers'].values
+#print(series)
+
+#Performing the ADF test on the series:
+
+# ADF Test
+result = adfuller(series, autolag='AIC')
+#Extracting the values from the results:
+
+print('ADF Statistic: %f' % result[0])
+
+print('p-value: %f' % result[1])
+
+print('Critical Values:')
+
+for key, value in result[4].items():
+    print('\t%s: %.3f' % (key, value))
+if result[0] < result[4]["5%"]:
+    print("Reject Ho - Time Series is Stationary")
+else:
+    print("Failed to Reject Ho - Time Series is Non-Stationary")
+
+"""The test statistic is positive,
+meaning we are much less likely to reject the null hypothesis (it looks non-stationary). Comparing
+the test statistic to the critical values, it looks like we would have to fail to reject the null
+hypothesis that the time series is non-stationary and does have time-dependent structure.
+"""
+
+#Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test:
+
+#Importing the libraries:
+
+from statsmodels.tsa.stattools import kpss
+import pandas as pd
+import numpy as np
+import warnings
+warnings.filterwarnings("ignore")
+
+result_kpss_ct = kpss(series, regression="ct")
+print('Test Statistic: %f' % result_kpss_ct[0])
+print('p-value: %f' % result_kpss_ct[1])
+print('Critical values:')
+for key, value in result_kpss_ct[3].items():
+    print('\t%s: %.3f' % (key, value))
+
+"""As the test statistic value is greater than the critical value, the null hypothesis is rejected. This indicates that the data is non-stationary."""
+
+#Loading the data.
+
+path = '/content/daily-min-temperatures.csv'
+data = pd.read_csv(path, index_col='Date')
+
+#Checking for some head values of the data:
+
+data.head()
+
+#Plotting the data.
+
+data.plot(figsize=(14,8), title='temperature data series')
+
+#Extracting temperature in a series.
+
+series = data['Temp'].values
+series
+
+#Performing ADF test.
+
+result = adfuller(series, autolag='AIC')
+
+#Checking the results:
+
+print('ADF Statistic: %f' % result[0])
+
+print('p-value: %f' % result[1])
+
+print('Critical Values:')
+
+for key, value in result[4].items():
+    print('\t%s: %.3f' % (key, value))
+if result[0] < result[4]["5%"]:
+    print("Reject Ho - Time Series is Stationary")
+else:
+    print("Failed to Reject Ho - Time Series is Non-Stationary")
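A compact restatement of the two tests above on data whose stationarity is known by construction may help; this is a sketch, not package code. Note that the hypotheses point in opposite directions: ADF's null is a unit root, KPSS's null is stationarity.

import numpy as np
import warnings
from statsmodels.tsa.stattools import adfuller, kpss
warnings.filterwarnings("ignore")

rng = np.random.default_rng(0)
noise = rng.normal(0, 1, 500)    # stationary by construction
walk = np.cumsum(noise)          # unit-root (non-stationary) by construction

for name, x in [('white noise', noise), ('random walk', walk)]:
    adf_p = adfuller(x, autolag='AIC')[1]                # H0: unit root
    kpss_p = kpss(x, regression='c', nlags='auto')[1]    # H0: stationary
    print('%s: ADF p=%.3f, KPSS p=%.3f' % (name, adf_p, kpss_p))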
myawesomepkg-0.1.4/myawesomepkg/TSAPY1/practical_no_4b.py
ADDED
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+"""Practical No 4B.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1BIAOhJMWQry0k6SDmofT9ESz5f45YX4f
+"""
+
+def test_stationarity(dataFrame, var):
+    dataFrame['rollMean'] = dataFrame[var].rolling(window=12).mean()
+    dataFrame['rollStd'] = dataFrame[var].rolling(window=12).std()
+
+    from statsmodels.tsa.stattools import adfuller
+    import seaborn as sns
+    adfTest = adfuller(dataFrame[var], autolag='AIC')
+    stats = pd.Series(adfTest[0:4], index=['Test Statistic','p-value', '#lags used', 'number of observations used'])
+    print(stats)
+
+    for key, value in adfTest[4].items():
+        print('\t%s: %.3f' % (key, value))
+
+    sns.lineplot(data=dataFrame, x=dataFrame.index, y=var)
+    sns.lineplot(data=dataFrame, x=dataFrame.index, y='rollMean')
+    sns.lineplot(data=dataFrame, x=dataFrame.index, y='rollStd')
+
+import pandas as pd
+import numpy as np
+
+#Reading the airline-passengers data
+
+data = pd.read_csv('/content/drive/MyDrive/MScDS TSA/AirPassengers.csv', index_col='Month')
+
+#Checking for some values of the data.
+
+data.head()
+
+test_stationarity(data,'Passengers')
+
+air_df=data[['Passengers']]
+air_df.head()
+
+air_df['shift']=air_df.Passengers.shift()
+air_df['shiftDiff']=air_df.Passengers - air_df['shift']
+air_df.head()
+
+test_stationarity(air_df.dropna(),'shiftDiff')
+
+log_df=air_df[['Passengers']]
+log_df['log']=np.log(log_df['Passengers'])
+log_df.head()
+
+test_stationarity(log_df,'log')
+
+sqrt_df=air_df[['Passengers']]
+sqrt_df['sqrt']=np.sqrt(air_df['Passengers'])
+sqrt_df.head()
+
+test_stationarity(sqrt_df,'sqrt')
+
+cbrt_df=air_df[['Passengers']]
+cbrt_df['cbrt']=np.cbrt(air_df['Passengers'])
+cbrt_df.head()
+
+test_stationarity(cbrt_df,'cbrt')
+
+log_df2=log_df[['Passengers','log']]
+log_df2['log_sqrt']=np.sqrt(log_df['log'])
+log_df2.head()
+
+test_stationarity(log_df2,'log_sqrt')
+
+log_df2=log_df[['Passengers','log']]
+log_df2['log_sqrt']=np.sqrt(log_df['log'])
+log_df2['logShiftDiff']=log_df2['log_sqrt']-log_df2['log_sqrt'].shift()
+log_df2.head()
+
+test_stationarity(log_df2.dropna(),'logShiftDiff')
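The transformations above (shift-difference, log, sqrt, cbrt) all aim to stabilize the rolling mean and standard deviation that test_stationarity plots. A small illustrative sketch of why log-then-difference works on multiplicative growth (synthetic data; all names are assumptions):

import numpy as np
import pandas as pd

idx = pd.date_range('2000-01-01', periods=144, freq='MS')
rng = np.random.default_rng(1)
y = pd.Series(np.exp(0.02 * np.arange(144)) * (10 + rng.normal(0, 1, 144)), index=idx)

log_diff = np.log(y).diff().dropna()    # log flattens the growth, differencing removes the trend
print(y.rolling(12).std().dropna().head(3))          # rolling std grows with the level
print(log_diff.rolling(12).std().dropna().head(3))   # rolling std is roughly constant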
myawesomepkg-0.1.4/myawesomepkg/TSAPY1/practical_no_5_ac_and_pca.py
ADDED
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+"""Practical No 5 AC and PCA.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1sGHsmswvo180eVjgUJh53fzwK1eJoZRQ
+
+# Aim: Implementing auto-correlation and partial auto-correlation on time series
+"""
+
+# ACF plot of time series
+from pandas import read_csv
+from matplotlib import pyplot
+#from statsmodels.graphics.tsaplots import plot_acf
+from pandas.plotting import autocorrelation_plot
+series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv', header=0, index_col=0,parse_dates=True, squeeze=True)
+#plot_acf(series)
+autocorrelation_plot(series)
+pyplot.show()
+
+# zoomed-in ACF plot of time series
+from pandas import read_csv
+from matplotlib import pyplot
+from statsmodels.graphics.tsaplots import plot_acf
+series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv', header=0, index_col=0,parse_dates=True, squeeze=True)
+plot_acf(series, lags=50)
+pyplot.show()
+
+# PACF plot of time series
+from pandas import read_csv
+from matplotlib import pyplot
+from statsmodels.graphics.tsaplots import plot_pacf
+series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv', header=0, index_col=0,
+                  parse_dates=True, squeeze=True)
+plot_pacf(series, lags=50)
+pyplot.show()
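For readers without the Drive CSV, the same plots can be exercised on a process whose correlation structure is known. A sketch on a simulated AR(2), where the PACF cutting off after lag 2 is the textbook signature (the coefficients are illustrative assumptions):

import numpy as np
from matplotlib import pyplot
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

rng = np.random.default_rng(0)
e = rng.normal(0, 1, 600)
x = np.zeros(600)
for t in range(2, 600):
    x[t] = 0.6 * x[t-1] + 0.2 * x[t-2] + e[t]   # AR(2) process

plot_acf(x, lags=30)     # tails off gradually
plot_pacf(x, lags=30)    # roughly cuts off after lag 2
pyplot.show()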
myawesomepkg-0.1.4/myawesomepkg/TSAPY1/practical_no_6.py
ADDED
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+"""Practical No 6.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1xuSGmR5e0t1nj5TDFB9ASYxhiG_yKZyx
+
+# Aim: Perform autoregression on time series data
+"""
+
+# create and evaluate a static autoregressive model
+from pandas import read_csv
+from matplotlib import pyplot
+from statsmodels.tsa.ar_model import AutoReg
+from sklearn.metrics import mean_squared_error
+from math import sqrt
+# load dataset
+series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-min-temperatures.csv', header=0, index_col=0,parse_dates=True, squeeze=True)
+# split dataset
+X = series.values
+train, test = X[1:len(X)-7], X[len(X)-7:]
+# train autoregression
+model = AutoReg(train,30)
+model_fit = model.fit()
+print('Lag: %s' % model_fit.ar_lags)
+print('Coefficients: %s' % model_fit.params)
+# make predictions
+predictions = model_fit.predict(start=len(train), end=len(train)+len(test)-1, dynamic=False)
+for i in range(len(predictions)):
+    print('predicted=%f, expected=%f' % (predictions[i], test[i]))
+rmse = sqrt(mean_squared_error(test, predictions))
+print('Test RMSE: %.3f' % rmse)
+# plot results
+pyplot.plot(test)
+pyplot.plot(predictions, color='red')
+pyplot.show()
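The same AutoReg train/forecast pattern on a simulated AR(1), so the recovered coefficient can be checked against a known value (a sketch; lags=5 and the 0.8 coefficient are assumptions for illustration):

import numpy as np
from statsmodels.tsa.ar_model import AutoReg

rng = np.random.default_rng(0)
e = rng.normal(0, 1, 400)
x = np.zeros(400)
for t in range(1, 400):
    x[t] = 0.8 * x[t-1] + e[t]        # AR(1) with coefficient 0.8

train, test = x[:-7], x[-7:]
model_fit = AutoReg(train, lags=5).fit()
print(model_fit.params.round(3))      # params[1] should come out close to 0.8
predictions = model_fit.predict(start=len(train), end=len(train)+len(test)-1)
print(predictions.round(3))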
myawesomepkg-0.1.4/myawesomepkg/TSAPY1/practical_no_7.py
ADDED
@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+"""Practical No 7.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1rOzVCBXjN31-6HIb6h8zRRSH3R-MoGvG
+
+# Aim: Forecasting using MA model.
+"""
+
+# correct forecasts with a model of forecast residual errors
+from pandas import read_csv
+from pandas import DataFrame
+from pandas import concat
+from statsmodels.tsa.ar_model import AutoReg
+from matplotlib import pyplot
+from sklearn.metrics import mean_squared_error
+from math import sqrt
+# load data
+series = read_csv('/content/drive/MyDrive/MScDS TSA/daily-total-female-births-CA.csv', header=0, index_col=0, parse_dates=True,squeeze=True)
+# create lagged dataset
+values = DataFrame(series.values)
+dataframe = concat([values.shift(1), values], axis=1)
+dataframe.columns = ['t', 't+1']
+print(dataframe)
+X = dataframe.values
+
+# split into train and test sets
+X = dataframe.values
+train_size = int(len(X) * 0.66)
+train, test = X[1:train_size], X[train_size:]
+train_X, train_y = train[:,0], train[:,1]
+test_X, test_y = test[:,0], test[:,1]
+# persistence model on training set
+train_pred = [x for x in train_X]
+# calculate residuals
+train_resid = [train_y[i]-train_pred[i] for i in range(len(train_pred))]
+# model the training set residuals
+model = AutoReg(train_resid,20)
+model_fit = model.fit()
+window = len(model_fit.ar_lags)
+coef = model_fit.params
+# walk forward over time steps in test
+history = train_resid[len(train_resid)-window:]
+history = [history[i] for i in range(len(history))]
+predictions = list()
+for t in range(len(test_y)):
+    # persistence
+    yhat = test_X[t]
+    error = test_y[t] - yhat
+    # predict error
+    length = len(history)
+    lag = [history[i] for i in range(length-window,length)]
+    pred_error = coef[0]
+    for d in range(window):
+        pred_error += coef[d+1] * lag[window-d-1]
+    # correct the prediction
+    yhat = yhat + pred_error
+    predictions.append(yhat)
+    history.append(error)
+    print('predicted=%f, expected=%f' % (yhat, test_y[t]))
+# error
+rmse = sqrt(mean_squared_error(test_y, predictions))
+print('Test RMSE: %.3f' % rmse)
+# plot predicted error
+pyplot.plot(test_y)
+pyplot.plot(predictions, color='red')
+pyplot.show()
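The walk-forward loop above corrects a persistence forecast with an AR model of its residual errors. The same logic in a condensed, self-contained form (synthetic series; the window size of 10 is an assumption):

import numpy as np
from math import sqrt
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.ar_model import AutoReg

rng = np.random.default_rng(0)
y = 50 + np.cumsum(rng.normal(0, 1, 300))   # synthetic level series

split = int(len(y) * 0.66)
train, test = y[:split], y[split:]
resid = train[1:] - train[:-1]              # persistence residuals on the training set
coef = AutoReg(resid, lags=10).fit().params

history = list(resid[-10:])
predictions = []
prev = train[-1]
for actual in test:
    lag = history[-10:]
    pred_error = coef[0] + sum(coef[i+1] * lag[-1-i] for i in range(10))
    predictions.append(prev + pred_error)   # persistence forecast + predicted error
    history.append(actual - prev)           # realized persistence error
    prev = actual
print('Test RMSE: %.3f' % sqrt(mean_squared_error(test, predictions)))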
myawesomepkg-0.1.4/myawesomepkg/TSAPY1/practical_no_8.py
ADDED
@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+"""Practical No 8.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1Ai_ZPo-aPrKa7A_iQFtal5vinfQbYQvY
+
+# Aim: Forecasting using ARIMA model
+
+Time Series Forecasting With ARIMA Model in Python for Temperature Prediction.
+
+**1) Reading Time Series Data in Python using Pandas library**
+"""
+
+import pandas as pd
+df=pd.read_csv('/content/drive/MyDrive/MScDS TSA/MaunaLoaDailyTemps.csv',index_col='DATE',parse_dates=True)
+df=df.dropna()
+print('Shape of data',df.shape)
+df.head()
+df
+
+"""Plot your data"""
+
+df['AvgTemp'].plot(figsize=(12,5))
+
+"""**2) Checking for stationarity of time series model**"""
+
+from statsmodels.tsa.stattools import adfuller
+def adf_test(dataset):
+    dftest = adfuller(dataset, autolag = 'AIC')
+    print("1. ADF : ",dftest[0])
+    print("2. P-Value : ", dftest[1])
+    print("3. Num Of Lags : ", dftest[2])
+    print("4. Num Of Observations Used For ADF Regression:", dftest[3])
+    print("5. Critical Values :")
+    for key, val in dftest[4].items():
+        print("\t",key, ": ", val)
+adf_test(df['AvgTemp'])
+
+"""3) Auto ARIMA function to select the order of the model"""
+
+# Install once before running: pip install pmdarima  (in Colab: !pip install pmdarima)
+
+from pmdarima import auto_arima
+import warnings
+warnings.filterwarnings("ignore")
+stepwise_fit=auto_arima(df['AvgTemp'],trace=True,suppress_warnings=True)
+stepwise_fit.summary()
+
+"""Split your dataset"""
+
+print(df.shape)
+train=df.iloc[:-30]
+test=df.iloc[-30:]
+print(train.shape,test.shape)
+
+from statsmodels.tsa.arima.model import ARIMA
+model=ARIMA(train['AvgTemp'],order=(1,0,5))
+model=model.fit()
+model.summary()
+
+"""Check how good your model is"""
+
+start=len(train)
+end=len(train)+len(test)-1
+pred=model.predict(start=start,end=end,typ='levels').rename('ARIMA Predictions')
+print(pred)
+pred.index=df.index[start:end+1]
+pred.plot(legend=True)
+test['AvgTemp'].plot(legend=True)
+
+"""Check your accuracy metric"""
+
+from sklearn.metrics import mean_squared_error
+from math import sqrt
+test['AvgTemp'].mean()
+rmse=sqrt(mean_squared_error(pred,test['AvgTemp']))
+print(rmse)
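The same fit/predict/RMSE sequence without the Drive CSV or pmdarima, on a synthetic daily series (a sketch; the (1,1,1) order is an assumption standing in for whatever auto_arima would select):

import numpy as np
import pandas as pd
from math import sqrt
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima.model import ARIMA

rng = np.random.default_rng(0)
idx = pd.date_range('2015-01-01', periods=365, freq='D')
y = pd.Series(20 + np.cumsum(rng.normal(0, 0.5, 365)), index=idx)

train, test = y.iloc[:-30], y.iloc[-30:]           # hold out the last 30 days
model_fit = ARIMA(train, order=(1, 1, 1)).fit()
pred = model_fit.predict(start=len(train), end=len(y) - 1)
print('Test RMSE: %.3f' % sqrt(mean_squared_error(test, pred)))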
myawesomepkg-0.1.4/myawesomepkg/TSAPY1/tsa_practical_no_1.py
ADDED
@@ -0,0 +1,287 @@
+# -*- coding: utf-8 -*-
+"""TSA_Practical_No_1.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1-xNPJwhkAO4KzKFMvXII92dc9Y_VTQ8H
+
+**Practical No 1:**
+**Aim: Handling time series data**
+
+a. Import time series data
+
+b. Visualizing time series data using various plots
+
+# **A. Load and Explore Time Series Data**
+"""
+
+from pandas import read_csv
+series = read_csv('/content/daily-total-female-births.csv', header=0, index_col=0, parse_dates=True)
+print(type(series))
+print(series.head())
+
+"""The arguments to the **read_csv()** function: we provide it a number of hints to ensure the data is loaded as a Series.
+
+1. **header=0:** We must specify the header information at row 0.
+2. **parse_dates=True:** We give the function a hint that data in the first column contains dates that need to be parsed.
+3. **index_col=0:** We hint that the first column contains the index information for the time series.
+4. **squeeze=True:** We hint that we only have one data column and that we are interested in a Series and not a DataFrame.
+
+You can use the **head()** function to peek at the first 5 records, or specify the first n records to review.
+"""
+
+print(series.head(10))
+
+"""**Number of Observations**
+You can get the dimensionality of your Series using the size parameter.
+"""
+
+print(series.size)
+
+"""**Querying By Time**
+You can slice, dice, and query your series using the time index. For example, you can access all observations in January as follows:
+
+**series.loc[]**
+
+Access a group of rows and columns by label(s) or a boolean array.
+.loc[] is primarily label based, but may also be used with a boolean array.
+"""
+
+print(series.loc["1959-01"])
+
+"""**Descriptive Statistics**
+Calculating descriptive statistics on your time series can help get an idea of the distribution and
+spread of values.
+The **describe()** function creates
+a summary of the loaded time series including the count, mean, standard deviation, median,
+minimum, and maximum of the observations.
+"""
+
+print(series.describe())
+
+"""# **B. Data Visualization**
+Visualization plays an important role in time series analysis and forecasting. Plots of the raw
+sample data can provide valuable diagnostics to identify temporal structures like trends, cycles,
+and seasonality that can influence the choice of model. A problem is that many novices in the
+field of time series forecasting stop with line plots.
+
+Different types of visualization that you can use on your own time series data are:
+1. Line Plots.
+2. Histograms and Density Plots.
+3. Box and Whisker Plots.
+4. Heat Maps.
+5. Lag Plots or Scatter Plots.
+6. Autocorrelation Plots.
+
+**Minimum Daily Temperatures Dataset**
+We will use the Minimum Daily Temperatures dataset as an example. This dataset
+describes the minimum daily temperatures over 10 years (1981-1990) in the city of Melbourne,
+Australia.
+"""
+
+from pandas import read_csv
+from matplotlib import pyplot
+series = read_csv('daily-min-temperatures.csv', header=0, index_col=0,parse_dates=True)
+print(series.head())
+series=series.squeeze()
+type(series)
+print(series.describe())
+
+"""**Line Plot**
+The first, and perhaps most popular, visualization for time series is the line plot. In this plot,
+time is shown on the x-axis with observation values along the y-axis. Below is an example of
+visualizing the Pandas Series of the Minimum Daily Temperatures dataset directly as a line
+plot.
+"""
+
+series.plot()
+pyplot.show()
+
+"""Changing the style of the line to
+be black dots instead of a connected line (the style='k.' argument).
+"""
+
+series.plot(style='k.')
+pyplot.show()
+
+series.plot(style='k--')
+pyplot.show()
+
+"""It can be helpful to compare line plots for the same interval, such as from day-to-day,
+month-to-month, and year-to-year. The Minimum Daily Temperatures dataset spans 10
+years. We can group data by year and create a line plot for each year for direct comparison. The example below shows how to do this. First the observations are grouped by year (series.groupby(Grouper(freq='A'))).
+
+A **Grouper** allows the user to specify a groupby instruction for an object.
+
+This specification will select a column via the key parameter, or if the level and/or axis parameters are given, a level of the index of the target object.
+
+If axis and/or level are passed as keywords to both Grouper and groupby, the values passed to Grouper take precedence.
+
+The groups are then enumerated and the observations for each year are stored as columns
+in a new DataFrame. Finally, a plot of this contrived DataFrame is created with each column
+visualized as a subplot with legends removed to cut back on the clutter.
+
+The **squeeze()** method converts a single-column DataFrame into a Series.
+"""
+
+from pandas import read_csv
+from pandas import DataFrame
+from pandas import Grouper
+from matplotlib import pyplot
+series = read_csv('/content/daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True)
+#print(series.head())
+
+series=series.squeeze()
+#print(series.head())
+groups = series.groupby(Grouper(freq='A'))
+#print(groups)
+years = DataFrame()
+#print(years)
+for name, group in groups:
+    years[name.year] = group.values
+print(years)
+years.plot(subplots=True, legend=False)
+pyplot.show()
+
+"""**Histogram and Density Plots**
+
+Another important visualization is of the distribution of observations themselves. This means a
+plot of the values without the temporal ordering. Some linear time series forecasting methods
+assume a well-behaved distribution of observations (i.e. a bell curve or normal distribution).
+This can be explicitly checked using tools like statistical hypothesis tests. But plots can provide
+a useful first check of the distribution of observations both on raw observations and after any
+type of data transform has been performed.
+The example below creates a histogram plot of the observations in the Minimum Daily
+Temperatures dataset. A histogram groups values into bins, and the frequency or count of
+observations in each bin can provide insight into the underlying distribution of the observations. Histograms and density plots provide insight into the distribution of all observations, but we
+may be interested in the distribution of values by time interval.
+"""
+
+series.hist()
+pyplot.show()
+
+"""Generate Kernel Density Estimate plot using Gaussian kernels."""
+
+series.plot(kind='kde')
+pyplot.show()
+
+years.boxplot()
+pyplot.show()
+
+"""**Box and Whisker Plots by Interval**
+
+Another type of plot that is
+useful to summarize the distribution of observations is the box and whisker plot. This plot
+draws a box around the 25th and 75th percentiles of the data that captures the middle 50% of
+observations. A line is drawn at the 50th percentile (the median) and whiskers are drawn above
+and below the box to summarize the general extents of the observations. Dots are drawn for
+outliers outside the whiskers or extents of the data.
+
+Box and whisker plots can be created and compared for each interval in a time series, such
+as years, months, or days. Below is an example of grouping the Minimum Daily Temperatures
+dataset by years, as was done above in the plot example. A box and whisker plot is then created
+for each year and lined up side-by-side for direct comparison.
+
+Within an interval,
+it can help to spot outliers (dots above or below the whiskers). Across intervals, in this case
+years, we can look for multiple year trends, seasonality, and other structural information that
+could be modeled.
+"""
+
+from pandas import read_csv
+from pandas import DataFrame
+from pandas import Grouper
+from matplotlib import pyplot
+series = read_csv('daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True)
+series=series.squeeze()
+groups = series.groupby(Grouper(freq='A'))
+years = DataFrame()
+for name, group in groups:
+    years[name.year] = group.values
+years.boxplot()
+pyplot.show()
+
+"""**Heat Maps**
+A matrix of numbers can be plotted as a surface, where the values in each cell of the matrix are
+assigned a unique color. This is called a heatmap, as larger values can be drawn with warmer
+colors (yellows and reds) and smaller values can be drawn with cooler colors (blues and greens).
+Like the box and whisker plots, we can compare observations between intervals using a heat
+map.
+In the case of the Minimum Daily Temperatures, the observations can be arranged into a
+matrix of year-columns and day-rows, with minimum temperature in the cell for each day. A
+heat map of this matrix can then be plotted. Below is an example of creating a heatmap of
+the Minimum Daily Temperatures data. The matshow() function from the Matplotlib library
+is used as no heatmap support is provided directly in Pandas. For convenience, the matrix is rotated (transposed) so that each row represents one year and each column one day. This
+provides a more intuitive, left-to-right layout of the data.
+"""
+
+from pandas import read_csv
+from pandas import DataFrame
+from pandas import Grouper
+from matplotlib import pyplot
+series = read_csv('daily-min-temperatures.csv', header=0, index_col=0, parse_dates=True)
+series=series.squeeze()
+groups = series.groupby(Grouper(freq='A'))
+years = DataFrame()
+for name, group in groups:
+    years[name.year] = group.values
+years = years.T
+print(years)
+pyplot.matshow(years, interpolation=None, aspect='auto')
+pyplot.show()
+
+"""The plot shows the cooler minimum temperatures in the middle days of the years and
+the warmer minimum temperatures at the start and end of each year, and all the fading and
+complexity in between.
+
+**Lag Scatter Plots**
+Time series modeling assumes a relationship between an observation and the previous observation.
+Previous observations in a time series are called lags, with the observation at the previous time step called lag=1, the observation at two time steps ago lag=2, and so on. A useful type of plot
+to explore the relationship between each observation and a lag of that observation is the
+scatter plot. Pandas has a built-in function for exactly this called the lag plot. It plots the
+observation at time t on the x-axis and the observation at the next time step (t+1) on the
+y-axis.
+
+If the points cluster along a diagonal line from the bottom-left to the top-right of the plot,
+it suggests a positive correlation relationship.
+If the points cluster along a diagonal line from the top-left to the bottom-right, it suggests
+a negative correlation relationship.
+Either relationship is good as they can be modeled.
+
+More points tighter in to the diagonal line suggests a stronger relationship, and more spread
+from the line suggests a weaker relationship. A ball in the middle or a spread across the plot
+suggests a weak or no relationship.
+"""
+
+from pandas.plotting import lag_plot
+lag_plot(series)
+pyplot.show()
+
+"""The plot created from running the example shows a relatively strong positive correlation
+between observations and their lag=1 values.
+
+**Autocorrelation Plots**
+We can quantify the strength and type of relationship between observations and their lags. In
+statistics, this is called correlation, and when calculated against lag values in time series, it is
+called autocorrelation (self-correlation). A correlation value calculated between two groups of
+numbers, such as observations and their lag=1 values, results in a number between -1 and 1.
+The sign of this number indicates a negative or positive correlation respectively. A value close to
+zero suggests a weak correlation, whereas a value closer to -1 or 1 indicates a strong correlation.
+Correlation values, called correlation coefficients, can be calculated for each observation and
+different lag values. Once calculated, a plot can be created to help better understand how this
+relationship changes over the lag. This type of plot is called an autocorrelation plot, and Pandas provides this capability built in via the autocorrelation_plot() function.
+"""
+
+from pandas.plotting import autocorrelation_plot
+autocorrelation_plot(series)
+pyplot.show()
+
+"""The resulting plot shows lag along the x-axis and the correlation on the y-axis. Dotted lines
+are provided that indicate any correlation values above those lines are statistically significant
+(meaningful). We can see that for the Minimum Daily Temperatures dataset we see cycles of
+strong negative and positive correlation. This captures the relationship of an observation with
+past observations in the same and opposite seasons or times of year. Sine waves like those seen
+in this example are a strong sign of seasonality in the dataset.
+"""
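The grouping trick used above for the per-year line, box, and heat-map plots works without the temperatures CSV too. A self-contained sketch (synthetic seasonal series over three non-leap years, so every column has 365 values; all names are illustrative):

import numpy as np
import pandas as pd
from pandas import DataFrame, Grouper
from matplotlib import pyplot

rng = np.random.default_rng(0)
idx = pd.date_range('1981-01-01', '1983-12-31', freq='D')
doy = idx.dayofyear.to_numpy()
series = pd.Series(11 + 4 * np.sin(2 * np.pi * doy / 365.0) + rng.normal(0, 1, len(idx)), index=idx)

years = DataFrame()
for name, group in series.groupby(Grouper(freq='A')):
    years[name.year] = group.values
years.plot(subplots=True, legend=False)                       # one line plot per year
years.boxplot()                                               # one box per year
pyplot.matshow(years.T, interpolation=None, aspect='auto')    # heat map, one row per year
pyplot.show()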
myawesomepkg-0.1.4/myawesomepkg/TSAPY1/tsa_practical_no_2.py
ADDED
@@ -0,0 +1,121 @@
+# -*- coding: utf-8 -*-
+"""TSA_Practical No 2.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1X2scZpqK4F_dqV8QohehZCIZswnttekq
+
+**Practical No 2**
+**Aim: Implementing timeseries components**
+1. Seasonality
+2. Trend
+3. Pattern
+4. Cyclic
+
+**random.normal(loc=0.0, scale=1.0, size=None)**
+
+Draw random samples from a normal (Gaussian) distribution.
+
+**Parameters:**
+loc: float or array_like of floats
+Mean (“centre”) of the distribution.
+
+scale: float or array_like of floats
+Standard deviation (spread or “width”) of the distribution. Must be non-negative.
+
+size: int or tuple of ints, optional
+Output shape.
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Upward Trend
+t = np.arange(0, 10, 0.1)
+data = t + np.random.normal(0, 0.5, len(t))
+plt.plot(t, data, label='Upward Trend')
+
+# Downward Trend
+t = np.arange(0, 10, 0.1)
+data = -t + np.random.normal(0, 0.5, len(t))
+plt.plot(t, data, label='Downward Trend')
+
+# Horizontal Trend
+t = np.arange(0, 10, 0.1)
+data = np.zeros(len(t)) + np.random.normal(0, 0.5, len(t))
+plt.plot(t, data, label='Horizontal Trend')
+
+# Non-linear Trend
+t = np.arange(0, 10, 0.1)
+data = t**2 + np.random.normal(0, 0.5, len(t))
+plt.plot(t, data, label='Non-linear Trend')
+
+plt.legend()
+plt.show()
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+# generate sample data with different types of seasonality
+np.random.seed(1)
+time = np.arange(0, 366)
+
+# weekly seasonality
+weekly_seasonality = np.sin(2 * np.pi * time / 7)
+weekly_data = 5 + weekly_seasonality
+
+# monthly seasonality
+monthly_seasonality = np.sin(2 * np.pi * time / 30)
+monthly_data = 5 + monthly_seasonality
+
+# annual seasonality
+annual_seasonality = np.sin(2 * np.pi * time / 365)
+annual_data = 5 + annual_seasonality
+
+# plot the data
+plt.figure(figsize=(12, 8))
+plt.plot(time, weekly_data,label='Weekly Seasonality')
+plt.plot(time, monthly_data,label='Monthly Seasonality')
+plt.plot(time, annual_data,label='Annual Seasonality')
+plt.legend(loc='upper left')
+plt.show()
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Generate sample data with cyclic patterns
+np.random.seed(1)
+time = np.array([0, 30, 60, 90, 120,
+                 150, 180, 210, 240,
+                 270, 300, 330])
+data = 10 * np.sin(2 * np.pi * time / 50) + 20 * np.sin(2 * np.pi * time / 100)
+
+# Plot the data
+plt.figure(figsize=(12, 8))
+plt.plot(time, data, label='Cyclic Data')
+plt.legend(loc='upper left')
+plt.xlabel('Time (days)')
+plt.ylabel('Value')
+plt.title('Cyclic Time Series Data')
+plt.show()
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Generate sample time series data
+np.random.seed(1)
+time = np.arange(0, 100)
+#data = 5 * np.sin(2 * np.pi * time / 20) + 2 * time
+data=np.sin(2 * np.pi * time / 30)+time
+
+# Introduce irregularities by adding random noise
+irregularities = np.random.normal(0, 5, len(data))
+irregular_data = data + irregularities
+
+# Plot the original data and the data with irregularities
+plt.figure(figsize=(12, 8))
+plt.plot(time, data, label='Original Data')
+plt.plot(time, irregular_data,label='Data with Irregularities')
+plt.legend(loc='upper left')
+plt.show()
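Tying the components together: pieces like those generated above can be summed into one series and then recovered with seasonal_decompose (a sketch, not package code; period=30 matches the synthetic monthly-ish cycle):

import numpy as np
import pandas as pd
from statsmodels.tsa.seasonal import seasonal_decompose

t = np.arange(0, 365)
rng = np.random.default_rng(1)
y = pd.Series(10 + 0.05 * t                       # trend
              + 3 * np.sin(2 * np.pi * t / 30)    # seasonality
              + rng.normal(0, 0.5, len(t)),       # irregularity
              index=pd.date_range('2020-01-01', periods=len(t), freq='D'))

result = seasonal_decompose(y, model='additive', period=30)
print(result.trend.dropna().head())    # recovered trend component
print(result.seasonal.head())          # recovered seasonal component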
myawesomepkg-0.1.4/myawesomepkg.egg-info/PKG-INFO
ADDED
@@ -0,0 +1,11 @@
+Metadata-Version: 2.1
+Name: myawesomepkg
+Version: 0.1.4
+Summary: A simple greeting library
+Author: Your Name
+Author-email: your.email@example.com
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.6
+Description-Content-Type: text/markdown
myawesomepkg-0.1.4/myawesomepkg.egg-info/SOURCES.txt
ADDED
@@ -0,0 +1,26 @@
+setup.py
+myawesomepkg/__init__.py
+myawesomepkg/core.py
+myawesomepkg.egg-info/PKG-INFO
+myawesomepkg.egg-info/SOURCES.txt
+myawesomepkg.egg-info/dependency_links.txt
+myawesomepkg.egg-info/top_level.txt
+myawesomepkg/TSAPY1/Practical No 1.py
+myawesomepkg/TSAPY1/Practical No 2.py
+myawesomepkg/TSAPY1/Practical No 3.py
+myawesomepkg/TSAPY1/Practical No 4 A.py
+myawesomepkg/TSAPY1/Practical No 4 B.py
+myawesomepkg/TSAPY1/Practical No 5.py
+myawesomepkg/TSAPY1/Practical No 6.py
+myawesomepkg/TSAPY1/Practical No 7.py
+myawesomepkg/TSAPY1/Practical No 8.py
+myawesomepkg/TSAPY1/__init__.py
+myawesomepkg/TSAPY1/practical_no_3.py
+myawesomepkg/TSAPY1/practical_no_4.py
+myawesomepkg/TSAPY1/practical_no_4b.py
+myawesomepkg/TSAPY1/practical_no_5_ac_and_pca.py
+myawesomepkg/TSAPY1/practical_no_6.py
+myawesomepkg/TSAPY1/practical_no_7.py
+myawesomepkg/TSAPY1/practical_no_8.py
+myawesomepkg/TSAPY1/tsa_practical_no_1.py
+myawesomepkg/TSAPY1/tsa_practical_no_2.py
myawesomepkg-0.1.4/setup.py
ADDED
@@ -0,0 +1,18 @@
+from setuptools import setup, find_packages
+
+setup(
+    name='myawesomepkg',                     # Your package name
+    version='0.1.4',                         # Current version
+    author='Your Name',                      # Replace with your actual name
+    author_email='your.email@example.com',   # Optional: add your email
+    description='A simple greeting library',
+    long_description_content_type='text/markdown',
+    packages=find_packages(),
+    install_requires=[],                     # List dependencies here, e.g., ['numpy']
+    python_requires='>=3.6',
+    classifiers=[                            # Optional but good for PyPI listings
+        'Programming Language :: Python :: 3',
+        'License :: OSI Approved :: MIT License',  # Change to your license
+        'Operating System :: OS Independent',
+    ],
+)
myawesomepkg-0.1.3/myawesomepkg/d.py
DELETED
@@ -1,36 +0,0 @@
-import math
-n=int(input("Enter no of input neurons:"))
-
-print("enter input")
-inputs=[]
-
-for i in range(0,n):
-    x=float(input())
-    inputs.append(x)
-print(inputs)
-
-print("enter weight")
-weights=[]
-
-for i in range(0,n):
-    w=float(input())
-    weights.append(w)
-print(weights)
-
-print(" the net input is calculated as Yin=x1w1+x2w2+x3w3")
-
-Yin=[]
-for i in range(0,n):
-    Yin.append(inputs[i]*weights[i])
-ynet=round(sum(Yin),3)
-
-print("net input for y neuron",ynet)
-
-print("apply activation function over net input, Binary function")
-
-y=round(1/(1+math.exp(-ynet)),3)
-print(y)
-
-print("apply activation function over net input, Bipolar function")
-y=round((2/(1+math.exp(-ynet)))-1,3)
-print(y)
myawesomepkg-0.1.3/myawesomepkg.egg-info/SOURCES.txt
DELETED
@@ -1,18 +0,0 @@
-setup.py
-myawesomepkg/__init__.py
-myawesomepkg/core.py
-myawesomepkg/d.py
-myawesomepkg.egg-info/PKG-INFO
-myawesomepkg.egg-info/SOURCES.txt
-myawesomepkg.egg-info/dependency_links.txt
-myawesomepkg.egg-info/top_level.txt
-myawesomepkg/TSAPY/Practical No 1.py
-myawesomepkg/TSAPY/Practical No 2.py
-myawesomepkg/TSAPY/Practical No 3.py
-myawesomepkg/TSAPY/Practical No 4 A.py
-myawesomepkg/TSAPY/Practical No 4 B.py
-myawesomepkg/TSAPY/Practical No 5.py
-myawesomepkg/TSAPY/Practical No 6.py
-myawesomepkg/TSAPY/Practical No 7.py
-myawesomepkg/TSAPY/Practical No 8.py
-myawesomepkg/TSAPY/__init__.py
myawesomepkg-0.1.3/setup.py
DELETED
@@ -1,11 +0,0 @@
-from setuptools import setup, find_packages
-
-setup(
-    name='myawesomepkg',       # Change this to your library name
-    version='0.1.3',
-    author='Your Name',        # Put your name here
-    description='A simple greeting library',
-    packages=find_packages(),
-    install_requires=[],       # Add required libraries here, if any
-    python_requires='>=3.6',
-)
{myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 1.py
RENAMED
File without changes
{myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 2.py
RENAMED
File without changes
{myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 3.py
RENAMED
File without changes
{myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 4 A.py
RENAMED
File without changes
{myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 4 B.py
RENAMED
File without changes
{myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 5.py
RENAMED
File without changes
{myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 6.py
RENAMED
File without changes
{myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 7.py
RENAMED
File without changes
{myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/Practical No 8.py
RENAMED
File without changes
{myawesomepkg-0.1.3/myawesomepkg/TSAPY → myawesomepkg-0.1.4/myawesomepkg/TSAPY1}/__init__.py
RENAMED
File without changes
{myawesomepkg-0.1.3 → myawesomepkg-0.1.4}/myawesomepkg/__init__.py
RENAMED
File without changes
{myawesomepkg-0.1.3 → myawesomepkg-0.1.4}/myawesomepkg/core.py
RENAMED
File without changes
{myawesomepkg-0.1.3 → myawesomepkg-0.1.4}/myawesomepkg.egg-info/dependency_links.txt
RENAMED
File without changes
{myawesomepkg-0.1.3 → myawesomepkg-0.1.4}/myawesomepkg.egg-info/top_level.txt
RENAMED
File without changes
{myawesomepkg-0.1.3 → myawesomepkg-0.1.4}/setup.cfg
RENAMED
File without changes