PythonTsa 1.5.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pythontsa-1.5.3/PKG-INFO +25 -0
- pythontsa-1.5.3/README.md +4 -0
- pythontsa-1.5.3/pyproject.toml +3 -0
- pythontsa-1.5.3/setup.cfg +4 -0
- pythontsa-1.5.3/setup.py +24 -0
- pythontsa-1.5.3/src/PythonTsa/CheckStationarynInvertible.py +38 -0
- pythontsa-1.5.3/src/PythonTsa/CointegrationTest.py +533 -0
- pythontsa-1.5.3/src/PythonTsa/LjungBoxtest.py +162 -0
- pythontsa-1.5.3/src/PythonTsa/ModResidDiag.py +149 -0
- pythontsa-1.5.3/src/PythonTsa/MultiCorrPvalue.py +88 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/AntidiabeticDrugSales.csv +204 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/AustraliaEmployedTotalPersons.xlsx +0 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/AustraliaUnemployedTotalPersons.xlsx +0 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/BitcoinPrice17-6-23-18-6-22.xlsx +0 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/DAX.csv +1193 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/DAXlogret.csv +1192 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/EconGermany.dat +93 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/ExchRate NZ per UK.txt +40 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/Global mean surface air temp changes 1880-1985.csv +106 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/GlobalTemperature.txt +150 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/IBM.csv +652 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/Noboyngirl.csv +64 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/RwalkwDrift0.3.csv +250 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/SP500dailyreturns.csv +5030 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/Southtemperature.txt +158 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/USEconomicChange.csv +188 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/USFemalesAged20+Job1948-81.csv +408 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/USQgdpunemp.csv +257 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/USbill.csv +462 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/USmacronInRate.txt +215 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/WTI-Brent.csv +400 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/Yearly mean total sunspot number 1700 - 2017.csv +318 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/areturns.csv +522 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/chaos.csv +500 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/dlGDPukcaus1q1980.csv +126 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/elec-temp.csv +26305 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/gdpquarterlychina1992.1-2017.4.csv +105 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/h02July1991June2008.csv +205 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/ibmlogret.csv +241 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/milk.xlsx +0 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/monthly returns of PG stock 1961 to 2016.csv +672 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/monthly returns of Procter n Gamble stock n 3 market indexes 1961 to 2016.csv +673 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/nao.csv +832 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/realGdpConsInv.csv +204 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/us-q-rgdp.csv +299 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/usFOI.csv +227 -0
- pythontsa-1.5.3/src/PythonTsa/Ptsadata/usGDPnotAdjust.csv +300 -0
- pythontsa-1.5.3/src/PythonTsa/RandomWalk.py +38 -0
- pythontsa-1.5.3/src/PythonTsa/SeasonalRW.py +27 -0
- pythontsa-1.5.3/src/PythonTsa/Selecting_arma.py +77 -0
- pythontsa-1.5.3/src/PythonTsa/Selecting_arma2.py +76 -0
- pythontsa-1.5.3/src/PythonTsa/SimulSBM.py +23 -0
- pythontsa-1.5.3/src/PythonTsa/True_acf.py +36 -0
- pythontsa-1.5.3/src/PythonTsa/TsTensor.py +129 -0
- pythontsa-1.5.3/src/PythonTsa/__init__.py +1 -0
- pythontsa-1.5.3/src/PythonTsa/datadir.py +12 -0
- pythontsa-1.5.3/src/PythonTsa/openPDF.py +10 -0
- pythontsa-1.5.3/src/PythonTsa/plot_acf_pacf.py +40 -0
- pythontsa-1.5.3/src/PythonTsa/plot_multi_ACF.py +98 -0
- pythontsa-1.5.3/src/PythonTsa/plot_multi_Q_pvalue.py +97 -0
- pythontsa-1.5.3/src/PythonTsa.egg-info/PKG-INFO +25 -0
- pythontsa-1.5.3/src/PythonTsa.egg-info/SOURCES.txt +62 -0
- pythontsa-1.5.3/src/PythonTsa.egg-info/dependency_links.txt +1 -0
- pythontsa-1.5.3/src/PythonTsa.egg-info/top_level.txt +1 -0
pythontsa-1.5.3/PKG-INFO
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: PythonTsa
|
|
3
|
+
Version: 1.5.3
|
|
4
|
+
Summary: Package for Applied Time Series Analysis and Forecasting with Python, Springer 2022
|
|
5
|
+
Home-page: https://github.com/QuantLet/pyTSA
|
|
6
|
+
Author: Changquan Huang
|
|
7
|
+
Author-email: h.changquan@icloud.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.6
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Dynamic: author
|
|
14
|
+
Dynamic: author-email
|
|
15
|
+
Dynamic: classifier
|
|
16
|
+
Dynamic: description
|
|
17
|
+
Dynamic: description-content-type
|
|
18
|
+
Dynamic: home-page
|
|
19
|
+
Dynamic: requires-python
|
|
20
|
+
Dynamic: summary
|
|
21
|
+
|
|
22
|
+
This package is a companion to the book Applied Time Series Analysis and Forecasting with Python, Springer 2022.
|
|
23
|
+
|
|
24
|
+
It contains several key Python functions for analyzing time series and most data sets analyzed in the book. Naturally, these functions can also be used to analyze other time series data.
|
|
25
|
+
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
This package is a companion to the book Applied Time Series Analysis and Forecasting with Python, Springer 2022.
|
|
2
|
+
|
|
3
|
+
It contains several key Python functions for analyzing time series and most data sets analyzed in the book. Naturally, these functions can also be used to analyze other time series data.
|
|
4
|
+
|
pythontsa-1.5.3/setup.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import setuptools

# The README doubles as the long description in the package metadata.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Trove classifiers advertised in the package metadata.
_CLASSIFIERS = [
    "Programming Language :: Python :: 3",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
]

# Glob patterns (relative to the PythonTsa package directory) of the
# data files that are installed together with the code.
_PACKAGE_DATA = {
    "PythonTsa": [
        "Ptsadata/*.csv",
        "Ptsadata/*.txt",
        "Ptsadata/*.xlsx",
        "Ptsadata/*.dat",
    ],
}

setuptools.setup(
    name="PythonTsa",
    version="1.5.3",
    author="Changquan Huang",
    author_email="h.changquan@icloud.com",
    description="Package for Applied Time Series Analysis and Forecasting with Python, Springer 2022",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/QuantLet/pyTSA",
    classifiers=_CLASSIFIERS,
    # Packages live under src/ (src layout).
    package_dir={"": "src"},
    packages=setuptools.find_packages(where="src"),
    python_requires=">=3.6",
    package_data=_PACKAGE_DATA,
)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
def isstationary(armaProcess):
    """
    Check whether an ARMA process is stationary.

    Arma process is stationary if AR roots are outside unit circle.
    The modulus of every root is rounded to six decimals before it is
    compared with 1, so a root whose modulus rounds to 1.000000 is not
    counted as being outside the unit circle.

    Parameters
    ----------
    armaProcess : object
        Object exposing the AR polynomial roots as ``arroots``
        (an iterable of real or complex numbers).

    Returns
    -------
    bool
        True if AR roots all are outside unit circle. Also True when
        there are no AR roots at all.
    """
    # Build a fresh real-valued array of rounded moduli.  The previous
    # implementation wrote the moduli back into ``armaProcess.arroots``,
    # silently altering the caller's data, and then order-compared a
    # (possibly complex) array with 1.0.
    moduli = np.array(
        [float("%.6f" % abs(root)) for root in armaProcess.arroots]
    )
    return bool(np.all(moduli > 1.0))
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def isinvertible(armaProcess):
    """
    Check whether an ARMA process is invertible.

    Arma process is invertible if MA roots are outside unit circle.
    The modulus of every root is rounded to six decimals before it is
    compared with 1, so a root whose modulus rounds to 1.000000 is not
    counted as being outside the unit circle.

    Parameters
    ----------
    armaProcess : object
        Object exposing the MA polynomial roots as ``maroots``
        (an iterable of real or complex numbers).

    Returns
    -------
    bool
        True if MA roots all are outside unit circle. Also True when
        there are no MA roots at all.
    """
    # Build a fresh real-valued array of rounded moduli.  The previous
    # implementation wrote the moduli back into ``armaProcess.maroots``,
    # silently altering the caller's data, and then order-compared a
    # (possibly complex) array with 1.0.
    moduli = np.array(
        [float("%.6f" % abs(root)) for root in armaProcess.maroots]
    )
    return bool(np.all(moduli > 1.0))
|
|
@@ -0,0 +1,533 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cointegration test for time series
|
|
3
|
+
"""
|
|
4
|
+
from statsmodels.compat.numpy import lstsq
|
|
5
|
+
from statsmodels.compat.pandas import deprecate_kwarg
|
|
6
|
+
from statsmodels.compat.python import lrange, lzip
|
|
7
|
+
from statsmodels.compat.scipy import _next_regular
|
|
8
|
+
|
|
9
|
+
import warnings
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
from numpy.linalg import LinAlgError
|
|
13
|
+
import pandas as pd
|
|
14
|
+
from scipy import stats
|
|
15
|
+
#import matplotlib.pyplot as plt
|
|
16
|
+
|
|
17
|
+
from statsmodels.regression.linear_model import OLS, yule_walker
|
|
18
|
+
from statsmodels.tools.sm_exceptions import (
|
|
19
|
+
CollinearityWarning,
|
|
20
|
+
InfeasibleTestError,
|
|
21
|
+
InterpolationWarning,
|
|
22
|
+
MissingDataError,
|
|
23
|
+
)
|
|
24
|
+
from statsmodels.tools.tools import Bunch, add_constant
|
|
25
|
+
from statsmodels.tools.validation import (
|
|
26
|
+
array_like,
|
|
27
|
+
bool_like,
|
|
28
|
+
dict_like,
|
|
29
|
+
float_like,
|
|
30
|
+
int_like,
|
|
31
|
+
string_like,
|
|
32
|
+
)
|
|
33
|
+
#from statsmodels.tsa._bds import bds
|
|
34
|
+
#from statsmodels.tsa._innovations import innovations_algo, innovations_filter
|
|
35
|
+
from statsmodels.tsa.adfvalues import mackinnoncrit, mackinnonp
|
|
36
|
+
#from statsmodels.tsa.arima_model import ARMA
|
|
37
|
+
from statsmodels.tsa.tsatools import add_trend, lagmat, lagmat2ds
|
|
38
|
+
|
|
39
|
+
# Names exported by ``from <this module> import *``.  Every entry must be
# defined in this module: the previous list named functions that do not
# exist here (e.g. "acovf", "acf", "kpss"), which makes a star-import raise
# AttributeError, and it omitted ``cointest``.
__all__ = [
    "adfuller",
    "cointest",
]

# Square root of the double-precision machine epsilon; ``cointest`` uses it
# as the tolerance when checking whether R-squared is numerically equal to 1.
SQRTEPS = np.sqrt(np.finfo(np.double).eps)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _autolag(
|
|
65
|
+
mod,
|
|
66
|
+
endog,
|
|
67
|
+
exog,
|
|
68
|
+
startlag,
|
|
69
|
+
maxlag,
|
|
70
|
+
method,
|
|
71
|
+
modargs=(),
|
|
72
|
+
fitargs=(),
|
|
73
|
+
regresults=False,
|
|
74
|
+
):
|
|
75
|
+
"""
|
|
76
|
+
Returns the results for the lag length that maximizes the info criterion.
|
|
77
|
+
|
|
78
|
+
Parameters
|
|
79
|
+
----------
|
|
80
|
+
mod : Model class
|
|
81
|
+
Model estimator class
|
|
82
|
+
endog : array_like
|
|
83
|
+
nobs array containing endogenous variable
|
|
84
|
+
exog : array_like
|
|
85
|
+
nobs by (startlag + maxlag) array containing lags and possibly other
|
|
86
|
+
variables
|
|
87
|
+
startlag : int
|
|
88
|
+
The first zero-indexed column to hold a lag. See Notes.
|
|
89
|
+
maxlag : int
|
|
90
|
+
The highest lag order for lag length selection.
|
|
91
|
+
method : {"aic", "bic", "t-stat"}
|
|
92
|
+
aic - Akaike Information Criterion
|
|
93
|
+
bic - Bayes Information Criterion
|
|
94
|
+
t-stat - Based on last lag
|
|
95
|
+
modargs : tuple, optional
|
|
96
|
+
args to pass to model. See notes.
|
|
97
|
+
fitargs : tuple, optional
|
|
98
|
+
args to pass to fit. See notes.
|
|
99
|
+
regresults : bool, optional
|
|
100
|
+
Flag indicating to return optional return results
|
|
101
|
+
|
|
102
|
+
Returns
|
|
103
|
+
-------
|
|
104
|
+
icbest : float
|
|
105
|
+
Best information criteria.
|
|
106
|
+
bestlag : int
|
|
107
|
+
The lag length that maximizes the information criterion.
|
|
108
|
+
results : dict, optional
|
|
109
|
+
Dictionary containing all estimation results
|
|
110
|
+
|
|
111
|
+
Notes
|
|
112
|
+
-----
|
|
113
|
+
Does estimation like mod(endog, exog[:,:i], *modargs).fit(*fitargs)
|
|
114
|
+
where i goes from lagstart to lagstart+maxlag+1. Therefore, lags are
|
|
115
|
+
assumed to be in contiguous columns from low to high lag length with
|
|
116
|
+
the highest lag in the last column.
|
|
117
|
+
"""
|
|
118
|
+
# TODO: can tcol be replaced by maxlag + 2?
|
|
119
|
+
# TODO: This could be changed to laggedRHS and exog keyword arguments if
|
|
120
|
+
# this will be more general.
|
|
121
|
+
|
|
122
|
+
results = {}
|
|
123
|
+
method = method.lower()
|
|
124
|
+
for lag in range(startlag, startlag + maxlag + 1):
|
|
125
|
+
mod_instance = mod(endog, exog[:, :lag], *modargs)
|
|
126
|
+
results[lag] = mod_instance.fit()
|
|
127
|
+
|
|
128
|
+
if method == "aic":
|
|
129
|
+
icbest, bestlag = min((v.aic, k) for k, v in results.items())
|
|
130
|
+
elif method == "bic":
|
|
131
|
+
icbest, bestlag = min((v.bic, k) for k, v in results.items())
|
|
132
|
+
elif method == "t-stat":
|
|
133
|
+
# stop = stats.norm.ppf(.95)
|
|
134
|
+
stop = 1.6448536269514722
|
|
135
|
+
# Default values to ensure that always set
|
|
136
|
+
bestlag = startlag + maxlag
|
|
137
|
+
icbest = 0.0
|
|
138
|
+
for lag in range(startlag + maxlag, startlag - 1, -1):
|
|
139
|
+
icbest = np.abs(results[lag].tvalues[-1])
|
|
140
|
+
bestlag = lag
|
|
141
|
+
if np.abs(icbest) >= stop:
|
|
142
|
+
# Break for first lag with a significant t-stat
|
|
143
|
+
break
|
|
144
|
+
else:
|
|
145
|
+
raise ValueError(f"Information Criterion {method} not understood.")
|
|
146
|
+
|
|
147
|
+
if not regresults:
|
|
148
|
+
return icbest, bestlag
|
|
149
|
+
else:
|
|
150
|
+
return icbest, bestlag, results
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
# this needs to be converted to a class like HetGoldfeldQuandt,
|
|
154
|
+
# 3 different returns are a mess
|
|
155
|
+
# See:
|
|
156
|
+
# Ng and Perron(2001), Lag length selection and the construction of unit root
|
|
157
|
+
# tests with good size and power, Econometrica, Vol 69 (6) pp 1519-1554
|
|
158
|
+
# TODO: include drift keyword, only valid with regression == "c"
|
|
159
|
+
# just changes the distribution of the test statistic to a t distribution
|
|
160
|
+
# TODO: autolag is untested
|
|
161
|
+
def adfuller(
    x,
    maxlag=None,
    regression="c",
    autolag="AIC",
    store=False,
    regresults=False,
):
    """
    Augmented Dickey-Fuller unit root test.

    The Augmented Dickey-Fuller test can be used to test for a unit root in a
    univariate process in the presence of serial correlation.

    Parameters
    ----------
    x : array_like, 1d
        The data series to test.
    maxlag : int
        Maximum lag which is included in test, default 12*(nobs/100)^{1/4}
        (rounded up and capped at nobs // 2 - ntrend - 1).
    regression : {"c","ct","ctt","nc"}
        Constant and trend order to include in regression.

        * "c" : constant only (default).
        * "ct" : constant and trend.
        * "ctt" : constant, and linear and quadratic trend.
        * "nc" : no constant, no trend.

    autolag : {"AIC", "BIC", "t-stat", None}
        Method to use when automatically determining the lag length among the
        values 0, 1, ..., maxlag.

        * If "AIC" (default) or "BIC", then the number of lags is chosen
          to minimize the corresponding information criterion.
        * "t-stat" based choice of maxlag.  Starts with maxlag and drops a
          lag until the t-statistic on the last lag length is significant
          using a 5%-sized test.
        * If None, then the number of included lags is set to maxlag.
    store : bool
        If True, then a result instance is returned additionally to
        the adf statistic. Default is False.
    regresults : bool, optional
        If True, the full regression results are returned (this also
        switches ``store`` on). Default is False.

    Returns
    -------
    adf : float
        The test statistic.
    pvalue : float
        MacKinnon's approximate p-value based on MacKinnon (1994, 2010).
    usedlag : int
        The number of lags used. Only returned when ``store`` is False.
    nobs : int
        The number of observations used for the ADF regression and calculation
        of the critical values. Only returned when ``store`` is False.
    critical values : dict
        Critical values for the test statistic at the 1 %, 5 %, and 10 %
        levels. Based on MacKinnon (2010).
    icbest : float
        The minimized information criterion (or, for "t-stat", the absolute
        t-statistic of the selected last lag). Only returned when ``store``
        is False and autolag is not None.
    resstore : ResultStore, optional
        A dummy class with results attached as attributes. Only returned
        when ``store`` is True, in which case the return value is
        ``(adf, pvalue, critical values, resstore)``.

    Raises
    ------
    ValueError
        If the sample is too short for the selected regression component,
        or if a user-supplied ``maxlag`` exceeds nobs // 2 - ntrend - 1.

    Notes
    -----
    The null hypothesis of the Augmented Dickey-Fuller is that there is a unit
    root, with the alternative that there is no unit root. If the pvalue is
    above a critical size, then we cannot reject that there is a unit root.

    The p-values are obtained through regression surface approximation from
    MacKinnon 1994, but using the updated 2010 tables. If the p-value is close
    to significant, then the critical values should be used to judge whether
    to reject the null.

    The autolag option and maxlag for it are described in Greene.

    References
    ----------
    .. [1] W. Green.  "Econometric Analysis," 5th ed., Pearson, 2003.

    .. [2] Hamilton, J.D.  "Time Series Analysis".  Princeton, 1994.

    .. [3] MacKinnon, J.G. 1994.  "Approximate asymptotic distribution functions for
        unit-root and cointegration tests.  `Journal of Business and Economic
        Statistics` 12, 167-76.

    .. [4] MacKinnon, J.G. 2010. "Critical Values for Cointegration Tests."  Queen's
        University, Dept of Economics, Working Papers.  Available at
        http://ideas.repec.org/p/qed/wpaper/1227.html

    Examples
    --------
    See example notebook
    """
    x = array_like(x, "x")
    maxlag = int_like(maxlag, "maxlag", optional=True)
    # string_like only lets a str from ``options`` through, so no further
    # mapping of None/int trend codes or lower-casing is needed afterwards
    # (the former ``trenddict`` branch could never run).
    regression = string_like(
        regression, "regression", options=("c", "ct", "ctt", "nc")
    )
    autolag = string_like(
        autolag, "autolag", optional=True, options=("aic", "bic", "t-stat")
    )
    store = bool_like(store, "store")
    regresults = bool_like(regresults, "regresults")

    if regresults:
        store = True

    nobs = x.shape[0]

    # Number of deterministic regressors: one per character of the code.
    ntrend = len(regression) if regression != "nc" else 0
    if maxlag is None:
        # from Greene referencing Schwert 1989
        maxlag = int(np.ceil(12.0 * np.power(nobs / 100.0, 1 / 4.0)))
        # -1 for the diff
        maxlag = min(nobs // 2 - ntrend - 1, maxlag)
        if maxlag < 0:
            raise ValueError(
                "sample size is too short to use selected "
                "regression component"
            )
    elif maxlag > nobs // 2 - ntrend - 1:
        raise ValueError(
            "maxlag must be less than (nobs/2 - 1 - ntrend) "
            "where n trend is the number of included "
            "deterministic regressors"
        )
    xdiff = np.diff(x)
    xdall = lagmat(xdiff[:, None], maxlag, trim="both", original="in")
    nobs = xdall.shape[0]

    xdall[:, 0] = x[-nobs - 1 : -1]  # replace 0 xdiff with level of x
    xdshort = xdiff[-nobs:]

    if store:
        from statsmodels.stats.diagnostic import ResultsStore

        resstore = ResultsStore()
    if autolag:
        if regression != "nc":
            fullRHS = add_trend(xdall, regression, prepend=True)
        else:
            fullRHS = xdall
        startlag = fullRHS.shape[1] - xdall.shape[1] + 1
        # 1 for level
        # search for lag length with smallest information criteria
        # Note: use the same number of observations to have comparable IC
        # aic and bic: smaller is better

        if not regresults:
            icbest, bestlag = _autolag(
                OLS, xdshort, fullRHS, startlag, maxlag, autolag
            )
        else:
            icbest, bestlag, alres = _autolag(
                OLS,
                xdshort,
                fullRHS,
                startlag,
                maxlag,
                autolag,
                regresults=regresults,
            )
            resstore.autolag_results = alres

        bestlag -= startlag  # convert to lag not column index

        # rerun ols with best autolag
        xdall = lagmat(xdiff[:, None], bestlag, trim="both", original="in")
        nobs = xdall.shape[0]
        xdall[:, 0] = x[-nobs - 1 : -1]  # replace 0 xdiff with level of x
        xdshort = xdiff[-nobs:]
        usedlag = bestlag
    else:
        usedlag = maxlag
        icbest = None
    if regression != "nc":
        resols = OLS(
            xdshort, add_trend(xdall[:, : usedlag + 1], regression)
        ).fit()
    else:
        resols = OLS(xdshort, xdall[:, : usedlag + 1]).fit()

    # Column 0 holds the lagged level, so its t-value is the ADF statistic.
    adfstat = resols.tvalues[0]
    #    adfstat = (resols.params[0]-1.0)/resols.bse[0]
    # the "asymptotically correct" z statistic is obtained as
    # nobs/(1-np.sum(resols.params[1:-(trendorder+1)])) (resols.params[0] - 1)
    # I think this is the statistic that is used for series that are integrated
    # for orders higher than I(1), ie., not ADF but cointegration tests.

    # Get approx p-value and critical values
    pvalue = mackinnonp(adfstat, regression=regression, N=1)
    critvalues = mackinnoncrit(N=1, regression=regression, nobs=nobs)
    critvalues = {
        "1%": critvalues[0],
        "5%": critvalues[1],
        "10%": critvalues[2],
    }
    if store:
        resstore.resols = resols
        resstore.maxlag = maxlag
        resstore.usedlag = usedlag
        resstore.adfstat = adfstat
        resstore.critvalues = critvalues
        resstore.nobs = nobs
        resstore.H0 = (
            "The coefficient on the lagged level equals 1 - " "unit root"
        )
        resstore.HA = "The coefficient on the lagged level < 1 - stationary"
        resstore.icbest = icbest
        resstore._str = "Augmented Dickey-Fuller Test Results"
        return adfstat, pvalue, critvalues, resstore
    else:
        if not autolag:
            return adfstat, pvalue, usedlag, nobs, critvalues
        else:
            return adfstat, pvalue, usedlag, nobs, critvalues, icbest
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def cointest(
    y0,
    y1,
    trend="c",
    method="aeg",
    maxlag=None,
    autolag="aic",
    return_results=None
):
    """
    Test for no-cointegration of a univariate equation.

    The null hypothesis is no cointegration. Variables in y0 and y1 are
    assumed to be integrated of order 1, I(1).

    This uses the augmented Engle-Granger two-step cointegration test.
    Constant or trend is included in 1st stage regression, i.e. in
    cointegrating equation.

    The test statistic, p-value and critical values are printed to standard
    output; the function itself returns the residuals of the cointegrating
    regression.

    **Warning:** The autolag default has changed compared to statsmodels 0.8.
    In 0.8 autolag was always None, now the keyword is used and defaults to
    "aic". Use `autolag=None` to avoid the lag search.

    Parameters
    ----------
    y0 : array_like
        The first element in cointegrated system. Must be 1-d.
    y1 : array_like
        The remaining elements in cointegrated system.
    trend : str {"c", "ct"}
        The trend term included in regression for cointegrating equation.

        * "c" : constant.
        * "ct" : constant and linear trend.
        * also available quadratic trend "ctt", and no constant "nc".

    method : {"aeg"}
        Only "aeg" (augmented Engle-Granger) is available.
    maxlag : None or int
        Argument for `adfuller`, largest or given number of lags.
    autolag : str
        Argument for `adfuller`, lag selection criterion.

        * If None, then maxlag lags are used without lag search.
        * If "AIC" (default) or "BIC", then the number of lags is chosen
          to minimize the corresponding information criterion.
        * "t-stat" based choice of maxlag.  Starts with maxlag and drops a
          lag until the t-statistic on the last lag length is significant
          using a 5%-sized test.
    return_results : bool
        Validated but otherwise unused; it has no effect on the output.

    Returns
    -------
    residts : pandas.Series
        Residuals of the first-stage (cointegrating) regression of y0 on y1
        and the trend terms.

    Notes
    -----
    The following quantities are printed rather than returned:

    * the t-statistic of the unit-root test on the residuals,
    * MacKinnon's approximate, asymptotic p-value based on MacKinnon (1994),
    * critical values for the test statistic at the 1 %, 5 %, and 10 %
      levels based on regression curve (they depend on the number of
      observations and are NaN for trend "nc").

    The Null hypothesis is that there is no cointegration, the alternative
    hypothesis is that there is cointegrating relationship. If the pvalue is
    small, below a critical size, then we can reject the hypothesis that there
    is no cointegrating relationship.

    P-values and critical values are obtained through regression surface
    approximation from MacKinnon 1994 and 2010.

    If the two series are almost perfectly collinear, then computing the
    test is numerically unstable. However, the two series will be cointegrated
    under the maintained assumption that they are integrated. In this case
    a CollinearityWarning is issued and the t-statistic is set to -inf.

    TODO: We could handle gaps in data by dropping rows with nans in the
    Auxiliary regressions. Not implemented yet, currently assumes no nans
    and no gaps in time series.

    References
    ----------
    .. [1] MacKinnon, J.G. 1994  "Approximate Asymptotic Distribution Functions
       for Unit-Root and Cointegration Tests." Journal of Business & Economics
       Statistics, 12.2, 167-76.
    .. [2] MacKinnon, J.G. 2010.  "Critical Values for Cointegration Tests."
       Queen's University, Dept of Economics Working Papers 1227.
       http://ideas.repec.org/p/qed/wpaper/1227.html
    """
    y0 = array_like(y0, "y0")
    y1 = array_like(y1, "y1", ndim=2)
    trend = string_like(trend, "trend", options=("c", "nc", "ct", "ctt"))
    method = string_like(method, "method", options=("aeg",))
    maxlag = int_like(maxlag, "maxlag", optional=True)
    autolag = string_like(
        autolag, "autolag", optional=True, options=("aic", "bic", "t-stat")
    )
    return_results = bool_like(return_results, "return_results", optional=True)

    nobs, k_vars = y1.shape
    k_vars += 1  # add 1 for y0

    # First stage: regress y0 on y1 plus the requested deterministic terms.
    if trend == "nc":
        xx = y1
    else:
        xx = add_trend(y1, trend=trend, prepend=False)

    res_co = OLS(y0, xx).fit()

    # Second stage: unit-root test on the first-stage residuals, skipped
    # when the fit is numerically perfect.
    if res_co.rsquared < 1 - 100 * SQRTEPS:
        res_adf = adfuller(
            res_co.resid, maxlag=maxlag, autolag=autolag, regression="nc"
        )
    else:
        # The trailing space keeps the two sentences apart; the adjacent
        # literals used to concatenate to "colinear.Cointegration".
        warnings.warn(
            "y0 and y1 are (almost) perfectly colinear. "
            "Cointegration test is not reliable in this case.",
            CollinearityWarning,
            stacklevel=2,
        )
        # Edge case where series are too similar
        res_adf = (-np.inf,)

    # no constant or trend, see egranger in Stata and MacKinnon
    if trend == "nc":
        crit = [np.nan] * 3  # 2010 critical values not available
    else:
        crit = mackinnoncrit(N=k_vars, regression=trend, nobs=nobs - 1)
        #  nobs - 1, the -1 is to match egranger in Stata, I do not know why.
        #  TODO: check nobs or df = nobs - k

    pval_asy = mackinnonp(res_adf[0], regression=trend, N=k_vars)

    print ('Results of Augmented Engle-Granger Two-step Cointegration Test:')
    print('Test Statistic    ', res_adf[0])
    print('P-value    ', pval_asy)
    a1='%6f'%crit[0]
    b1='%6f'%crit[1]
    c1='%6f'%crit[2]
    print('Critical values:   ', a1, '(1%)  ' , b1, '(5%)  ', c1, '(10%)')
    residts=pd.Series(res_co.resid)
    return residts
|
|
532
|
+
|
|
533
|
+
|