PythonTsa 1.5.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. pythontsa-1.5.3/PKG-INFO +25 -0
  2. pythontsa-1.5.3/README.md +4 -0
  3. pythontsa-1.5.3/pyproject.toml +3 -0
  4. pythontsa-1.5.3/setup.cfg +4 -0
  5. pythontsa-1.5.3/setup.py +24 -0
  6. pythontsa-1.5.3/src/PythonTsa/CheckStationarynInvertible.py +38 -0
  7. pythontsa-1.5.3/src/PythonTsa/CointegrationTest.py +533 -0
  8. pythontsa-1.5.3/src/PythonTsa/LjungBoxtest.py +162 -0
  9. pythontsa-1.5.3/src/PythonTsa/ModResidDiag.py +149 -0
  10. pythontsa-1.5.3/src/PythonTsa/MultiCorrPvalue.py +88 -0
  11. pythontsa-1.5.3/src/PythonTsa/Ptsadata/AntidiabeticDrugSales.csv +204 -0
  12. pythontsa-1.5.3/src/PythonTsa/Ptsadata/AustraliaEmployedTotalPersons.xlsx +0 -0
  13. pythontsa-1.5.3/src/PythonTsa/Ptsadata/AustraliaUnemployedTotalPersons.xlsx +0 -0
  14. pythontsa-1.5.3/src/PythonTsa/Ptsadata/BitcoinPrice17-6-23-18-6-22.xlsx +0 -0
  15. pythontsa-1.5.3/src/PythonTsa/Ptsadata/DAX.csv +1193 -0
  16. pythontsa-1.5.3/src/PythonTsa/Ptsadata/DAXlogret.csv +1192 -0
  17. pythontsa-1.5.3/src/PythonTsa/Ptsadata/EconGermany.dat +93 -0
  18. pythontsa-1.5.3/src/PythonTsa/Ptsadata/ExchRate NZ per UK.txt +40 -0
  19. pythontsa-1.5.3/src/PythonTsa/Ptsadata/Global mean surface air temp changes 1880-1985.csv +106 -0
  20. pythontsa-1.5.3/src/PythonTsa/Ptsadata/GlobalTemperature.txt +150 -0
  21. pythontsa-1.5.3/src/PythonTsa/Ptsadata/IBM.csv +652 -0
  22. pythontsa-1.5.3/src/PythonTsa/Ptsadata/Noboyngirl.csv +64 -0
  23. pythontsa-1.5.3/src/PythonTsa/Ptsadata/RwalkwDrift0.3.csv +250 -0
  24. pythontsa-1.5.3/src/PythonTsa/Ptsadata/SP500dailyreturns.csv +5030 -0
  25. pythontsa-1.5.3/src/PythonTsa/Ptsadata/Southtemperature.txt +158 -0
  26. pythontsa-1.5.3/src/PythonTsa/Ptsadata/USEconomicChange.csv +188 -0
  27. pythontsa-1.5.3/src/PythonTsa/Ptsadata/USFemalesAged20+Job1948-81.csv +408 -0
  28. pythontsa-1.5.3/src/PythonTsa/Ptsadata/USQgdpunemp.csv +257 -0
  29. pythontsa-1.5.3/src/PythonTsa/Ptsadata/USbill.csv +462 -0
  30. pythontsa-1.5.3/src/PythonTsa/Ptsadata/USmacronInRate.txt +215 -0
  31. pythontsa-1.5.3/src/PythonTsa/Ptsadata/WTI-Brent.csv +400 -0
  32. pythontsa-1.5.3/src/PythonTsa/Ptsadata/Yearly mean total sunspot number 1700 - 2017.csv +318 -0
  33. pythontsa-1.5.3/src/PythonTsa/Ptsadata/areturns.csv +522 -0
  34. pythontsa-1.5.3/src/PythonTsa/Ptsadata/chaos.csv +500 -0
  35. pythontsa-1.5.3/src/PythonTsa/Ptsadata/dlGDPukcaus1q1980.csv +126 -0
  36. pythontsa-1.5.3/src/PythonTsa/Ptsadata/elec-temp.csv +26305 -0
  37. pythontsa-1.5.3/src/PythonTsa/Ptsadata/gdpquarterlychina1992.1-2017.4.csv +105 -0
  38. pythontsa-1.5.3/src/PythonTsa/Ptsadata/h02July1991June2008.csv +205 -0
  39. pythontsa-1.5.3/src/PythonTsa/Ptsadata/ibmlogret.csv +241 -0
  40. pythontsa-1.5.3/src/PythonTsa/Ptsadata/milk.xlsx +0 -0
  41. pythontsa-1.5.3/src/PythonTsa/Ptsadata/monthly returns of PG stock 1961 to 2016.csv +672 -0
  42. pythontsa-1.5.3/src/PythonTsa/Ptsadata/monthly returns of Procter n Gamble stock n 3 market indexes 1961 to 2016.csv +673 -0
  43. pythontsa-1.5.3/src/PythonTsa/Ptsadata/nao.csv +832 -0
  44. pythontsa-1.5.3/src/PythonTsa/Ptsadata/realGdpConsInv.csv +204 -0
  45. pythontsa-1.5.3/src/PythonTsa/Ptsadata/us-q-rgdp.csv +299 -0
  46. pythontsa-1.5.3/src/PythonTsa/Ptsadata/usFOI.csv +227 -0
  47. pythontsa-1.5.3/src/PythonTsa/Ptsadata/usGDPnotAdjust.csv +300 -0
  48. pythontsa-1.5.3/src/PythonTsa/RandomWalk.py +38 -0
  49. pythontsa-1.5.3/src/PythonTsa/SeasonalRW.py +27 -0
  50. pythontsa-1.5.3/src/PythonTsa/Selecting_arma.py +77 -0
  51. pythontsa-1.5.3/src/PythonTsa/Selecting_arma2.py +76 -0
  52. pythontsa-1.5.3/src/PythonTsa/SimulSBM.py +23 -0
  53. pythontsa-1.5.3/src/PythonTsa/True_acf.py +36 -0
  54. pythontsa-1.5.3/src/PythonTsa/TsTensor.py +129 -0
  55. pythontsa-1.5.3/src/PythonTsa/__init__.py +1 -0
  56. pythontsa-1.5.3/src/PythonTsa/datadir.py +12 -0
  57. pythontsa-1.5.3/src/PythonTsa/openPDF.py +10 -0
  58. pythontsa-1.5.3/src/PythonTsa/plot_acf_pacf.py +40 -0
  59. pythontsa-1.5.3/src/PythonTsa/plot_multi_ACF.py +98 -0
  60. pythontsa-1.5.3/src/PythonTsa/plot_multi_Q_pvalue.py +97 -0
  61. pythontsa-1.5.3/src/PythonTsa.egg-info/PKG-INFO +25 -0
  62. pythontsa-1.5.3/src/PythonTsa.egg-info/SOURCES.txt +62 -0
  63. pythontsa-1.5.3/src/PythonTsa.egg-info/dependency_links.txt +1 -0
  64. pythontsa-1.5.3/src/PythonTsa.egg-info/top_level.txt +1 -0
@@ -0,0 +1,25 @@
1
+ Metadata-Version: 2.4
2
+ Name: PythonTsa
3
+ Version: 1.5.3
4
+ Summary: Package for Applied Time Series Analysis and Forecasting with Python, Springer 2022
5
+ Home-page: https://github.com/QuantLet/pyTSA
6
+ Author: Changquan Huang
7
+ Author-email: h.changquan@icloud.com
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.6
12
+ Description-Content-Type: text/markdown
13
+ Dynamic: author
14
+ Dynamic: author-email
15
+ Dynamic: classifier
16
+ Dynamic: description
17
+ Dynamic: description-content-type
18
+ Dynamic: home-page
19
+ Dynamic: requires-python
20
+ Dynamic: summary
21
+
22
+ This package is a companion to the book Applied Time Series Analysis and Forecasting with Python, Springer 2022.
23
+
24
+ It contains several key Python functions for analyzing time series and most data sets analyzed in the book. Naturally, these functions can also be used to analyze other time series data.
25
+
@@ -0,0 +1,4 @@
1
+ This package is a companion to the book Applied Time Series Analysis and Forecasting with Python, Springer 2022.
2
+
3
+ It contains several key Python functions for analyzing time series and most data sets analyzed in the book. Naturally, these functions can also be used to analyze other time series data.
4
+
@@ -0,0 +1,3 @@
1
+ [build-system]
2
+ requires = [ "setuptools>=42", "wheel"]
3
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,24 @@
1
+ import setuptools
2
+
3
+ with open("README.md", "r", encoding="utf-8") as fh:
4
+ long_description = fh.read()
5
+
6
+ setuptools.setup(
7
+ name ="PythonTsa",
8
+ version ="1.5.3",
9
+ author ="Changquan Huang",
10
+ author_email="h.changquan@icloud.com",
11
+ description ="Package for Applied Time Series Analysis and Forecasting with Python, Springer 2022",
12
+ long_description=long_description,
13
+ long_description_content_type="text/markdown",
14
+ url = "https://github.com/QuantLet/pyTSA",
15
+ classifiers=[
16
+ "Programming Language :: Python :: 3",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: OS Independent",
19
+ ],
20
+ package_dir={"": "src"},
21
+ packages=setuptools.find_packages(where="src"),
22
+ python_requires=">=3.6",
23
+ package_data={'PythonTsa': ['Ptsadata/*.csv', 'Ptsadata/*.txt', 'Ptsadata/*.xlsx', 'Ptsadata/*.dat']}
24
+ )
@@ -0,0 +1,38 @@
1
+ import numpy as np
2
+
3
+ def isstationary(armaProcess):
4
+ """
5
+ Arma process is stationary if AR roots are outside unit circle.
6
+
7
+ Returns
8
+ -------
9
+ bool
10
+ True if AR roots all are outside unit circle.
11
+ """
12
+ narroots=armaProcess.arroots.shape[0]
13
+ arrts=armaProcess.arroots
14
+ for t in range(narroots):
15
+ arrts[t]=float("%.6f" % abs(arrts[t]))
16
+ if np.all(arrts > 1.0):
17
+ return True
18
+ else:
19
+ return False
20
+
21
+
22
+ def isinvertible(armaProcess):
23
+ """
24
+ Arma process is invertible if MA roots are outside unit circle.
25
+
26
+ Returns
27
+ -------
28
+ bool
29
+ True if MA roots all are outside unit circle.
30
+ """
31
+ nmaroots=armaProcess.maroots.shape[0]
32
+ marts=armaProcess.maroots
33
+ for t in range(nmaroots):
34
+ marts[t]=float("%.6f" % abs(marts[t]))
35
+ if np.all(marts > 1.0):
36
+ return True
37
+ else:
38
+ return False
@@ -0,0 +1,533 @@
1
+ """
2
+ Cointegration test for time series
3
+ """
4
+ from statsmodels.compat.numpy import lstsq
5
+ from statsmodels.compat.pandas import deprecate_kwarg
6
+ from statsmodels.compat.python import lrange, lzip
7
+ from statsmodels.compat.scipy import _next_regular
8
+
9
+ import warnings
10
+
11
+ import numpy as np
12
+ from numpy.linalg import LinAlgError
13
+ import pandas as pd
14
+ from scipy import stats
15
+ #import matplotlib.pyplot as plt
16
+
17
+ from statsmodels.regression.linear_model import OLS, yule_walker
18
+ from statsmodels.tools.sm_exceptions import (
19
+ CollinearityWarning,
20
+ InfeasibleTestError,
21
+ InterpolationWarning,
22
+ MissingDataError,
23
+ )
24
+ from statsmodels.tools.tools import Bunch, add_constant
25
+ from statsmodels.tools.validation import (
26
+ array_like,
27
+ bool_like,
28
+ dict_like,
29
+ float_like,
30
+ int_like,
31
+ string_like,
32
+ )
33
+ #from statsmodels.tsa._bds import bds
34
+ #from statsmodels.tsa._innovations import innovations_algo, innovations_filter
35
+ from statsmodels.tsa.adfvalues import mackinnoncrit, mackinnonp
36
+ #from statsmodels.tsa.arima_model import ARMA
37
+ from statsmodels.tsa.tsatools import add_trend, lagmat, lagmat2ds
38
+
39
+ __all__ = [
40
+ "acovf",
41
+ "acf",
42
+ "pacf",
43
+ "pacf_yw",
44
+ "pacf_ols",
45
+ "ccovf",
46
+ "ccf",
47
+ "q_stat",
48
+ "coint",
49
+ "arma_order_select_ic",
50
+ "adfuller",
51
+ "kpss",
52
+ "bds",
53
+ "pacf_burg",
54
+ "innovations_algo",
55
+ "innovations_filter",
56
+ "levinson_durbin_pacf",
57
+ "levinson_durbin",
58
+ "zivot_andrews",
59
+ ]
60
+
61
+ SQRTEPS = np.sqrt(np.finfo(np.double).eps)
62
+
63
+
64
+ def _autolag(
65
+ mod,
66
+ endog,
67
+ exog,
68
+ startlag,
69
+ maxlag,
70
+ method,
71
+ modargs=(),
72
+ fitargs=(),
73
+ regresults=False,
74
+ ):
75
+ """
76
+ Returns the results for the lag length that maximizes the info criterion.
77
+
78
+ Parameters
79
+ ----------
80
+ mod : Model class
81
+ Model estimator class
82
+ endog : array_like
83
+ nobs array containing endogenous variable
84
+ exog : array_like
85
+ nobs by (startlag + maxlag) array containing lags and possibly other
86
+ variables
87
+ startlag : int
88
+ The first zero-indexed column to hold a lag. See Notes.
89
+ maxlag : int
90
+ The highest lag order for lag length selection.
91
+ method : {"aic", "bic", "t-stat"}
92
+ aic - Akaike Information Criterion
93
+ bic - Bayes Information Criterion
94
+ t-stat - Based on last lag
95
+ modargs : tuple, optional
96
+ args to pass to model. See notes.
97
+ fitargs : tuple, optional
98
+ args to pass to fit. See notes.
99
+ regresults : bool, optional
100
+ Flag indicating to return optional return results
101
+
102
+ Returns
103
+ -------
104
+ icbest : float
105
+ Best information criteria.
106
+ bestlag : int
107
+ The lag length that maximizes the information criterion.
108
+ results : dict, optional
109
+ Dictionary containing all estimation results
110
+
111
+ Notes
112
+ -----
113
+ Does estimation like mod(endog, exog[:,:i], *modargs).fit(*fitargs)
114
+ where i goes from lagstart to lagstart+maxlag+1. Therefore, lags are
115
+ assumed to be in contiguous columns from low to high lag length with
116
+ the highest lag in the last column.
117
+ """
118
+ # TODO: can tcol be replaced by maxlag + 2?
119
+ # TODO: This could be changed to laggedRHS and exog keyword arguments if
120
+ # this will be more general.
121
+
122
+ results = {}
123
+ method = method.lower()
124
+ for lag in range(startlag, startlag + maxlag + 1):
125
+ mod_instance = mod(endog, exog[:, :lag], *modargs)
126
+ results[lag] = mod_instance.fit()
127
+
128
+ if method == "aic":
129
+ icbest, bestlag = min((v.aic, k) for k, v in results.items())
130
+ elif method == "bic":
131
+ icbest, bestlag = min((v.bic, k) for k, v in results.items())
132
+ elif method == "t-stat":
133
+ # stop = stats.norm.ppf(.95)
134
+ stop = 1.6448536269514722
135
+ # Default values to ensure that always set
136
+ bestlag = startlag + maxlag
137
+ icbest = 0.0
138
+ for lag in range(startlag + maxlag, startlag - 1, -1):
139
+ icbest = np.abs(results[lag].tvalues[-1])
140
+ bestlag = lag
141
+ if np.abs(icbest) >= stop:
142
+ # Break for first lag with a significant t-stat
143
+ break
144
+ else:
145
+ raise ValueError(f"Information Criterion {method} not understood.")
146
+
147
+ if not regresults:
148
+ return icbest, bestlag
149
+ else:
150
+ return icbest, bestlag, results
151
+
152
+
153
+ # this needs to be converted to a class like HetGoldfeldQuandt,
154
+ # 3 different returns are a mess
155
+ # See:
156
+ # Ng and Perron(2001), Lag length selection and the construction of unit root
157
+ # tests with good size and power, Econometrica, Vol 69 (6) pp 1519-1554
158
+ # TODO: include drift keyword, only valid with regression == "c"
159
+ # just changes the distribution of the test statistic to a t distribution
160
+ # TODO: autolag is untested
161
+ def adfuller(
162
+ x,
163
+ maxlag=None,
164
+ regression="c",
165
+ autolag="AIC",
166
+ store=False,
167
+ regresults=False,
168
+ ):
169
+ """
170
+ Augmented Dickey-Fuller unit root test.
171
+
172
+ The Augmented Dickey-Fuller test can be used to test for a unit root in a
173
+ univariate process in the presence of serial correlation.
174
+
175
+ Parameters
176
+ ----------
177
+ x : array_like, 1d
178
+ The data series to test.
179
+ maxlag : int
180
+ Maximum lag which is included in test, default 12*(nobs/100)^{1/4}.
181
+ regression : {"c","ct","ctt","nc"}
182
+ Constant and trend order to include in regression.
183
+
184
+ * "c" : constant only (default).
185
+ * "ct" : constant and trend.
186
+ * "ctt" : constant, and linear and quadratic trend.
187
+ * "nc" : no constant, no trend.
188
+
189
+ autolag : {"AIC", "BIC", "t-stat", None}
190
+ Method to use when automatically determining the lag length among the
191
+ values 0, 1, ..., maxlag.
192
+
193
+ * If "AIC" (default) or "BIC", then the number of lags is chosen
194
+ to minimize the corresponding information criterion.
195
+ * "t-stat" based choice of maxlag. Starts with maxlag and drops a
196
+ lag until the t-statistic on the last lag length is significant
197
+ using a 5%-sized test.
198
+ * If None, then the number of included lags is set to maxlag.
199
+ store : bool
200
+ If True, then a result instance is returned additionally to
201
+ the adf statistic. Default is False.
202
+ regresults : bool, optional
203
+ If True, the full regression results are returned. Default is False.
204
+
205
+ Returns
206
+ -------
207
+ adf : float
208
+ The test statistic.
209
+ pvalue : float
210
+ MacKinnon"s approximate p-value based on MacKinnon (1994, 2010).
211
+ usedlag : int
212
+ The number of lags used.
213
+ nobs : int
214
+ The number of observations used for the ADF regression and calculation
215
+ of the critical values.
216
+ critical values : dict
217
+ Critical values for the test statistic at the 1 %, 5 %, and 10 %
218
+ levels. Based on MacKinnon (2010).
219
+ icbest : float
220
+ The maximized information criterion if autolag is not None.
221
+ resstore : ResultStore, optional
222
+ A dummy class with results attached as attributes.
223
+
224
+ Notes
225
+ -----
226
+ The null hypothesis of the Augmented Dickey-Fuller is that there is a unit
227
+ root, with the alternative that there is no unit root. If the pvalue is
228
+ above a critical size, then we cannot reject that there is a unit root.
229
+
230
+ The p-values are obtained through regression surface approximation from
231
+ MacKinnon 1994, but using the updated 2010 tables. If the p-value is close
232
+ to significant, then the critical values should be used to judge whether
233
+ to reject the null.
234
+
235
+ The autolag option and maxlag for it are described in Greene.
236
+
237
+ References
238
+ ----------
239
+ .. [1] W. Green. "Econometric Analysis," 5th ed., Pearson, 2003.
240
+
241
+ .. [2] Hamilton, J.D. "Time Series Analysis". Princeton, 1994.
242
+
243
+ .. [3] MacKinnon, J.G. 1994. "Approximate asymptotic distribution functions for
244
+ unit-root and cointegration tests. `Journal of Business and Economic
245
+ Statistics` 12, 167-76.
246
+
247
+ .. [4] MacKinnon, J.G. 2010. "Critical Values for Cointegration Tests." Queen"s
248
+ University, Dept of Economics, Working Papers. Available at
249
+ http://ideas.repec.org/p/qed/wpaper/1227.html
250
+
251
+ Examples
252
+ --------
253
+ See example notebook
254
+ """
255
+ x = array_like(x, "x")
256
+ maxlag = int_like(maxlag, "maxlag", optional=True)
257
+ regression = string_like(
258
+ regression, "regression", options=("c", "ct", "ctt", "nc")
259
+ )
260
+ autolag = string_like(
261
+ autolag, "autolag", optional=True, options=("aic", "bic", "t-stat")
262
+ )
263
+ store = bool_like(store, "store")
264
+ regresults = bool_like(regresults, "regresults")
265
+
266
+ if regresults:
267
+ store = True
268
+
269
+ trenddict = {None: "nc", 0: "c", 1: "ct", 2: "ctt"}
270
+ if regression is None or isinstance(regression, int):
271
+ regression = trenddict[regression]
272
+ regression = regression.lower()
273
+ nobs = x.shape[0]
274
+
275
+ ntrend = len(regression) if regression != "nc" else 0
276
+ if maxlag is None:
277
+ # from Greene referencing Schwert 1989
278
+ maxlag = int(np.ceil(12.0 * np.power(nobs / 100.0, 1 / 4.0)))
279
+ # -1 for the diff
280
+ maxlag = min(nobs // 2 - ntrend - 1, maxlag)
281
+ if maxlag < 0:
282
+ raise ValueError(
283
+ "sample size is too short to use selected "
284
+ "regression component"
285
+ )
286
+ elif maxlag > nobs // 2 - ntrend - 1:
287
+ raise ValueError(
288
+ "maxlag must be less than (nobs/2 - 1 - ntrend) "
289
+ "where n trend is the number of included "
290
+ "deterministic regressors"
291
+ )
292
+ xdiff = np.diff(x)
293
+ xdall = lagmat(xdiff[:, None], maxlag, trim="both", original="in")
294
+ nobs = xdall.shape[0]
295
+
296
+ xdall[:, 0] = x[-nobs - 1 : -1] # replace 0 xdiff with level of x
297
+ xdshort = xdiff[-nobs:]
298
+
299
+ if store:
300
+ from statsmodels.stats.diagnostic import ResultsStore
301
+
302
+ resstore = ResultsStore()
303
+ if autolag:
304
+ if regression != "nc":
305
+ fullRHS = add_trend(xdall, regression, prepend=True)
306
+ else:
307
+ fullRHS = xdall
308
+ startlag = fullRHS.shape[1] - xdall.shape[1] + 1
309
+ # 1 for level
310
+ # search for lag length with smallest information criteria
311
+ # Note: use the same number of observations to have comparable IC
312
+ # aic and bic: smaller is better
313
+
314
+ if not regresults:
315
+ icbest, bestlag = _autolag(
316
+ OLS, xdshort, fullRHS, startlag, maxlag, autolag
317
+ )
318
+ else:
319
+ icbest, bestlag, alres = _autolag(
320
+ OLS,
321
+ xdshort,
322
+ fullRHS,
323
+ startlag,
324
+ maxlag,
325
+ autolag,
326
+ regresults=regresults,
327
+ )
328
+ resstore.autolag_results = alres
329
+
330
+ bestlag -= startlag # convert to lag not column index
331
+
332
+ # rerun ols with best autolag
333
+ xdall = lagmat(xdiff[:, None], bestlag, trim="both", original="in")
334
+ nobs = xdall.shape[0]
335
+ xdall[:, 0] = x[-nobs - 1 : -1] # replace 0 xdiff with level of x
336
+ xdshort = xdiff[-nobs:]
337
+ usedlag = bestlag
338
+ else:
339
+ usedlag = maxlag
340
+ icbest = None
341
+ if regression != "nc":
342
+ resols = OLS(
343
+ xdshort, add_trend(xdall[:, : usedlag + 1], regression)
344
+ ).fit()
345
+ else:
346
+ resols = OLS(xdshort, xdall[:, : usedlag + 1]).fit()
347
+
348
+ adfstat = resols.tvalues[0]
349
+ # adfstat = (resols.params[0]-1.0)/resols.bse[0]
350
+ # the "asymptotically correct" z statistic is obtained as
351
+ # nobs/(1-np.sum(resols.params[1:-(trendorder+1)])) (resols.params[0] - 1)
352
+ # I think this is the statistic that is used for series that are integrated
353
+ # for orders higher than I(1), ie., not ADF but cointegration tests.
354
+
355
+ # Get approx p-value and critical values
356
+ pvalue = mackinnonp(adfstat, regression=regression, N=1)
357
+ critvalues = mackinnoncrit(N=1, regression=regression, nobs=nobs)
358
+ critvalues = {
359
+ "1%": critvalues[0],
360
+ "5%": critvalues[1],
361
+ "10%": critvalues[2],
362
+ }
363
+ if store:
364
+ resstore.resols = resols
365
+ resstore.maxlag = maxlag
366
+ resstore.usedlag = usedlag
367
+ resstore.adfstat = adfstat
368
+ resstore.critvalues = critvalues
369
+ resstore.nobs = nobs
370
+ resstore.H0 = (
371
+ "The coefficient on the lagged level equals 1 - " "unit root"
372
+ )
373
+ resstore.HA = "The coefficient on the lagged level < 1 - stationary"
374
+ resstore.icbest = icbest
375
+ resstore._str = "Augmented Dickey-Fuller Test Results"
376
+ return adfstat, pvalue, critvalues, resstore
377
+ else:
378
+ if not autolag:
379
+ return adfstat, pvalue, usedlag, nobs, critvalues
380
+ else:
381
+ return adfstat, pvalue, usedlag, nobs, critvalues, icbest
382
+
383
+
384
+ def cointest(
385
+ y0,
386
+ y1,
387
+ trend="c",
388
+ method="aeg",
389
+ maxlag=None,
390
+ autolag="aic",
391
+ return_results=None
392
+ ):
393
+ """
394
+ Test for no-cointegration of a univariate equation.
395
+
396
+ The null hypothesis is no cointegration. Variables in y0 and y1 are
397
+ assumed to be integrated of order 1, I(1).
398
+
399
+ This uses the augmented Engle-Granger two-step cointegration test.
400
+ Constant or trend is included in 1st stage regression, i.e. in
401
+ cointegrating equation.
402
+
403
+ **Warning:** The autolag default has changed compared to statsmodels 0.8.
404
+ In 0.8 autolag was always None, no the keyword is used and defaults to
405
+ "aic". Use `autolag=None` to avoid the lag search.
406
+
407
+ Parameters
408
+ ----------
409
+ y0 : array_like
410
+ The first element in cointegrated system. Must be 1-d.
411
+ y1 : array_like
412
+ The remaining elements in cointegrated system.
413
+ trend : str {"c", "ct"}
414
+ The trend term included in regression for cointegrating equation.
415
+
416
+ * "c" : constant.
417
+ * "ct" : constant and linear trend.
418
+ * also available quadratic trend "ctt", and no constant "nc".
419
+
420
+ method : {"aeg"}
421
+ Only "aeg" (augmented Engle-Granger) is available.
422
+ maxlag : None or int
423
+ Argument for `adfuller`, largest or given number of lags.
424
+ autolag : str
425
+ Argument for `adfuller`, lag selection criterion.
426
+
427
+ * If None, then maxlag lags are used without lag search.
428
+ * If "AIC" (default) or "BIC", then the number of lags is chosen
429
+ to minimize the corresponding information criterion.
430
+ * "t-stat" based choice of maxlag. Starts with maxlag and drops a
431
+ lag until the t-statistic on the last lag length is significant
432
+ using a 5%-sized test.
433
+ return_results : bool
434
+ For future compatibility, currently only tuple available.
435
+ If True, then a results instance is returned. Otherwise, a tuple
436
+ with the test outcome is returned. Set `return_results=False` to
437
+ avoid future changes in return.
438
+ fig : bool
439
+ If 'True', plot the regression residuals
440
+
441
+ Returns
442
+ -------
443
+ coint_t : float
444
+ The t-statistic of unit-root test on residuals.
445
+ pvalue : float
446
+ MacKinnon"s approximate, asymptotic p-value based on MacKinnon (1994).
447
+ crit_value : dict
448
+ Critical values for the test statistic at the 1 %, 5 %, and 10 %
449
+ levels based on regression curve. This depends on the number of
450
+ observations.
451
+
452
+ Notes
453
+ -----
454
+ The Null hypothesis is that there is no cointegration, the alternative
455
+ hypothesis is that there is cointegrating relationship. If the pvalue is
456
+ small, below a critical size, then we can reject the hypothesis that there
457
+ is no cointegrating relationship.
458
+
459
+ P-values and critical values are obtained through regression surface
460
+ approximation from MacKinnon 1994 and 2010.
461
+
462
+ If the two series are almost perfectly collinear, then computing the
463
+ test is numerically unstable. However, the two series will be cointegrated
464
+ under the maintained assumption that they are integrated. In this case
465
+ the t-statistic will be set to -inf and the pvalue to zero.
466
+
467
+ TODO: We could handle gaps in data by dropping rows with nans in the
468
+ Auxiliary regressions. Not implemented yet, currently assumes no nans
469
+ and no gaps in time series.
470
+
471
+ References
472
+ ----------
473
+ .. [1] MacKinnon, J.G. 1994 "Approximate Asymptotic Distribution Functions
474
+ for Unit-Root and Cointegration Tests." Journal of Business & Economics
475
+ Statistics, 12.2, 167-76.
476
+ .. [2] MacKinnon, J.G. 2010. "Critical Values for Cointegration Tests."
477
+ Queen"s University, Dept of Economics Working Papers 1227.
478
+ http://ideas.repec.org/p/qed/wpaper/1227.html
479
+ """
480
+ y0 = array_like(y0, "y0")
481
+ y1 = array_like(y1, "y1", ndim=2)
482
+ trend = string_like(trend, "trend", options=("c", "nc", "ct", "ctt"))
483
+ method = string_like(method, "method", options=("aeg",))
484
+ maxlag = int_like(maxlag, "maxlag", optional=True)
485
+ autolag = string_like(
486
+ autolag, "autolag", optional=True, options=("aic", "bic", "t-stat")
487
+ )
488
+ return_results = bool_like(return_results, "return_results", optional=True)
489
+
490
+ nobs, k_vars = y1.shape
491
+ k_vars += 1 # add 1 for y0
492
+
493
+ if trend == "nc":
494
+ xx = y1
495
+ else:
496
+ xx = add_trend(y1, trend=trend, prepend=False)
497
+
498
+ res_co = OLS(y0, xx).fit()
499
+
500
+ if res_co.rsquared < 1 - 100 * SQRTEPS:
501
+ res_adf = adfuller(
502
+ res_co.resid, maxlag=maxlag, autolag=autolag, regression="nc"
503
+ )
504
+ else:
505
+ warnings.warn(
506
+ "y0 and y1 are (almost) perfectly colinear."
507
+ "Cointegration test is not reliable in this case.",
508
+ CollinearityWarning,
509
+ )
510
+ # Edge case where series are too similar
511
+ res_adf = (-np.inf,)
512
+
513
+ # no constant or trend, see egranger in Stata and MacKinnon
514
+ if trend == "nc":
515
+ crit = [np.nan] * 3 # 2010 critical values not available
516
+ else:
517
+ crit = mackinnoncrit(N=k_vars, regression=trend, nobs=nobs - 1)
518
+ # nobs - 1, the -1 is to match egranger in Stata, I do not know why.
519
+ # TODO: check nobs or df = nobs - k
520
+
521
+ pval_asy = mackinnonp(res_adf[0], regression=trend, N=k_vars)
522
+
523
+ print ('Results of Augmented Engle-Granger Two-step Cointegration Test:')
524
+ print('Test Statistic ', res_adf[0])
525
+ print('P-value ', pval_asy)
526
+ a1='%6f'%crit[0]
527
+ b1='%6f'%crit[1]
528
+ c1='%6f'%crit[2]
529
+ print('Critical values: ', a1, '(1%) ' , b1, '(5%) ', c1, '(10%)')
530
+ residts=pd.Series(res_co.resid)
531
+ return residts
532
+
533
+