segmcoint 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- segmcoint/__init__.py +103 -0
- segmcoint/kim2003.py +672 -0
- segmcoint/martins_rodrigues2022.py +783 -0
- segmcoint/simulation.py +333 -0
- segmcoint/utils.py +433 -0
- segmcoint-1.0.0.dist-info/METADATA +144 -0
- segmcoint-1.0.0.dist-info/RECORD +10 -0
- segmcoint-1.0.0.dist-info/WHEEL +5 -0
- segmcoint-1.0.0.dist-info/licenses/LICENSE +21 -0
- segmcoint-1.0.0.dist-info/top_level.txt +1 -0
segmcoint/kim2003.py
ADDED
|
@@ -0,0 +1,672 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Segmented cointegration tests from Kim (2003).
|
|
3
|
+
|
|
4
|
+
Implements the inference procedures for segmented cointegration proposed in:
|
|
5
|
+
|
|
6
|
+
Kim, J.-Y. (2003). Inference on Segmented Cointegration.
|
|
7
|
+
Econometric Theory, 19, 620-639.
|
|
8
|
+
|
|
9
|
+
This module provides:
|
|
10
|
+
- Phillips-Perron-Ouliaris type tests: Z_rho(C_T) and Z_t(C_T)
|
|
11
|
+
- Augmented Dickey-Fuller type tests: ADF_rho(C_T) and ADF_t(C_T)
|
|
12
|
+
- Infimum test statistics: Z*_rho, Z*_t, ADF*_rho, ADF*_t
|
|
13
|
+
- Extremum estimator for the noncointegration period
|
|
14
|
+
- Critical value tables (Tables 1 and 2 from the paper)
|
|
15
|
+
|
|
16
|
+
Model Specifications (Kim 2003, Eq. 2.1a-2.1c):
|
|
17
|
+
Case I (model='none'): x_{1t} = beta' x_{2t} + eps_t
|
|
18
|
+
Case II (model='drift'): x_{1t} = alpha + beta' x_{2t} + eps_t
|
|
19
|
+
Case III (model='trend'): x_{1t} = alpha + gamma*t + beta' x_{2t} + eps_t
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import numpy as np
|
|
23
|
+
import warnings
|
|
24
|
+
from numpy.linalg import inv
|
|
25
|
+
from .utils import (
|
|
26
|
+
ols_residuals,
|
|
27
|
+
ar1_regression,
|
|
28
|
+
adf_regression,
|
|
29
|
+
newey_west_lrv,
|
|
30
|
+
select_lag_bic,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
# ============================================================================
# Asymptotic critical values from Kim (2003, Tables 1 and 2)
# Columns: 0.01, 0.025, 0.05, 0.10, 0.15, 0.95, 0.975, 0.99
# ============================================================================

# Table 1: Critical values of Z*_rho(C) and ADF*_rho(C)
# ell_bar(T_N) = 0.3
# Dict key: n = number of variables in the cointegrating regression.
# Each row holds the critical values at the percentiles listed above.
# Case I: no deterministic terms in the cointegrating regression (Eq. 2.1a).
_CV_Zp_CASE_I = {
    1: [-13.00, -10.16, -8.18, -5.68, -4.57, 1.29, 1.64, 2.12],
    2: [-37.20, -32.30, -27.90, -23.70, -20.82, -2.40, -1.29, -0.56],
    3: [-46.63, -41.05, -36.41, -31.61, -28.74, -7.54, -5.81, -4.11],
    4: [-55.89, -48.97, -44.59, -39.43, -36.36, -12.73, -10.85, -9.22],
    5: [-63.94, -58.40, -52.79, -47.78, -44.59, -17.92, -15.86, -13.75],
    6: [-70.39, -64.30, -59.83, -54.22, -51.02, -22.47, -20.60, -18.42],
}

# Case II: regression includes an intercept (Eq. 2.1b).
_CV_Zp_CASE_II = {
    1: [-20.15, -16.73, -13.96, -11.37, -9.59, -0.13, 0.50, 1.21],
    2: [-87.37, -64.30, -50.75, -39.58, -34.02, -7.77, -6.19, -4.79],
    3: [-105.48, -84.12, -65.53, -51.53, -44.82, -13.57, -11.25, -9.21],
    4: [-122.91, -96.68, -78.66, -61.95, -54.24, -18.65, -16.37, -13.77],
    5: [-130.86, -106.23, -87.07, -69.28, -60.56, -23.79, -21.33, -18.94],
    6: [-134.72, -109.84, -89.89, -73.65, -66.98, -28.72, -25.86, -22.50],
}

# Case III: regression includes an intercept and a linear trend (Eq. 2.1c).
_CV_Zp_CASE_III = {
    1: [-29.17, -24.90, -21.55, -18.22, -16.03, -2.60, -1.67, -0.75],
    2: [-107.65, -80.24, -62.54, -46.59, -38.44, -8.26, -6.42, -4.39],
    3: [-131.66, -101.33, -79.34, -58.96, -49.43, -14.13, -12.05, -9.81],
    4: [-135.59, -109.94, -87.87, -68.31, -59.07, -19.65, -17.64, -15.20],
    5: [-140.29, -115.59, -96.57, -74.32, -65.16, -24.78, -22.25, -19.61],
    6: [-144.09, -118.83, -98.31, -78.46, -70.02, -28.98, -26.27, -23.71],
}

# Table 2: Critical values of Z*_t(C) and ADF*_t(C)
# Same layout as Table 1: key = n, columns = percentiles above.
_CV_Zt_CASE_I = {
    1: [-2.51, -2.20, -1.96, -1.61, -1.43, 1.29, 1.70, 2.17],
    2: [-4.23, -3.95, -3.65, -3.34, -3.15, -0.88, -0.54, -0.23],
    3: [-4.88, -4.49, -4.23, -3.92, -3.73, -1.74, -1.46, -1.15],
    4: [-5.26, -4.89, -4.68, -4.38, -4.19, -2.32, -2.10, -1.84],
    5: [-5.59, -5.33, -5.09, -4.86, -4.70, -2.84, -2.62, -2.33],
    6: [-5.89, -5.68, -5.45, -5.16, -5.00, -3.24, -3.05, -2.81],
}

_CV_Zt_CASE_II = {
    1: [-3.49, -3.12, -2.88, -2.58, -2.37, -0.06, 0.31, 0.66],
    2: [-8.84, -7.43, -6.36, -5.31, -4.71, -1.91, -1.62, -1.28],
    3: [-10.05, -8.68, -7.38, -6.22, -5.47, -2.53, -2.28, -2.01],
    4: [-11.04, -9.51, -8.25, -6.86, -6.05, -3.03, -2.78, -2.51],
    5: [-11.33, -10.03, -8.70, -7.21, -6.35, -3.43, -3.20, -2.93],
    6: [-11.78, -10.14, -8.76, -7.33, -6.56, -3.78, -3.58, -3.37],
}

_CV_Zt_CASE_III = {
    1: [-3.92, -3.66, -3.40, -3.12, -2.94, -0.90, -0.62, -0.30],
    2: [-10.42, -8.78, -7.74, -6.32, -5.52, -1.96, -1.67, -1.27],
    3: [-11.68, -9.98, -8.62, -7.23, -6.20, -2.61, -2.27, -1.91],
    4: [-11.90, -10.37, -9.13, -7.57, -6.62, -3.10, -2.87, -2.61],
    5: [-12.32, -10.80, -9.52, -7.83, -6.86, -3.51, -3.30, -3.05],
    6: [-12.45, -10.98, -9.53, -7.79, -6.91, -3.84, -3.62, -3.38],
}

# Mapping percentiles to column indices
# Position i of this list is the column index of percentile _CV_PERCENTILES[i]
# in every row above; used by get_critical_value for the lookup.
_CV_PERCENTILES = [0.01, 0.025, 0.05, 0.10, 0.15, 0.95, 0.975, 0.99]
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _get_cv_table(stat_type, model):
    """Return the critical-value table matching *stat_type* and *model*.

    Rho-type statistics map to Table 1, t-type statistics to Table 2.
    Returns None when *model* is not one of 'none'/'drift'/'trend';
    raises ValueError for an unrecognized *stat_type*.
    """
    if stat_type in ("Zp", "ADFp"):
        by_model = {
            "none": _CV_Zp_CASE_I,
            "drift": _CV_Zp_CASE_II,
            "trend": _CV_Zp_CASE_III,
        }
    elif stat_type in ("Zt", "ADFt"):
        by_model = {
            "none": _CV_Zt_CASE_I,
            "drift": _CV_Zt_CASE_II,
            "trend": _CV_Zt_CASE_III,
        }
    else:
        raise ValueError(f"Unknown stat_type: {stat_type}")
    return by_model.get(model)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def get_critical_value(n, alpha, stat_type="Zt", model="drift"):
    """
    Look up an asymptotic critical value from Kim (2003, Tables 1-2).

    The tabulated values are for ell_bar(T_N) = 0.3 as reported in the
    paper.

    Parameters
    ----------
    n : int
        Number of variables in the cointegration regression (1 for
        univariate, 2 for bivariate, etc.).
    alpha : float
        Significance level; must be one of the tabulated percentiles
        (0.01, 0.025, 0.05, 0.10, 0.15, 0.95, 0.975, 0.99).
    stat_type : str
        Test statistic type: 'Zp', 'Zt', 'ADFp', 'ADFt'.
    model : str
        Model specification: 'none' (Case I), 'drift' (Case II),
        'trend' (Case III).

    Returns
    -------
    cv : float
        Critical value at the specified significance level.

    Raises
    ------
    ValueError
        For an unknown model, an *n* outside the tabulated range, or an
        *alpha* that is not a tabulated percentile.
    """
    table = _get_cv_table(stat_type, model)
    # Guard clauses: validate model, then n, then alpha (in that order).
    if table is None:
        raise ValueError(f"No table for model={model}")
    if n not in table:
        raise ValueError(f"n={n} not in table. Available: {list(table.keys())}")
    if alpha not in _CV_PERCENTILES:
        raise ValueError(
            f"alpha={alpha} not available. Use one of {_CV_PERCENTILES}")
    row = table[n]
    return row[_CV_PERCENTILES.index(alpha)]
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# ============================================================================
|
|
149
|
+
# Phillips-Perron-Ouliaris type statistics: Z_rho(C_T) and Z_t(C_T)
|
|
150
|
+
# Kim (2003, Eqs. 3.3 and 3.4)
|
|
151
|
+
# ============================================================================
|
|
152
|
+
|
|
153
|
+
def _compute_Zp_Zt(e, weighted, q=None):
    """
    Compute Z_rho and Z_t statistics for a given segmentation.

    Implements Kim (2003, Eqs. 3.3) and (3.4):
        Z_rho(C_T) = T_C * (rho_hat - 1)
                     - 0.5*(T_C^2 * sigma_rho^2 / s^2) * (lambda^2 - gamma_0)
        Z_t(C_T)   = (gamma_0/lambda^2)^{1/2} * t(C_T)
                     - {(lambda^2 - gamma_0)/(2*lambda)} * {T_C * sigma_rho / s}

    Parameters
    ----------
    e : ndarray
        Full residual series.
    weighted : ndarray
        Weight series w_t(C_T); 1 on the cointegration period C_T and
        0 on the noncointegration period N_T.
    q : int or None
        Bandwidth for the Newey-West long-run variance estimator.

    Returns
    -------
    Z_rho : float
    Z_t : float
        (np.nan, np.nan) when too few active observations remain or the
        variance estimates are degenerate.
    """
    # Effective sample size: observations with positive weight (t in C_T).
    T_C = int(np.sum(weighted > 0))

    # Weighted AR(1) regression of e_t on e_{t-1}.
    rho_hat, s_sq, sigma_rho_sq, _ = ar1_regression(e, weighted=weighted)

    # t-statistic for H_0: rho = 1; guard against a degenerate variance.
    if sigma_rho_sq > 0:
        t_stat = (rho_hat - 1.0) / np.sqrt(sigma_rho_sq)
    else:
        t_stat = 0.0

    # AR(1) innovations for the long-run variance: v_hat = w_t*(e_t - rho*e_{t-1}).
    v_hat = weighted[1:] * (e[1:] - rho_hat * e[:-1])
    # Only keep observations where weight > 0.
    v_active = v_hat[weighted[1:] > 0]

    if len(v_active) < 2:
        return np.nan, np.nan

    # lambda^2 = long-run variance, gamma_0 = short-run variance of v_hat.
    lambda_sq, gamma_0 = newey_west_lrv(v_active, q=q)

    if lambda_sq <= 0 or s_sq <= 0:
        return np.nan, np.nan

    # Z_rho (Eq. 3.3): bias-corrected normalized coefficient statistic.
    Z_rho = T_C * (rho_hat - 1.0) - 0.5 * (
        T_C ** 2 * sigma_rho_sq / s_sq) * (lambda_sq - gamma_0)

    # Z_t (Eq. 3.4): bias-corrected t-type statistic.
    lambda_hat = np.sqrt(lambda_sq)
    gamma_0_sqrt = np.sqrt(gamma_0)

    Z_t = (gamma_0_sqrt / lambda_hat) * t_stat - (
        (lambda_sq - gamma_0) / (2.0 * lambda_hat)) * (
        T_C * np.sqrt(sigma_rho_sq) / np.sqrt(s_sq))

    return Z_rho, Z_t
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
# ============================================================================
|
|
218
|
+
# ADF type statistics: ADF_rho(C_T) and ADF_t(C_T)
|
|
219
|
+
# Kim (2003, Eqs. 3.6 and 3.7)
|
|
220
|
+
# ============================================================================
|
|
221
|
+
|
|
222
|
+
def _compute_ADF(e, weighted, p=1):
    """
    Compute ADF_rho and ADF_t statistics for a given segmentation.

    Implements Kim (2003, Eqs. 3.6) and (3.7):
        ADF_rho(C_T) = T_C * (lambda_tilde / sigma_epsilon) * (rho_tilde - 1)
        ADF_t(C_T)   = t_tilde_T

    where lambda_tilde/sigma_epsilon = (1 - zeta_1 - ... - zeta_{p-1})^{-1}.

    Parameters
    ----------
    e : ndarray
        Full residual series.
    weighted : ndarray
        Weight series w_t(C_T).
    p : int
        Lag order for the augmented regression.

    Returns
    -------
    ADF_rho : float
    ADF_t : float
        (np.nan, np.nan) when the lag-polynomial correction is
        numerically degenerate.
    """
    # Effective sample size over the cointegration period.
    T_C = int(np.sum(weighted > 0))

    rho_hat, t_stat, sigma_e, zeta_hat, _ = adf_regression(
        e, p=p, weighted=weighted)

    # Scale factor lambda_tilde / sigma_epsilon (see below Eq. 3.7).
    if len(zeta_hat) == 0:
        scale = 1.0
    else:
        denom = 1.0 - np.sum(zeta_hat)
        # A near-zero denominator means the AR polynomial is (numerically)
        # noninvertible; the statistic is not defined in that case.
        if abs(denom) < 1e-10:
            return np.nan, np.nan
        scale = 1.0 / denom

    # ADF_rho (Eq. 3.6) and ADF_t (Eq. 3.7, the plain t-statistic).
    return T_C * scale * (rho_hat - 1.0), t_stat
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
# ============================================================================
|
|
270
|
+
# Segmented cointegration test: infimum statistics
|
|
271
|
+
# Kim (2003, Eqs. 3.13 and 3.14)
|
|
272
|
+
# ============================================================================
|
|
273
|
+
|
|
274
|
+
def kim_test(y, X, model="drift", max_ell=0.3, step=1,
             q=None, p=None, max_p=12, stat_types=("Zp", "Zt", "ADFp", "ADFt"),
             verbose=False):
    """
    Kim (2003) tests for segmented cointegration.

    Searches over all possible segmentations {N_T} and computes the infimum
    of the test statistics Z_rho(C_T), Z_t(C_T), ADF_rho(C_T), ADF_t(C_T)
    over these segmentations, as described in Kim (2003, Section 3.1).

    The null hypothesis is H_0: rho = 1 for all t (no cointegration).
    The alternative is H_1: segmented cointegration where rho < 1 in C_T
    and rho = 1 in N_T.

    Parameters
    ----------
    y : array_like, shape (T,)
        Dependent variable.
    X : array_like, shape (T,) or (T, K)
        Regressor(s).
    model : str
        Deterministic specification: 'none' (Case I), 'drift' (Case II),
        'trend' (Case III).
    max_ell : float
        Upper bound for the length of the noncointegration period as a
        fraction of T. Denoted ell_bar(T_N) in Kim (2003).
        Critical values in Tables 1-2 are for max_ell = 0.3.
    step : int
        Step size for searching over segmentations (in observations).
    q : int or None
        Bandwidth for long-run variance estimator. If None, uses automatic
        selection.
    p : int or None
        Lag order for ADF statistics. If None, selected by BIC.
    max_p : int
        Maximum lag order for BIC selection.
    stat_types : tuple of str
        Which statistics to compute: any subset of
        ('Zp', 'Zt', 'ADFp', 'ADFt').
    verbose : bool
        If True, print progress information.

    Returns
    -------
    results : KimTestResult
        Object containing test statistics, critical values, break dates,
        and other information.
    """
    y = np.asarray(y, dtype=np.float64).ravel()
    X = np.asarray(X, dtype=np.float64)
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    T = len(y)
    n = X.shape[1] + 1  # Number of variables in cointegration regression

    # Step 1: Estimate cointegrating regression on full sample
    e_full, beta_hat = ols_residuals(y, X, model=model)

    # Step 2: Select lag order if not provided
    if p is None:
        p = select_lag_bic(e_full, max_p=max_p)

    # Step 3: Search over all possible segmentations.
    # N_T = {k0+1, ..., k1} (1-indexed) corresponds to 0-indexed slice
    # [k0, k1); each statistic's infimum and its argmin window are tracked.
    max_len = int(max_ell * T)

    results_dict = {s: {"stat": np.inf, "k0": None, "k1": None}
                    for s in stat_types}

    n_searched = 0

    for ell_N in range(1, max_len + 1, step):
        for k0 in range(0, T - ell_N + 1, step):
            # k1 <= T always holds by the inner loop's upper bound.
            k1 = k0 + ell_N

            # Ensure C_T has at least n observations (Assumption 2)
            T_C = T - ell_N
            if T_C < n:
                continue

            # Weight vector: w_t = 1 for t in C_T, 0 for t in N_T
            w = np.ones(T)
            w[k0:k1] = 0.0

            n_searched += 1

            # Compute the requested statistics; non-finite values are
            # skipped so they never become the running infimum.
            if "Zp" in stat_types or "Zt" in stat_types:
                Zp_val, Zt_val = _compute_Zp_Zt(e_full, w, q=q)

                if "Zp" in stat_types and np.isfinite(Zp_val):
                    if Zp_val < results_dict["Zp"]["stat"]:
                        results_dict["Zp"] = {"stat": Zp_val, "k0": k0, "k1": k1}

                if "Zt" in stat_types and np.isfinite(Zt_val):
                    if Zt_val < results_dict["Zt"]["stat"]:
                        results_dict["Zt"] = {"stat": Zt_val, "k0": k0, "k1": k1}

            if "ADFp" in stat_types or "ADFt" in stat_types:
                try:
                    ADFp_val, ADFt_val = _compute_ADF(e_full, w, p=p)
                except (ValueError, np.linalg.LinAlgError):
                    ADFp_val, ADFt_val = np.nan, np.nan

                if "ADFp" in stat_types and np.isfinite(ADFp_val):
                    if ADFp_val < results_dict["ADFp"]["stat"]:
                        results_dict["ADFp"] = {
                            "stat": ADFp_val, "k0": k0, "k1": k1}

                if "ADFt" in stat_types and np.isfinite(ADFt_val):
                    if ADFt_val < results_dict["ADFt"]["stat"]:
                        results_dict["ADFt"] = {
                            "stat": ADFt_val, "k0": k0, "k1": k1}

    if verbose:
        print(f"Searched {n_searched} segmentations.")

    # Step 4: Compute standard (non-segmented) tests on full sample
    w_full = np.ones(T)
    Zp_full, Zt_full = _compute_Zp_Zt(e_full, w_full, q=q)
    try:
        ADFp_full, ADFt_full = _compute_ADF(e_full, w_full, p=p)
    except (ValueError, np.linalg.LinAlgError):
        ADFp_full, ADFt_full = np.nan, np.nan

    full_sample_stats = {
        "Zp": Zp_full, "Zt": Zt_full,
        "ADFp": ADFp_full, "ADFt": ADFt_full
    }

    # Step 5: Collect critical values (Tables 1-2; tabulated for
    # ell_bar(T_N)=0.3, so they are approximate for other max_ell values).
    cvs = {}
    for s in stat_types:
        try:
            cvs[s] = {
                alpha: get_critical_value(n, alpha, stat_type=s, model=model)
                for alpha in [0.01, 0.025, 0.05, 0.10]
            }
        except (ValueError, KeyError):
            cvs[s] = {}

    return KimTestResult(
        stat_types=stat_types,
        infimum_stats={s: results_dict[s]["stat"] for s in stat_types},
        break_k0={s: results_dict[s]["k0"] for s in stat_types},
        break_k1={s: results_dict[s]["k1"] for s in stat_types},
        full_sample_stats=full_sample_stats,
        critical_values=cvs,
        model=model,
        n=n,
        T=T,
        max_ell=max_ell,
        lag_order=p,
        beta_hat=beta_hat,
        residuals=e_full,
    )
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
# ============================================================================
|
|
442
|
+
# Extremum estimator for the noncointegration period
|
|
443
|
+
# Kim (2003, Eqs. 3.16 and 3.17)
|
|
444
|
+
# ============================================================================
|
|
445
|
+
|
|
446
|
+
def kim_break_estimator(y, X, model="drift", max_ell=0.3, step=1):
    """
    Extremum estimator for the noncointegration period.

    Implements the estimator from Kim (2003, Eq. 3.16-3.17):
        Lambda_T(tau) = [(tau_1 - tau_0)T]^{-2} * sum_{t in N_T} e_t(C_T)^2
                        / [T_C^{-1} * sum_{t in C_T} e_t(C_T)^2]
        tau_hat = argmax_{tau in T} Lambda_T(tau)

    For each candidate window N_T = [k0, k1), the cointegrating
    regression is fitted by OLS on the C_T rows of a single full-sample
    design matrix, so the deterministic terms (intercept, trend) use the
    same time index in the fit and in the residual reconstruction over
    N_T. In particular the trend regressor is t = 1..T throughout, which
    keeps the subsample fit and the full-sample residuals consistent.

    Parameters
    ----------
    y : array_like, shape (T,)
        Dependent variable.
    X : array_like, shape (T,) or (T, K)
        Regressor(s).
    model : str
        Deterministic specification: 'none', 'drift', 'trend'.
    max_ell : float
        Maximum length of noncointegration period as fraction of T.
    step : int
        Step size for search.

    Returns
    -------
    result : dict
        Dictionary with keys:
        - 'tau_hat': (tau_0_hat, tau_1_hat) estimated break fractions
        - 'k0_hat': estimated start of noncointegration period (0-indexed)
        - 'k1_hat': estimated end of noncointegration period (0-indexed)
        - 'Lambda_max': maximum value of Lambda_T

    Raises
    ------
    ValueError
        If *model* is not one of 'none', 'drift', 'trend'.
    """
    y = np.asarray(y, dtype=np.float64).ravel()
    X = np.asarray(X, dtype=np.float64)
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    T = len(y)
    n = X.shape[1] + 1

    # Build the full-sample design matrix once; candidate fits below just
    # select its C_T rows. This avoids refitting deterministic terms on a
    # re-indexed subsample and removes the duplicated regression per window.
    if model == "none":
        D = X
    elif model == "drift":
        D = np.column_stack([np.ones(T), X])
    elif model == "trend":
        D = np.column_stack([np.ones(T), np.arange(1, T + 1), X])
    else:
        raise ValueError(f"Unknown model: {model}")

    max_len = int(max_ell * T)

    best_Lambda = -np.inf
    best_k0 = 0
    best_k1 = 0

    for ell_N in range(1, max_len + 1, step):
        for k0 in range(0, T - ell_N + 1, step):
            k1 = k0 + ell_N
            # Ensure C_T keeps at least n observations (Assumption 2).
            T_C = T - ell_N
            if T_C < n:
                continue

            # Boolean mask: True on C_T, False on N_T = [k0, k1).
            mask = np.ones(T, dtype=bool)
            mask[k0:k1] = False

            # Estimate beta on C_T only, then reconstruct residuals for
            # ALL periods with that beta.
            beta_c, *_ = np.linalg.lstsq(D[mask], y[mask], rcond=None)
            e_all = y - D @ beta_c

            # Lambda_T (Eq. 3.16)
            sum_sq_N = np.sum(e_all[k0:k1] ** 2)
            sum_sq_C = np.sum(e_all[mask] ** 2)

            # A (near-)perfect fit on C_T makes the ratio ill-defined.
            if sum_sq_C < 1e-15:
                continue

            Lambda = (ell_N ** (-2) * sum_sq_N) / (T_C ** (-1) * sum_sq_C)

            if Lambda > best_Lambda:
                best_Lambda = Lambda
                best_k0 = k0
                best_k1 = k1

    return {
        "tau_hat": (best_k0 / T, best_k1 / T),
        "k0_hat": best_k0,
        "k1_hat": best_k1,
        "Lambda_max": best_Lambda,
    }
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
# ============================================================================
|
|
547
|
+
# Result class
|
|
548
|
+
# ============================================================================
|
|
549
|
+
|
|
550
|
+
class KimTestResult:
    """
    Container for Kim (2003) segmented cointegration test results.

    Attributes
    ----------
    stat_types : tuple
        Test statistic types computed.
    infimum_stats : dict
        Infimum statistics: Z*_rho, Z*_t, ADF*_rho, ADF*_t.
    break_k0 : dict
        Start of estimated noncointegration period for each statistic.
    break_k1 : dict
        End of estimated noncointegration period for each statistic.
    full_sample_stats : dict
        Full-sample (non-segmented) test statistics.
    critical_values : dict
        Critical values from Kim (2003, Tables 1-2).
    model : str
        Deterministic specification.
    n : int
        Number of variables.
    T : int
        Sample size.
    max_ell : float
        Maximum segmentation length.
    lag_order : int
        ADF lag order used.
    beta_hat : ndarray
        Estimated cointegrating vector.
    residuals : ndarray
        OLS residuals from full-sample regression.
    """

    def __init__(self, stat_types, infimum_stats, break_k0, break_k1,
                 full_sample_stats, critical_values, model, n, T, max_ell,
                 lag_order, beta_hat, residuals):
        # Plain record: store every constructor argument verbatim.
        for attr, value in (
                ("stat_types", stat_types),
                ("infimum_stats", infimum_stats),
                ("break_k0", break_k0),
                ("break_k1", break_k1),
                ("full_sample_stats", full_sample_stats),
                ("critical_values", critical_values),
                ("model", model),
                ("n", n),
                ("T", T),
                ("max_ell", max_ell),
                ("lag_order", lag_order),
                ("beta_hat", beta_hat),
                ("residuals", residuals)):
            setattr(self, attr, value)

    def significant(self, stat_type="Zt", alpha=0.05):
        """Check if the infimum test rejects H_0 at the given level.

        Returns None when no critical value is available for (stat_type, alpha).
        """
        cv = self.critical_values.get(stat_type, {}).get(alpha)
        return None if cv is None else self.infimum_stats[stat_type] < cv

    def break_dates(self, stat_type="Zt"):
        """Return estimated break dates (0-indexed) for a given statistic."""
        return (self.break_k0.get(stat_type), self.break_k1.get(stat_type))

    def break_fractions(self, stat_type="Zt"):
        """Return estimated break fractions tau_0, tau_1 (i.e. k0/T, k1/T)."""
        k0, k1 = self.break_k0.get(stat_type), self.break_k1.get(stat_type)
        if k0 is None or k1 is None:
            return None, None
        return k0 / self.T, k1 / self.T

    def summary(self):
        """
        Produce a formatted summary string suitable for publication.

        Returns
        -------
        s : str
        """
        labels = {"none": "Case I (no deterministics)",
                  "drift": "Case II (intercept)",
                  "trend": "Case III (intercept + trend)"}

        rule = "=" * 72
        dash = "-" * 72

        # Header section.
        out = [
            rule,
            "Kim (2003) Segmented Cointegration Test Results",
            rule,
            f"Model: {labels.get(self.model, self.model)}",
            f"Sample size (T): {self.T}",
            f"Variables (n): {self.n}",
            f"Max ell (T_N): {self.max_ell:.2f}",
            f"ADF lag order: {self.lag_order}",
            "",
            dash,
            f"{'Statistic':<12} {'Inf. Value':>12} {'Full Sample':>12}"
            f" {'5% CV':>10} {'Reject H0':>10}"
            f" {'tau_0':>8} {'tau_1':>8}",
            dash,
        ]

        # One row per computed statistic.
        for s in self.stat_types:
            inf_val = self.infimum_stats.get(s, np.nan)
            full_val = self.full_sample_stats.get(s, np.nan)
            cv_05 = self.critical_values.get(s, {}).get(0.05, np.nan)

            reject = self.significant(s, 0.05)
            if reject is None:
                flag = "N/A"
            elif reject:
                flag = "Yes***"
            else:
                flag = "No"

            t0, t1 = self.break_fractions(s)
            fracs = ["N/A" if v is None else f"{v:.3f}" for v in (t0, t1)]

            out.append(
                f"{s + '*':<12} {inf_val:>12.4f} {full_val:>12.4f}"
                f" {cv_05:>10.4f} {flag:>10}"
                f" {fracs[0]:>8} {fracs[1]:>8}")

        # Footer notes.
        out += [
            dash,
            "Notes: Infimum statistics are Z*_rho, Z*_t, ADF*_rho, ADF*_t",
            "       from Kim (2003, Eqs. 3.13-3.14).",
            "       Critical values from Tables 1-2 for ell_bar(T_N)=0.3.",
            "       Reject H0 implies segmented cointegration detected.",
            rule,
        ]
        return "\n".join(out)

    def __repr__(self):
        return self.summary()
|