segmcoint 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- segmcoint/__init__.py +103 -0
- segmcoint/kim2003.py +672 -0
- segmcoint/martins_rodrigues2022.py +783 -0
- segmcoint/simulation.py +333 -0
- segmcoint/utils.py +433 -0
- segmcoint-1.0.0.dist-info/METADATA +144 -0
- segmcoint-1.0.0.dist-info/RECORD +10 -0
- segmcoint-1.0.0.dist-info/WHEEL +5 -0
- segmcoint-1.0.0.dist-info/licenses/LICENSE +21 -0
- segmcoint-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,783 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Wald-type tests for segmented cointegration from Martins & Rodrigues (2022).
|
|
3
|
+
|
|
4
|
+
Implements the residual-based Wald-type tests for segmented cointegration
|
|
5
|
+
proposed in:
|
|
6
|
+
|
|
7
|
+
Martins, L.F. and Rodrigues, P.M.M. (2022). Tests for Segmented
|
|
8
|
+
Cointegration: An Application to US Governments Budgets.
|
|
9
|
+
Empirical Economics, 63, 567-600.
|
|
10
|
+
|
|
11
|
+
This module provides:
|
|
12
|
+
- F_A(tau, m*) and F_B(tau, m*) statistics (Eq. 3.2)
|
|
13
|
+
- sup F_A(m*) and sup F_B(m*) (Eq. 3.3)
|
|
14
|
+
- W(m*) combined statistic (Eq. 3.4)
|
|
15
|
+
- W_max double maximum statistic (Eq. 3.5)
|
|
16
|
+
- Critical value table (Table 1 from the paper)
|
|
17
|
+
- Break date estimation (Remark 3)
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import numpy as np
|
|
21
|
+
import warnings
|
|
22
|
+
from numpy.linalg import inv
|
|
23
|
+
from itertools import product as iter_product
|
|
24
|
+
from .utils import ols_residuals, select_lag_bic
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# ============================================================================
# Critical values from Martins & Rodrigues (2022, Table 1)
# Columns: W(1), W(2), W(3), W(4), W_max
# Rows indexed by K+1 (number of variables)
# ============================================================================

# Significance levels: 10%, 5%, 2.5%, 1%
_SIG_LEVELS = [0.10, 0.05, 0.025, 0.01]

# Each table maps {K+1: {alpha: [W(1), W(2), W(3), W(4), W_max]}} so that a
# lookup yields the row of Table 1 for one deterministic specification.

# No deterministics
_CV_NO_DET = {
    2: {
        0.10: [8.229, 8.362, 7.168, 6.804, 9.677],
        0.05: [9.367, 9.329, 7.956, 7.790, 11.033],
        0.025: [10.615, 10.334, 8.901, 8.932, 12.499],
        0.01: [12.349, 11.958, 10.574, 11.129, 15.016],
    },
    3: {
        0.10: [7.812, 8.216, 6.872, 6.657, 9.403],
        0.05: [8.915, 9.165, 7.660, 7.612, 10.539],
        0.025: [9.829, 9.942, 8.583, 8.575, 12.085],
        0.01: [11.476, 11.298, 9.972, 10.942, 13.887],
    },
    4: {
        0.10: [7.529, 7.988, 6.664, 6.492, 9.131],
        0.05: [8.542, 8.903, 7.450, 7.495, 10.459],
        0.025: [9.615, 9.851, 8.293, 8.661, 11.795],
        0.01: [11.026, 11.217, 9.967, 10.835, 13.941],
    },
    5: {
        0.10: [7.546, 7.942, 6.516, 6.393, 8.952],
        0.05: [8.448, 8.921, 7.171, 7.530, 9.989],
        0.025: [9.320, 9.734, 7.915, 8.750, 11.180],
        0.01: [10.718, 10.741, 9.175, 10.640, 13.109],
    },
    6: {
        0.10: [7.857, 7.882, 6.553, 6.374, 9.151],
        0.05: [8.772, 8.832, 7.235, 7.397, 10.425],
        0.025: [9.733, 9.875, 8.003, 8.740, 11.803],
        0.01: [11.029, 10.910, 9.127, 11.314, 13.336],
    },
}

# Intercept only
_CV_INTERCEPT = {
    2: {
        0.10: [8.050, 8.279, 7.069, 6.895, 9.536],
        0.05: [9.106, 9.277, 7.764, 7.731, 10.762],
        0.025: [10.308, 10.269, 8.684, 8.930, 12.025],
        0.01: [12.089, 11.428, 10.329, 11.083, 14.670],
    },
    3: {
        0.10: [7.711, 8.036, 6.830, 6.664, 9.214],
        0.05: [8.761, 9.090, 7.645, 7.619, 10.552],
        0.025: [9.661, 10.050, 8.405, 8.602, 11.797],
        0.01: [11.103, 11.470, 9.855, 10.713, 13.973],
    },
    4: {
        0.10: [7.669, 7.913, 6.598, 6.499, 9.093],
        0.05: [8.628, 8.852, 7.281, 7.475, 10.330],
        0.025: [9.721, 9.816, 8.199, 8.738, 11.597],
        0.01: [10.707, 11.419, 9.374, 10.905, 13.785],
    },
    5: {
        0.10: [7.994, 7.928, 6.658, 6.418, 9.194],
        0.05: [8.936, 8.936, 7.364, 7.449, 10.407],
        0.025: [9.876, 9.801, 8.104, 8.683, 11.867],
        0.01: [11.179, 11.204, 9.720, 11.017, 13.893],
    },
    6: {
        0.10: [8.452, 8.023, 6.740, 6.375, 9.591],
        0.05: [9.616, 8.942, 7.450, 7.323, 10.754],
        0.025: [10.667, 9.864, 8.199, 8.511, 11.806],
        0.01: [11.793, 11.028, 9.480, 10.701, 13.379],
    },
}

# Intercept and time trend
_CV_TREND = {
    2: {
        0.10: [8.373, 8.527, 7.279, 7.152, 9.946],
        0.05: [9.810, 9.755, 8.229, 8.311, 11.580],
        0.025: [11.638, 11.092, 9.531, 9.561, 13.631],
        0.01: [15.592, 12.568, 11.884, 12.126, 17.734],
    },
    3: {
        0.10: [7.666, 8.085, 6.863, 6.713, 9.298],
        0.05: [8.756, 9.070, 7.597, 7.792, 10.430],
        0.025: [9.843, 10.110, 8.467, 9.017, 11.671],
        0.01: [11.365, 11.222, 9.612, 11.072, 13.690],
    },
    4: {
        0.10: [7.588, 7.985, 6.659, 6.588, 9.208],
        0.05: [8.589, 9.049, 7.366, 7.688, 10.375],
        0.025: [9.471, 10.000, 8.148, 9.003, 11.628],
        0.01: [10.870, 11.291, 9.423, 11.085, 13.334],
    },
    5: {
        0.10: [7.947, 7.903, 6.646, 6.435, 9.279],
        0.05: [9.000, 8.969, 7.289, 7.528, 10.461],
        0.025: [10.037, 9.785, 8.024, 8.784, 12.084],
        0.01: [11.751, 11.230, 9.500, 11.696, 14.375],
    },
    6: {
        0.10: [8.330, 7.890, 6.641, 6.458, 9.443],
        0.05: [9.412, 8.767, 7.363, 7.454, 10.676],
        0.025: [10.398, 9.755, 8.048, 9.080, 12.028],
        0.01: [11.916, 10.989, 9.111, 11.956, 13.747],
    },
}
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _get_mr_cv_table(model):
    """Return the M&R (2022) critical-value table for a deterministic spec.

    ``model`` is one of 'none', 'drift', 'trend'; raises ValueError for
    anything else.
    """
    tables = {
        "none": _CV_NO_DET,
        "drift": _CV_INTERCEPT,
        "trend": _CV_TREND,
    }
    if model not in tables:
        raise ValueError(f"Unknown model: {model}")
    return tables[model]
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def get_mr_critical_value(K_plus_1, alpha, m_star, model="drift"):
    """
    Look up critical value from Martins & Rodrigues (2022, Table 1).

    Parameters
    ----------
    K_plus_1 : int
        Total number of variables (K+1, where K is the number of regressors).
    alpha : float
        Significance level: 0.10, 0.05, 0.025, 0.01.
    m_star : int or str
        Number of breaks (1, 2, 3, 4) or 'max' for W_max.
    model : str
        Deterministic specification: 'none', 'drift', 'trend'.

    Returns
    -------
    cv : float
        Critical value.
    """
    table = _get_mr_cv_table(model)

    # Validate the row (K+1) and significance level before indexing.
    if K_plus_1 not in table:
        raise ValueError(
            f"K+1={K_plus_1} not available. Use one of {list(table.keys())}")
    row = table[K_plus_1]
    if alpha not in row:
        raise ValueError(f"alpha={alpha} not available. Use one of {_SIG_LEVELS}")

    # Columns are ordered [W(1), W(2), W(3), W(4), W_max].
    if m_star == "max":
        column = 4
    elif isinstance(m_star, int) and 1 <= m_star <= 4:
        column = m_star - 1
    else:
        raise ValueError(f"m_star must be 1, 2, 3, 4, or 'max'. Got {m_star}")

    return row[alpha][column]
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
# ============================================================================
|
|
189
|
+
# Core computation: ADF test regression for a subsample
|
|
190
|
+
# ============================================================================
|
|
191
|
+
|
|
192
|
+
def _subsample_adf_regression(e, t_start, t_end, p_T, include_ec=True):
|
|
193
|
+
"""
|
|
194
|
+
ADF regression on a subsample e[t_start:t_end].
|
|
195
|
+
|
|
196
|
+
For the subsample, estimate:
|
|
197
|
+
Delta e_t = c + gamma * e_{t-1} + sum_{i=1}^{p_T} pi_i * Delta e_{t-i} + a_t
|
|
198
|
+
|
|
199
|
+
or under the null (include_ec=False):
|
|
200
|
+
Delta e_t = sum_{i=1}^{p_T} pi_i * Delta e_{t-i} + a_t
|
|
201
|
+
|
|
202
|
+
Parameters
|
|
203
|
+
----------
|
|
204
|
+
e : ndarray
|
|
205
|
+
Full residual series.
|
|
206
|
+
t_start : int
|
|
207
|
+
Start index of subsample (inclusive, 0-indexed).
|
|
208
|
+
t_end : int
|
|
209
|
+
End index of subsample (exclusive, 0-indexed).
|
|
210
|
+
p_T : int
|
|
211
|
+
Lag order for augmented terms.
|
|
212
|
+
include_ec : bool
|
|
213
|
+
If True, include the error correction term (c + gamma * e_{t-1}).
|
|
214
|
+
|
|
215
|
+
Returns
|
|
216
|
+
-------
|
|
217
|
+
ssr : float
|
|
218
|
+
Sum of squared residuals.
|
|
219
|
+
n_obs : int
|
|
220
|
+
Number of observations used.
|
|
221
|
+
"""
|
|
222
|
+
sub_e = e[t_start:t_end]
|
|
223
|
+
T_sub = len(sub_e)
|
|
224
|
+
|
|
225
|
+
if T_sub <= p_T + 2:
|
|
226
|
+
return np.nan, 0
|
|
227
|
+
|
|
228
|
+
de = np.diff(sub_e) # Delta e_t, length T_sub - 1
|
|
229
|
+
|
|
230
|
+
# Effective sample starts at index p_T in de
|
|
231
|
+
n_obs = len(de) - p_T
|
|
232
|
+
if n_obs <= 0:
|
|
233
|
+
return np.nan, 0
|
|
234
|
+
|
|
235
|
+
Y = de[p_T:] # Dependent variable
|
|
236
|
+
|
|
237
|
+
# Build regressors
|
|
238
|
+
regressors = []
|
|
239
|
+
|
|
240
|
+
# Augmented lags: Delta e_{t-i} for i = 1, ..., p_T
|
|
241
|
+
for i in range(1, p_T + 1):
|
|
242
|
+
regressors.append(de[p_T - i: len(de) - i])
|
|
243
|
+
|
|
244
|
+
if include_ec:
|
|
245
|
+
# Intercept
|
|
246
|
+
regressors.append(np.ones(n_obs))
|
|
247
|
+
# e_{t-1}
|
|
248
|
+
e_lag = sub_e[p_T: -1] if p_T > 0 else sub_e[:-1]
|
|
249
|
+
regressors.append(e_lag)
|
|
250
|
+
|
|
251
|
+
if len(regressors) == 0:
|
|
252
|
+
ssr = np.sum(Y ** 2)
|
|
253
|
+
return ssr, n_obs
|
|
254
|
+
|
|
255
|
+
Z = np.column_stack(regressors)
|
|
256
|
+
|
|
257
|
+
if Z.shape[1] >= n_obs:
|
|
258
|
+
return np.nan, 0
|
|
259
|
+
|
|
260
|
+
try:
|
|
261
|
+
beta = inv(Z.T @ Z) @ (Z.T @ Y)
|
|
262
|
+
residuals = Y - Z @ beta
|
|
263
|
+
ssr = np.sum(residuals ** 2)
|
|
264
|
+
except np.linalg.LinAlgError:
|
|
265
|
+
return np.nan, 0
|
|
266
|
+
|
|
267
|
+
return ssr, n_obs
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
# ============================================================================
|
|
271
|
+
# Compute F_A and F_B statistics
|
|
272
|
+
# Martins & Rodrigues (2022, Eq. 3.2)
|
|
273
|
+
# ============================================================================
|
|
274
|
+
|
|
275
|
+
def _compute_F_statistic(e, breaks, m_star, hypothesis, p_T):
    """
    Compute the F_A or F_B statistic (M&R 2022, Eq. 3.2) for given breaks.

    Parameters
    ----------
    e : ndarray
        Full residual series.
    breaks : tuple of int
        Break dates (0-indexed). Length m_star.
    m_star : int
        Number of breaks.
    hypothesis : str
        'A' (first regime is I(1)) or 'B' (first regime is I(0)).
    p_T : int
        Lag order.

    Returns
    -------
    F_stat : float
        Test statistic value (np.nan if any regime regression fails).
    """
    T = len(e)

    # Regime boundaries: [0, tau_1, ..., tau_{m*}, T]
    edges = [0, *breaks, T]
    n_regimes = m_star + 1

    # Restricted SSR under the null: no error-correction term in any regime.
    ssr_null = 0.0
    for j in range(n_regimes):
        ssr_j, _ = _subsample_adf_regression(
            e, edges[j], edges[j + 1], p_T, include_ec=False)
        if np.isnan(ssr_j):
            return np.nan
        ssr_null += ssr_j

    # Unrestricted SSR under the alternative: regimes alternate between
    # I(1) and I(0), the starting state being fixed by the hypothesis.
    ssr_alt = 0.0
    for j in range(n_regimes):
        regime = j + 1  # 1-indexed regime label
        if hypothesis == "A":
            # H1A: odd regimes are I(1), even regimes are I(0)
            with_ec = regime % 2 == 0
        elif hypothesis == "B":
            # H1B: odd regimes are I(0), even regimes are I(1)
            with_ec = regime % 2 == 1
        else:
            raise ValueError(f"hypothesis must be 'A' or 'B', got {hypothesis}")

        ssr_j, _ = _subsample_adf_regression(
            e, edges[j], edges[j + 1], p_T, include_ec=with_ec)
        if np.isnan(ssr_j):
            return np.nan
        ssr_alt += ssr_j

    if ssr_alt <= 0:
        return np.nan

    # Degrees of freedom (Eq. 3.2). The restriction count q depends on the
    # parity of m_star and, for even m_star, on the hypothesis tested;
    # the residual degrees of freedom are then T - q - p_T in both cases.
    delta_B = 1 if hypothesis == "B" else 0
    q = m_star + 2 * delta_B if m_star % 2 == 0 else m_star + 1
    resid_df = T - q - p_T

    if resid_df <= 0 or q <= 0:
        return np.nan

    return (resid_df * (ssr_null - ssr_alt)) / (q * ssr_alt)
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
# ============================================================================
|
|
360
|
+
# Generate all possible break date combinations
|
|
361
|
+
# ============================================================================
|
|
362
|
+
|
|
363
|
+
def _generate_break_dates(T, m_star, epsilon):
    """
    Generate all admissible break date partitions.

    Following Martins & Rodrigues (2022, below Eq. 3.3):
        tau_{j+1} - tau_j >= epsilon
        tau_1 >= epsilon
        tau_{m*} <= 1 - epsilon

    This is exactly the exhaustive grid, i.e. the step=1 special case of
    :func:`_generate_break_dates_fast`; delegating removes the previous
    duplicated 4-deep loop nest.

    Parameters
    ----------
    T : int
        Sample size.
    m_star : int
        Number of breaks.
    epsilon : float
        Trimming parameter.

    Yields
    ------
    breaks : tuple of int
        Break dates (0-indexed).
    """
    yield from _generate_break_dates_fast(T, m_star, epsilon, step=1)
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
def _generate_break_dates_fast(T, m_star, epsilon, step=1):
|
|
411
|
+
"""
|
|
412
|
+
Generate break date partitions with optional step for speed.
|
|
413
|
+
|
|
414
|
+
Same as _generate_break_dates but with configurable step size.
|
|
415
|
+
"""
|
|
416
|
+
min_seg = max(int(np.ceil(epsilon * T)), 2)
|
|
417
|
+
|
|
418
|
+
if m_star == 1:
|
|
419
|
+
for t1 in range(min_seg, T - min_seg + 1, step):
|
|
420
|
+
yield (t1,)
|
|
421
|
+
elif m_star == 2:
|
|
422
|
+
for t1 in range(min_seg, T - 2 * min_seg + 1, step):
|
|
423
|
+
for t2 in range(t1 + min_seg, T - min_seg + 1, step):
|
|
424
|
+
yield (t1, t2)
|
|
425
|
+
elif m_star == 3:
|
|
426
|
+
for t1 in range(min_seg, T - 3 * min_seg + 1, step):
|
|
427
|
+
for t2 in range(t1 + min_seg, T - 2 * min_seg + 1, step):
|
|
428
|
+
for t3 in range(t2 + min_seg, T - min_seg + 1, step):
|
|
429
|
+
yield (t1, t2, t3)
|
|
430
|
+
elif m_star == 4:
|
|
431
|
+
for t1 in range(min_seg, T - 4 * min_seg + 1, step):
|
|
432
|
+
for t2 in range(t1 + min_seg, T - 3 * min_seg + 1, step):
|
|
433
|
+
for t3 in range(t2 + min_seg, T - 2 * min_seg + 1, step):
|
|
434
|
+
for t4 in range(t3 + min_seg, T - min_seg + 1, step):
|
|
435
|
+
yield (t1, t2, t3, t4)
|
|
436
|
+
else:
|
|
437
|
+
raise ValueError(f"m_star must be 1-4, got {m_star}")
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
# ============================================================================
|
|
441
|
+
# Main test function
|
|
442
|
+
# ============================================================================
|
|
443
|
+
|
|
444
|
+
def mr_test(y, X, model="drift", max_breaks=4, epsilon=0.15,
            p=None, max_p=12, step=1, verbose=False):
    """
    Martins & Rodrigues (2022) Wald-type tests for segmented cointegration.

    Computes residual-based sup-Wald-type test statistics for detecting
    segmented cointegration with multiple structural breaks.

    The null hypothesis is H_0: no cointegration over the entire sample.
    The alternative allows m breaks with consecutive switches between
    stationarity and nonstationarity.

    Parameters
    ----------
    y : array_like, shape (T,)
        Dependent variable.
    X : array_like, shape (T,) or (T, K)
        Regressor(s).
    model : str
        Deterministic specification for the cointegrating regression:
        'none', 'drift', 'trend'.
    max_breaks : int
        Maximum number of breaks to consider (m_bar). Default 4.
    epsilon : float
        Trimming parameter. Default 0.15 as in the paper.
    p : int or None
        Lag order for ADF augmented terms. If None, selected by BIC.
    max_p : int
        Maximum lag order for BIC selection.
    step : int
        Step size for grid search over break dates.
        Use step > 1 for faster computation with large samples.
    verbose : bool
        If True, print progress.

    Returns
    -------
    results : MRTestResult
        Object containing W(m*), W_max statistics, critical values,
        break date estimates, and other information.
    """
    y = np.asarray(y, dtype=np.float64).ravel()
    X = np.asarray(X, dtype=np.float64)
    if X.ndim == 1:
        # Promote a single regressor to a (T, 1) matrix.
        X = X.reshape(-1, 1)
    T = len(y)
    K = X.shape[1]
    K_plus_1 = K + 1  # total variable count, used for critical-value lookup

    # Step 1: Estimate cointegrating regression on full sample
    e, beta_hat = ols_residuals(y, X, model=model)

    # Step 2: Select lag order
    if p is None:
        p = select_lag_bic(e, max_p=max_p)

    # Step 3: Compute test statistics for each m*
    W_stats = {}
    sup_FA_stats = {}
    sup_FB_stats = {}
    best_breaks = {}

    for m_star in range(1, max_breaks + 1):
        if verbose:
            print(f"Computing W({m_star})...")

        # Track the supremum of F_A / F_B over the break-date grid (Eq. 3.3);
        # -inf sentinels survive when every candidate evaluates to NaN.
        best_FA = -np.inf
        best_FA_breaks = None
        best_FB = -np.inf
        best_FB_breaks = None

        for breaks in _generate_break_dates_fast(T, m_star, epsilon, step):
            # F_A: first regime is I(1)
            FA = _compute_F_statistic(e, breaks, m_star, "A", p)
            if np.isfinite(FA) and FA > best_FA:
                best_FA = FA
                best_FA_breaks = breaks

            # F_B: first regime is I(0)
            FB = _compute_F_statistic(e, breaks, m_star, "B", p)
            if np.isfinite(FB) and FB > best_FB:
                best_FB = FB
                best_FB_breaks = breaks

        sup_FA_stats[m_star] = best_FA
        sup_FB_stats[m_star] = best_FB

        # W(m*) = max(sup F_A(m*), sup F_B(m*)) (Eq. 3.4)
        W_m = max(best_FA, best_FB)
        W_stats[m_star] = W_m

        # Determine which hypothesis and breaks correspond to W(m*)
        # (ties favor hypothesis A; fractions are breaks scaled by T).
        if best_FA >= best_FB:
            best_breaks[m_star] = {
                "hypothesis": "A",
                "breaks": best_FA_breaks,
                "fractions": tuple(
                    b / T for b in best_FA_breaks) if best_FA_breaks else None,
            }
        else:
            best_breaks[m_star] = {
                "hypothesis": "B",
                "breaks": best_FB_breaks,
                "fractions": tuple(
                    b / T for b in best_FB_breaks) if best_FB_breaks else None,
            }

    # W_max = max_{1<=m<=m_bar} W(m) (Eq. 3.5)
    W_max = max(W_stats.values()) if W_stats else np.nan
    W_max_m = max(W_stats, key=W_stats.get) if W_stats else None

    # Collect critical values; an entry stays empty when Table 1 does not
    # cover the requested K+1 / model combination.
    cvs = {}
    for m_star in range(1, max_breaks + 1):
        try:
            cvs[m_star] = {
                alpha: get_mr_critical_value(
                    K_plus_1, alpha, m_star, model=model)
                for alpha in _SIG_LEVELS
            }
        except (ValueError, KeyError):
            cvs[m_star] = {}
    try:
        cvs["max"] = {
            alpha: get_mr_critical_value(
                K_plus_1, alpha, "max", model=model)
            for alpha in _SIG_LEVELS
        }
    except (ValueError, KeyError):
        cvs["max"] = {}

    return MRTestResult(
        W_stats=W_stats,
        W_max=W_max,
        W_max_m=W_max_m,
        sup_FA=sup_FA_stats,
        sup_FB=sup_FB_stats,
        best_breaks=best_breaks,
        critical_values=cvs,
        model=model,
        K_plus_1=K_plus_1,
        T=T,
        epsilon=epsilon,
        max_breaks=max_breaks,
        lag_order=p,
        beta_hat=beta_hat,
        residuals=e,
    )
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
# ============================================================================
|
|
595
|
+
# Result class
|
|
596
|
+
# ============================================================================
|
|
597
|
+
|
|
598
|
+
class MRTestResult:
    """
    Container for Martins & Rodrigues (2022) test results.

    Attributes
    ----------
    W_stats : dict
        W(m*) statistics for m* = 1, ..., max_breaks.
    W_max : float
        W_max double maximum statistic.
    W_max_m : int
        Number of breaks corresponding to W_max.
    sup_FA : dict
        sup F_A(m*) statistics.
    sup_FB : dict
        sup F_B(m*) statistics.
    best_breaks : dict
        Best break date information for each m*.
    critical_values : dict
        Critical values from Table 1.
    model : str
        Deterministic specification.
    K_plus_1 : int
        Total number of variables.
    T : int
        Sample size.
    epsilon : float
        Trimming parameter.
    max_breaks : int
        Maximum number of breaks considered.
    lag_order : int
        ADF lag order used.
    beta_hat : ndarray
        Estimated cointegrating vector.
    residuals : ndarray
        Full-sample OLS residuals.
    """

    def __init__(self, W_stats, W_max, W_max_m, sup_FA, sup_FB,
                 best_breaks, critical_values, model, K_plus_1, T,
                 epsilon, max_breaks, lag_order, beta_hat, residuals):
        # Plain attribute assignment only; all computation happens in mr_test.
        self.W_stats = W_stats
        self.W_max = W_max
        self.W_max_m = W_max_m
        self.sup_FA = sup_FA
        self.sup_FB = sup_FB
        self.best_breaks = best_breaks
        self.critical_values = critical_values
        self.model = model
        self.K_plus_1 = K_plus_1
        self.T = T
        self.epsilon = epsilon
        self.max_breaks = max_breaks
        self.lag_order = lag_order
        self.beta_hat = beta_hat
        self.residuals = residuals

    def significant(self, m_star="max", alpha=0.05):
        """
        Check if the test rejects H_0 at the given level.

        Parameters
        ----------
        m_star : int or str
            Number of breaks (1-4) or 'max'.
        alpha : float
            Significance level.

        Returns
        -------
        reject : bool or None
            True/False for a decision; None when the statistic or the
            critical value is unavailable for this (m_star, alpha).
        """
        if m_star == "max":
            stat_val = self.W_max
            cv = self.critical_values.get("max", {}).get(alpha)
        else:
            stat_val = self.W_stats.get(m_star)
            cv = self.critical_values.get(m_star, {}).get(alpha)

        if cv is None or stat_val is None:
            return None
        return stat_val > cv

    def estimated_breaks(self, m_star=None):
        """
        Return estimated break dates and fractions.

        Parameters
        ----------
        m_star : int or None
            Number of breaks. If None, uses W_max_m.

        Returns
        -------
        info : dict
            Keys 'hypothesis', 'breaks', 'fractions' (empty dict when the
            requested m_star has no recorded entry).
        """
        if m_star is None:
            m_star = self.W_max_m
        return self.best_breaks.get(m_star, {})

    def summary(self):
        """
        Produce a formatted summary string suitable for publication.

        Returns
        -------
        s : str
        """
        model_labels = {"none": "No deterministics",
                        "drift": "Intercept only",
                        "trend": "Intercept and time trend"}

        lines = []
        lines.append("=" * 78)
        lines.append("Martins & Rodrigues (2022) Wald-Type Tests for Segmented Cointegration")
        lines.append("=" * 78)
        lines.append(f"Model: {model_labels.get(self.model, self.model)}")
        lines.append(f"Sample size (T): {self.T}")
        lines.append(f"Variables (K+1): {self.K_plus_1}")
        lines.append(f"Trimming (eps): {self.epsilon:.2f}")
        lines.append(f"Max breaks: {self.max_breaks}")
        lines.append(f"ADF lag order: {self.lag_order}")
        lines.append("")

        # W(m*) statistics
        lines.append("-" * 78)
        lines.append(f"{'Test':<10} {'Statistic':>12} "
                     f"{'10% CV':>10} {'5% CV':>10} {'1% CV':>10} "
                     f"{'Reject 5%':>10}")
        lines.append("-" * 78)

        for m_star in range(1, self.max_breaks + 1):
            stat = self.W_stats.get(m_star, np.nan)
            cv10 = self.critical_values.get(m_star, {}).get(0.10, np.nan)
            cv05 = self.critical_values.get(m_star, {}).get(0.05, np.nan)
            cv01 = self.critical_values.get(m_star, {}).get(0.01, np.nan)
            rej = self.significant(m_star, 0.05)
            # None (no decision possible) renders as "N/A".
            rej_str = "Yes**" if rej else ("No" if rej is not None else "N/A")

            lines.append(
                f"W({m_star}) {stat:>12.4f} "
                f"{cv10:>10.3f} {cv05:>10.3f} {cv01:>10.3f} "
                f"{rej_str:>10}")

        # W_max
        stat = self.W_max
        cv10 = self.critical_values.get("max", {}).get(0.10, np.nan)
        cv05 = self.critical_values.get("max", {}).get(0.05, np.nan)
        cv01 = self.critical_values.get("max", {}).get(0.01, np.nan)
        rej = self.significant("max", 0.05)
        rej_str = "Yes**" if rej else ("No" if rej is not None else "N/A")

        lines.append(
            f"W_max {stat:>12.4f} "
            f"{cv10:>10.3f} {cv05:>10.3f} {cv01:>10.3f} "
            f"{rej_str:>10}")

        lines.append("-" * 78)

        # Break date estimates
        lines.append("")
        lines.append("Estimated break dates (for W_max):")
        m_opt = self.W_max_m
        if m_opt is not None:
            info = self.best_breaks.get(m_opt, {})
            hyp = info.get("hypothesis", "N/A")
            brk = info.get("breaks", ())
            frac = info.get("fractions", ())

            # Under H1A the first regime is I(1); under H1B it is I(0).
            r1_label = "I(1)" if hyp == "A" else "I(0)"
            lines.append(f" Number of breaks: {m_opt}")
            lines.append(f" First regime: {r1_label} (H1{hyp})")
            if brk:
                lines.append(f" Break dates: {brk}")
                lines.append(f" Break fractions: "
                             f"{tuple(round(f, 4) for f in frac)}")

        lines.append("")
        lines.append("Notes: W(m*) = max(sup F_A(m*), sup F_B(m*)).")
        lines.append(" W_max = max_{1<=m<=m_bar} W(m).")
        lines.append(" Critical values from Martins & Rodrigues (2022, Table 1).")
        lines.append("=" * 78)
        return "\n".join(lines)

    def __repr__(self):
        # Delegate to the full formatted summary for interactive display.
        return self.summary()
|