segmcoint 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,783 @@
1
+ """
2
+ Wald-type tests for segmented cointegration from Martins & Rodrigues (2022).
3
+
4
+ Implements the residual-based Wald-type tests for segmented cointegration
5
+ proposed in:
6
+
7
+ Martins, L.F. and Rodrigues, P.M.M. (2022). Tests for Segmented
8
+ Cointegration: An Application to US Governments Budgets.
9
+ Empirical Economics, 63, 567-600.
10
+
11
+ This module provides:
12
+ - F_A(tau, m*) and F_B(tau, m*) statistics (Eq. 3.2)
13
+ - sup F_A(m*) and sup F_B(m*) (Eq. 3.3)
14
+ - W(m*) combined statistic (Eq. 3.4)
15
+ - W_max double maximum statistic (Eq. 3.5)
16
+ - Critical value table (Table 1 from the paper)
17
+ - Break date estimation (Remark 3)
18
+ """
19
+
20
+ import numpy as np
21
+ import warnings
22
+ from numpy.linalg import inv
23
+ from itertools import product as iter_product
24
+ from .utils import ols_residuals, select_lag_bic
25
+
26
+
27
+ # ============================================================================
28
+ # Critical values from Martins & Rodrigues (2022, Table 1)
29
+ # Columns: W(1), W(2), W(3), W(4), W_max
30
+ # Rows indexed by K+1 (number of variables)
31
+ # ============================================================================
32
+
33
+ # Significance levels: 10%, 5%, 2.5%, 1%
34
+ _SIG_LEVELS = [0.10, 0.05, 0.025, 0.01]
35
+
36
+ # No deterministics
37
+ _CV_NO_DET = {
38
+ 2: {
39
+ 0.10: [8.229, 8.362, 7.168, 6.804, 9.677],
40
+ 0.05: [9.367, 9.329, 7.956, 7.790, 11.033],
41
+ 0.025: [10.615, 10.334, 8.901, 8.932, 12.499],
42
+ 0.01: [12.349, 11.958, 10.574, 11.129, 15.016],
43
+ },
44
+ 3: {
45
+ 0.10: [7.812, 8.216, 6.872, 6.657, 9.403],
46
+ 0.05: [8.915, 9.165, 7.660, 7.612, 10.539],
47
+ 0.025: [9.829, 9.942, 8.583, 8.575, 12.085],
48
+ 0.01: [11.476, 11.298, 9.972, 10.942, 13.887],
49
+ },
50
+ 4: {
51
+ 0.10: [7.529, 7.988, 6.664, 6.492, 9.131],
52
+ 0.05: [8.542, 8.903, 7.450, 7.495, 10.459],
53
+ 0.025: [9.615, 9.851, 8.293, 8.661, 11.795],
54
+ 0.01: [11.026, 11.217, 9.967, 10.835, 13.941],
55
+ },
56
+ 5: {
57
+ 0.10: [7.546, 7.942, 6.516, 6.393, 8.952],
58
+ 0.05: [8.448, 8.921, 7.171, 7.530, 9.989],
59
+ 0.025: [9.320, 9.734, 7.915, 8.750, 11.180],
60
+ 0.01: [10.718, 10.741, 9.175, 10.640, 13.109],
61
+ },
62
+ 6: {
63
+ 0.10: [7.857, 7.882, 6.553, 6.374, 9.151],
64
+ 0.05: [8.772, 8.832, 7.235, 7.397, 10.425],
65
+ 0.025: [9.733, 9.875, 8.003, 8.740, 11.803],
66
+ 0.01: [11.029, 10.910, 9.127, 11.314, 13.336],
67
+ },
68
+ }
69
+
70
+ # Intercept only
71
+ _CV_INTERCEPT = {
72
+ 2: {
73
+ 0.10: [8.050, 8.279, 7.069, 6.895, 9.536],
74
+ 0.05: [9.106, 9.277, 7.764, 7.731, 10.762],
75
+ 0.025: [10.308, 10.269, 8.684, 8.930, 12.025],
76
+ 0.01: [12.089, 11.428, 10.329, 11.083, 14.670],
77
+ },
78
+ 3: {
79
+ 0.10: [7.711, 8.036, 6.830, 6.664, 9.214],
80
+ 0.05: [8.761, 9.090, 7.645, 7.619, 10.552],
81
+ 0.025: [9.661, 10.050, 8.405, 8.602, 11.797],
82
+ 0.01: [11.103, 11.470, 9.855, 10.713, 13.973],
83
+ },
84
+ 4: {
85
+ 0.10: [7.669, 7.913, 6.598, 6.499, 9.093],
86
+ 0.05: [8.628, 8.852, 7.281, 7.475, 10.330],
87
+ 0.025: [9.721, 9.816, 8.199, 8.738, 11.597],
88
+ 0.01: [10.707, 11.419, 9.374, 10.905, 13.785],
89
+ },
90
+ 5: {
91
+ 0.10: [7.994, 7.928, 6.658, 6.418, 9.194],
92
+ 0.05: [8.936, 8.936, 7.364, 7.449, 10.407],
93
+ 0.025: [9.876, 9.801, 8.104, 8.683, 11.867],
94
+ 0.01: [11.179, 11.204, 9.720, 11.017, 13.893],
95
+ },
96
+ 6: {
97
+ 0.10: [8.452, 8.023, 6.740, 6.375, 9.591],
98
+ 0.05: [9.616, 8.942, 7.450, 7.323, 10.754],
99
+ 0.025: [10.667, 9.864, 8.199, 8.511, 11.806],
100
+ 0.01: [11.793, 11.028, 9.480, 10.701, 13.379],
101
+ },
102
+ }
103
+
104
+ # Intercept and time trend
105
+ _CV_TREND = {
106
+ 2: {
107
+ 0.10: [8.373, 8.527, 7.279, 7.152, 9.946],
108
+ 0.05: [9.810, 9.755, 8.229, 8.311, 11.580],
109
+ 0.025: [11.638, 11.092, 9.531, 9.561, 13.631],
110
+ 0.01: [15.592, 12.568, 11.884, 12.126, 17.734],
111
+ },
112
+ 3: {
113
+ 0.10: [7.666, 8.085, 6.863, 6.713, 9.298],
114
+ 0.05: [8.756, 9.070, 7.597, 7.792, 10.430],
115
+ 0.025: [9.843, 10.110, 8.467, 9.017, 11.671],
116
+ 0.01: [11.365, 11.222, 9.612, 11.072, 13.690],
117
+ },
118
+ 4: {
119
+ 0.10: [7.588, 7.985, 6.659, 6.588, 9.208],
120
+ 0.05: [8.589, 9.049, 7.366, 7.688, 10.375],
121
+ 0.025: [9.471, 10.000, 8.148, 9.003, 11.628],
122
+ 0.01: [10.870, 11.291, 9.423, 11.085, 13.334],
123
+ },
124
+ 5: {
125
+ 0.10: [7.947, 7.903, 6.646, 6.435, 9.279],
126
+ 0.05: [9.000, 8.969, 7.289, 7.528, 10.461],
127
+ 0.025: [10.037, 9.785, 8.024, 8.784, 12.084],
128
+ 0.01: [11.751, 11.230, 9.500, 11.696, 14.375],
129
+ },
130
+ 6: {
131
+ 0.10: [8.330, 7.890, 6.641, 6.458, 9.443],
132
+ 0.05: [9.412, 8.767, 7.363, 7.454, 10.676],
133
+ 0.025: [10.398, 9.755, 8.048, 9.080, 12.028],
134
+ 0.01: [11.916, 10.989, 9.111, 11.956, 13.747],
135
+ },
136
+ }
137
+
138
+
139
+ def _get_mr_cv_table(model):
140
+ """Get critical value table for M&R (2022) tests."""
141
+ if model == "none":
142
+ return _CV_NO_DET
143
+ elif model == "drift":
144
+ return _CV_INTERCEPT
145
+ elif model == "trend":
146
+ return _CV_TREND
147
+ else:
148
+ raise ValueError(f"Unknown model: {model}")
149
+
150
+
151
def get_mr_critical_value(K_plus_1, alpha, m_star, model="drift"):
    """
    Look up a critical value from Martins & Rodrigues (2022, Table 1).

    Parameters
    ----------
    K_plus_1 : int
        Total number of variables in the system (regressand plus the K
        regressors).
    alpha : float
        Significance level; one of 0.10, 0.05, 0.025, 0.01.
    m_star : int or str
        Number of breaks (1-4), or 'max' for the W_max statistic.
    model : str
        Deterministic specification: 'none', 'drift', or 'trend'.

    Returns
    -------
    float
        The tabulated critical value.

    Raises
    ------
    ValueError
        If any argument falls outside the tabulated grid.
    """
    table = _get_mr_cv_table(model)

    if K_plus_1 not in table:
        raise ValueError(
            f"K+1={K_plus_1} not available. Use one of {list(table.keys())}")
    row = table[K_plus_1]
    if alpha not in row:
        raise ValueError(f"alpha={alpha} not available. Use one of {_SIG_LEVELS}")

    vals = row[alpha]

    # Column layout of each table row: W(1), W(2), W(3), W(4), W_max.
    if m_star == "max":
        return vals[4]
    if isinstance(m_star, int) and 1 <= m_star <= 4:
        return vals[m_star - 1]
    raise ValueError(f"m_star must be 1, 2, 3, 4, or 'max'. Got {m_star}")
186
+
187
+
188
+ # ============================================================================
189
+ # Core computation: ADF test regression for a subsample
190
+ # ============================================================================
191
+
192
+ def _subsample_adf_regression(e, t_start, t_end, p_T, include_ec=True):
193
+ """
194
+ ADF regression on a subsample e[t_start:t_end].
195
+
196
+ For the subsample, estimate:
197
+ Delta e_t = c + gamma * e_{t-1} + sum_{i=1}^{p_T} pi_i * Delta e_{t-i} + a_t
198
+
199
+ or under the null (include_ec=False):
200
+ Delta e_t = sum_{i=1}^{p_T} pi_i * Delta e_{t-i} + a_t
201
+
202
+ Parameters
203
+ ----------
204
+ e : ndarray
205
+ Full residual series.
206
+ t_start : int
207
+ Start index of subsample (inclusive, 0-indexed).
208
+ t_end : int
209
+ End index of subsample (exclusive, 0-indexed).
210
+ p_T : int
211
+ Lag order for augmented terms.
212
+ include_ec : bool
213
+ If True, include the error correction term (c + gamma * e_{t-1}).
214
+
215
+ Returns
216
+ -------
217
+ ssr : float
218
+ Sum of squared residuals.
219
+ n_obs : int
220
+ Number of observations used.
221
+ """
222
+ sub_e = e[t_start:t_end]
223
+ T_sub = len(sub_e)
224
+
225
+ if T_sub <= p_T + 2:
226
+ return np.nan, 0
227
+
228
+ de = np.diff(sub_e) # Delta e_t, length T_sub - 1
229
+
230
+ # Effective sample starts at index p_T in de
231
+ n_obs = len(de) - p_T
232
+ if n_obs <= 0:
233
+ return np.nan, 0
234
+
235
+ Y = de[p_T:] # Dependent variable
236
+
237
+ # Build regressors
238
+ regressors = []
239
+
240
+ # Augmented lags: Delta e_{t-i} for i = 1, ..., p_T
241
+ for i in range(1, p_T + 1):
242
+ regressors.append(de[p_T - i: len(de) - i])
243
+
244
+ if include_ec:
245
+ # Intercept
246
+ regressors.append(np.ones(n_obs))
247
+ # e_{t-1}
248
+ e_lag = sub_e[p_T: -1] if p_T > 0 else sub_e[:-1]
249
+ regressors.append(e_lag)
250
+
251
+ if len(regressors) == 0:
252
+ ssr = np.sum(Y ** 2)
253
+ return ssr, n_obs
254
+
255
+ Z = np.column_stack(regressors)
256
+
257
+ if Z.shape[1] >= n_obs:
258
+ return np.nan, 0
259
+
260
+ try:
261
+ beta = inv(Z.T @ Z) @ (Z.T @ Y)
262
+ residuals = Y - Z @ beta
263
+ ssr = np.sum(residuals ** 2)
264
+ except np.linalg.LinAlgError:
265
+ return np.nan, 0
266
+
267
+ return ssr, n_obs
268
+
269
+
270
+ # ============================================================================
271
+ # Compute F_A and F_B statistics
272
+ # Martins & Rodrigues (2022, Eq. 3.2)
273
+ # ============================================================================
274
+
275
def _compute_F_statistic(e, breaks, m_star, hypothesis, p_T):
    """
    Compute F_A or F_B statistic for given break dates (M&R 2022, Eq. 3.2).

    Compares the restricted SSR (no error-correction term in any regime,
    i.e. no cointegration anywhere) against the SSR of the segmented
    alternative, where the error-correction term is included only in the
    regimes that the hypothesis labels stationary.

    Parameters
    ----------
    e : ndarray
        Full residual series.
    breaks : tuple of int
        Break dates (0-indexed). Length m_star.
    m_star : int
        Number of breaks.
    hypothesis : str
        'A' (first regime is I(1)) or 'B' (first regime is I(0)).
    p_T : int
        Lag order.

    Returns
    -------
    F_stat : float
        Test statistic value; NaN when any regime regression fails (e.g.
        a segment is too short) or degrees of freedom are non-positive.
    """
    T = len(e)

    # Build regime boundaries: regime j spans [boundaries[j], boundaries[j+1]).
    boundaries = [0] + list(breaks) + [T]
    n_regimes = m_star + 1

    # Compute SSR0: restricted SSR under null (no error correction anywhere)
    ssr0_total = 0.0
    for j in range(n_regimes):
        ssr_j, _ = _subsample_adf_regression(
            e, boundaries[j], boundaries[j + 1], p_T, include_ec=False)
        if np.isnan(ssr_j):
            # Any infeasible segment invalidates the whole partition.
            return np.nan
        ssr0_total += ssr_j

    # Compute SSR_k,m*: unrestricted SSR under alternative
    ssr_alt_total = 0.0

    for j in range(n_regimes):
        regime_num = j + 1  # 1-indexed

        if hypothesis == "A":
            # H1A: odd regimes are I(1), even regimes are I(0)
            is_stationary = (regime_num % 2 == 0)
        elif hypothesis == "B":
            # H1B: odd regimes are I(0), even regimes are I(1)
            is_stationary = (regime_num % 2 == 1)
        else:
            raise ValueError(f"hypothesis must be 'A' or 'B', got {hypothesis}")

        # Error-correction term is included only in the I(0) regimes.
        ssr_j, _ = _subsample_adf_regression(
            e, boundaries[j], boundaries[j + 1], p_T,
            include_ec=is_stationary)
        if np.isnan(ssr_j):
            return np.nan
        ssr_alt_total += ssr_j

    # Guard against division by zero / a perfectly fitting alternative.
    if ssr_alt_total <= 0:
        return np.nan

    # Compute the F-statistic (Eq. 3.2).
    # delta_B flags hypothesis B, which has a different count of I(0)
    # regimes (and hence restrictions) when m_star is even.
    if hypothesis == "A":
        delta_B = 0
    else:
        delta_B = 1

    # NOTE(review): the names below are swapped relative to the usual
    # convention -- `denom_df` divides the SSR *difference* (number of
    # restrictions) and `numer_df` is the residual degrees of freedom.
    # The algebra matches F = [(SSR0-SSR1)/q] / [SSR1/df_resid]; the df
    # expressions themselves are paper-specific -- verify against Eq. 3.2.
    if m_star % 2 == 0:
        denom_df = m_star + 2 * delta_B
        numer_df = T - m_star - 2 * delta_B - p_T
    else:
        denom_df = m_star + 1
        numer_df = T - m_star - 1 - p_T

    if numer_df <= 0 or denom_df <= 0:
        return np.nan

    F_stat = (numer_df * (ssr0_total - ssr_alt_total)) / (
        denom_df * ssr_alt_total)

    return F_stat
357
+
358
+
359
+ # ============================================================================
360
+ # Generate all possible break date combinations
361
+ # ============================================================================
362
+
363
+ def _generate_break_dates(T, m_star, epsilon):
364
+ """
365
+ Generate all admissible break date partitions.
366
+
367
+ Following Martins & Rodrigues (2022, below Eq. 3.3):
368
+ tau_{j+1} - tau_j >= epsilon
369
+ tau_1 >= epsilon
370
+ tau_{m*} <= 1 - epsilon
371
+
372
+ Parameters
373
+ ----------
374
+ T : int
375
+ Sample size.
376
+ m_star : int
377
+ Number of breaks.
378
+ epsilon : float
379
+ Trimming parameter.
380
+
381
+ Yields
382
+ ------
383
+ breaks : tuple of int
384
+ Break dates (0-indexed).
385
+ """
386
+ min_seg = max(int(np.ceil(epsilon * T)), 2)
387
+
388
+ if m_star == 1:
389
+ for t1 in range(min_seg, T - min_seg + 1):
390
+ yield (t1,)
391
+ elif m_star == 2:
392
+ for t1 in range(min_seg, T - 2 * min_seg + 1):
393
+ for t2 in range(t1 + min_seg, T - min_seg + 1):
394
+ yield (t1, t2)
395
+ elif m_star == 3:
396
+ for t1 in range(min_seg, T - 3 * min_seg + 1):
397
+ for t2 in range(t1 + min_seg, T - 2 * min_seg + 1):
398
+ for t3 in range(t2 + min_seg, T - min_seg + 1):
399
+ yield (t1, t2, t3)
400
+ elif m_star == 4:
401
+ for t1 in range(min_seg, T - 4 * min_seg + 1):
402
+ for t2 in range(t1 + min_seg, T - 3 * min_seg + 1):
403
+ for t3 in range(t2 + min_seg, T - 2 * min_seg + 1):
404
+ for t4 in range(t3 + min_seg, T - min_seg + 1):
405
+ yield (t1, t2, t3, t4)
406
+ else:
407
+ raise ValueError(f"m_star must be 1-4, got {m_star}")
408
+
409
+
410
+ def _generate_break_dates_fast(T, m_star, epsilon, step=1):
411
+ """
412
+ Generate break date partitions with optional step for speed.
413
+
414
+ Same as _generate_break_dates but with configurable step size.
415
+ """
416
+ min_seg = max(int(np.ceil(epsilon * T)), 2)
417
+
418
+ if m_star == 1:
419
+ for t1 in range(min_seg, T - min_seg + 1, step):
420
+ yield (t1,)
421
+ elif m_star == 2:
422
+ for t1 in range(min_seg, T - 2 * min_seg + 1, step):
423
+ for t2 in range(t1 + min_seg, T - min_seg + 1, step):
424
+ yield (t1, t2)
425
+ elif m_star == 3:
426
+ for t1 in range(min_seg, T - 3 * min_seg + 1, step):
427
+ for t2 in range(t1 + min_seg, T - 2 * min_seg + 1, step):
428
+ for t3 in range(t2 + min_seg, T - min_seg + 1, step):
429
+ yield (t1, t2, t3)
430
+ elif m_star == 4:
431
+ for t1 in range(min_seg, T - 4 * min_seg + 1, step):
432
+ for t2 in range(t1 + min_seg, T - 3 * min_seg + 1, step):
433
+ for t3 in range(t2 + min_seg, T - 2 * min_seg + 1, step):
434
+ for t4 in range(t3 + min_seg, T - min_seg + 1, step):
435
+ yield (t1, t2, t3, t4)
436
+ else:
437
+ raise ValueError(f"m_star must be 1-4, got {m_star}")
438
+
439
+
440
+ # ============================================================================
441
+ # Main test function
442
+ # ============================================================================
443
+
444
def mr_test(y, X, model="drift", max_breaks=4, epsilon=0.15,
            p=None, max_p=12, step=1, verbose=False):
    """
    Martins & Rodrigues (2022) Wald-type tests for segmented cointegration.

    Computes residual-based sup-Wald-type test statistics for detecting
    segmented cointegration with multiple structural breaks.

    The null hypothesis is H_0: no cointegration over the entire sample.
    The alternative allows m breaks with consecutive switches between
    stationarity and nonstationarity.

    Parameters
    ----------
    y : array_like, shape (T,)
        Dependent variable.
    X : array_like, shape (T,) or (T, K)
        Regressor(s).
    model : str
        Deterministic specification for the cointegrating regression:
        'none', 'drift', 'trend'.
    max_breaks : int
        Maximum number of breaks to consider (m_bar). Default 4.
    epsilon : float
        Trimming parameter. Default 0.15 as in the paper.
    p : int or None
        Lag order for ADF augmented terms. If None, selected by BIC.
    max_p : int
        Maximum lag order for BIC selection.
    step : int
        Step size for grid search over break dates.
        Use step > 1 for faster computation with large samples.
    verbose : bool
        If True, print progress.

    Returns
    -------
    results : MRTestResult
        Object containing W(m*), W_max statistics, critical values,
        break date estimates, and other information.

    Notes
    -----
    If no admissible partition yields a finite F statistic for some m*,
    the corresponding sup statistics remain -inf and the break entry is
    None; W_max is then taken over whatever values were recorded.
    """
    y = np.asarray(y, dtype=np.float64).ravel()
    X = np.asarray(X, dtype=np.float64)
    if X.ndim == 1:
        # Promote a single regressor to a (T, 1) column.
        X = X.reshape(-1, 1)
    T = len(y)
    K = X.shape[1]
    K_plus_1 = K + 1

    # Step 1: Estimate cointegrating regression on full sample.
    # `e` are the full-sample OLS residuals used by every subsequent test.
    e, beta_hat = ols_residuals(y, X, model=model)

    # Step 2: Select lag order (BIC over 0..max_p unless given explicitly).
    if p is None:
        p = select_lag_bic(e, max_p=max_p)

    # Step 3: Compute test statistics for each m*
    W_stats = {}
    sup_FA_stats = {}
    sup_FB_stats = {}
    best_breaks = {}

    for m_star in range(1, max_breaks + 1):
        if verbose:
            print(f"Computing W({m_star})...")

        # Running maxima over the break-date grid; -inf if nothing finite.
        best_FA = -np.inf
        best_FA_breaks = None
        best_FB = -np.inf
        best_FB_breaks = None

        for breaks in _generate_break_dates_fast(T, m_star, epsilon, step):
            # F_A: first regime is I(1)
            FA = _compute_F_statistic(e, breaks, m_star, "A", p)
            if np.isfinite(FA) and FA > best_FA:
                best_FA = FA
                best_FA_breaks = breaks

            # F_B: first regime is I(0)
            FB = _compute_F_statistic(e, breaks, m_star, "B", p)
            if np.isfinite(FB) and FB > best_FB:
                best_FB = FB
                best_FB_breaks = breaks

        sup_FA_stats[m_star] = best_FA
        sup_FB_stats[m_star] = best_FB

        # W(m*) = max(sup F_A(m*), sup F_B(m*)) (Eq. 3.4)
        W_m = max(best_FA, best_FB)
        W_stats[m_star] = W_m

        # Determine which hypothesis and breaks correspond to W(m*).
        # Ties go to hypothesis A.
        if best_FA >= best_FB:
            best_breaks[m_star] = {
                "hypothesis": "A",
                "breaks": best_FA_breaks,
                "fractions": tuple(
                    b / T for b in best_FA_breaks) if best_FA_breaks else None,
            }
        else:
            best_breaks[m_star] = {
                "hypothesis": "B",
                "breaks": best_FB_breaks,
                "fractions": tuple(
                    b / T for b in best_FB_breaks) if best_FB_breaks else None,
            }

    # W_max = max_{1<=m<=m_bar} W(m) (Eq. 3.5)
    # On ties, max() returns the first (i.e. smallest) m, since W_stats
    # preserves insertion order 1..max_breaks.
    W_max = max(W_stats.values()) if W_stats else np.nan
    W_max_m = max(W_stats, key=W_stats.get) if W_stats else None

    # Collect critical values; combinations outside Table 1 (e.g. K+1 > 6)
    # are recorded as empty dicts rather than failing the whole test.
    cvs = {}
    for m_star in range(1, max_breaks + 1):
        try:
            cvs[m_star] = {
                alpha: get_mr_critical_value(
                    K_plus_1, alpha, m_star, model=model)
                for alpha in _SIG_LEVELS
            }
        except (ValueError, KeyError):
            cvs[m_star] = {}
    try:
        cvs["max"] = {
            alpha: get_mr_critical_value(
                K_plus_1, alpha, "max", model=model)
            for alpha in _SIG_LEVELS
        }
    except (ValueError, KeyError):
        cvs["max"] = {}

    return MRTestResult(
        W_stats=W_stats,
        W_max=W_max,
        W_max_m=W_max_m,
        sup_FA=sup_FA_stats,
        sup_FB=sup_FB_stats,
        best_breaks=best_breaks,
        critical_values=cvs,
        model=model,
        K_plus_1=K_plus_1,
        T=T,
        epsilon=epsilon,
        max_breaks=max_breaks,
        lag_order=p,
        beta_hat=beta_hat,
        residuals=e,
    )
592
+
593
+
594
+ # ============================================================================
595
+ # Result class
596
+ # ============================================================================
597
+
598
class MRTestResult:
    """
    Container for Martins & Rodrigues (2022) test results.

    Attributes
    ----------
    W_stats : dict
        W(m*) statistics for m* = 1, ..., max_breaks.
    W_max : float
        W_max double maximum statistic.
    W_max_m : int
        Number of breaks corresponding to W_max.
    sup_FA : dict
        sup F_A(m*) statistics.
    sup_FB : dict
        sup F_B(m*) statistics.
    best_breaks : dict
        Best break date information for each m*.
    critical_values : dict
        Critical values from Table 1 (may be empty for untabulated cases).
    model : str
        Deterministic specification.
    K_plus_1 : int
        Total number of variables.
    T : int
        Sample size.
    epsilon : float
        Trimming parameter.
    max_breaks : int
        Maximum number of breaks considered.
    lag_order : int
        ADF lag order used.
    beta_hat : ndarray
        Estimated cointegrating vector.
    residuals : ndarray
        Full-sample OLS residuals.
    """

    def __init__(self, W_stats, W_max, W_max_m, sup_FA, sup_FB,
                 best_breaks, critical_values, model, K_plus_1, T,
                 epsilon, max_breaks, lag_order, beta_hat, residuals):
        # Plain attribute assignment; all semantics documented on the class.
        self.W_stats = W_stats
        self.W_max = W_max
        self.W_max_m = W_max_m
        self.sup_FA = sup_FA
        self.sup_FB = sup_FB
        self.best_breaks = best_breaks
        self.critical_values = critical_values
        self.model = model
        self.K_plus_1 = K_plus_1
        self.T = T
        self.epsilon = epsilon
        self.max_breaks = max_breaks
        self.lag_order = lag_order
        self.beta_hat = beta_hat
        self.residuals = residuals

    def significant(self, m_star="max", alpha=0.05):
        """
        Check if the test rejects H_0 at the given level.

        Parameters
        ----------
        m_star : int or str
            Number of breaks (1-4) or 'max'.
        alpha : float
            Significance level.

        Returns
        -------
        reject : bool or None
            True/False for a decision; None when the statistic or the
            critical value is unavailable (e.g. untabulated K+1 or alpha).
        """
        if m_star == "max":
            stat_val = self.W_max
            cv = self.critical_values.get("max", {}).get(alpha)
        else:
            stat_val = self.W_stats.get(m_star)
            cv = self.critical_values.get(m_star, {}).get(alpha)

        # Three-valued outcome: None signals "cannot decide".
        if cv is None or stat_val is None:
            return None
        return stat_val > cv

    def estimated_breaks(self, m_star=None):
        """
        Return estimated break dates and fractions.

        Parameters
        ----------
        m_star : int or None
            Number of breaks. If None, uses W_max_m (the m selected by
            the W_max statistic).

        Returns
        -------
        info : dict
            Keys 'hypothesis', 'breaks', 'fractions' as stored by
            ``mr_test``; empty dict when m_star is unknown.
        """
        if m_star is None:
            m_star = self.W_max_m
        return self.best_breaks.get(m_star, {})

    def summary(self):
        """
        Produce a formatted summary string suitable for publication.

        Returns
        -------
        s : str
            Multi-line report: header, W(m*) table with 10%/5%/1%
            critical values and 5% decisions, W_max row, and the break
            dates associated with W_max.
        """
        model_labels = {"none": "No deterministics",
                        "drift": "Intercept only",
                        "trend": "Intercept and time trend"}

        lines = []
        lines.append("=" * 78)
        lines.append("Martins & Rodrigues (2022) Wald-Type Tests for Segmented Cointegration")
        lines.append("=" * 78)
        lines.append(f"Model: {model_labels.get(self.model, self.model)}")
        lines.append(f"Sample size (T): {self.T}")
        lines.append(f"Variables (K+1): {self.K_plus_1}")
        lines.append(f"Trimming (eps): {self.epsilon:.2f}")
        lines.append(f"Max breaks: {self.max_breaks}")
        lines.append(f"ADF lag order: {self.lag_order}")
        lines.append("")

        # W(m*) statistics
        lines.append("-" * 78)
        lines.append(f"{'Test':<10} {'Statistic':>12} "
                     f"{'10% CV':>10} {'5% CV':>10} {'1% CV':>10} "
                     f"{'Reject 5%':>10}")
        lines.append("-" * 78)

        for m_star in range(1, self.max_breaks + 1):
            stat = self.W_stats.get(m_star, np.nan)
            cv10 = self.critical_values.get(m_star, {}).get(0.10, np.nan)
            cv05 = self.critical_values.get(m_star, {}).get(0.05, np.nan)
            cv01 = self.critical_values.get(m_star, {}).get(0.01, np.nan)
            rej = self.significant(m_star, 0.05)
            # rej is True / False / None -> "Yes**" / "No" / "N/A".
            rej_str = "Yes**" if rej else ("No" if rej is not None else "N/A")

            lines.append(
                f"W({m_star}) {stat:>12.4f} "
                f"{cv10:>10.3f} {cv05:>10.3f} {cv01:>10.3f} "
                f"{rej_str:>10}")

        # W_max
        stat = self.W_max
        cv10 = self.critical_values.get("max", {}).get(0.10, np.nan)
        cv05 = self.critical_values.get("max", {}).get(0.05, np.nan)
        cv01 = self.critical_values.get("max", {}).get(0.01, np.nan)
        rej = self.significant("max", 0.05)
        rej_str = "Yes**" if rej else ("No" if rej is not None else "N/A")

        lines.append(
            f"W_max {stat:>12.4f} "
            f"{cv10:>10.3f} {cv05:>10.3f} {cv01:>10.3f} "
            f"{rej_str:>10}")

        lines.append("-" * 78)

        # Break date estimates
        lines.append("")
        lines.append("Estimated break dates (for W_max):")
        m_opt = self.W_max_m
        if m_opt is not None:
            info = self.best_breaks.get(m_opt, {})
            hyp = info.get("hypothesis", "N/A")
            brk = info.get("breaks", ())
            frac = info.get("fractions", ())

            # Hypothesis A: first regime I(1); hypothesis B: first I(0).
            r1_label = "I(1)" if hyp == "A" else "I(0)"
            lines.append(f"  Number of breaks: {m_opt}")
            lines.append(f"  First regime: {r1_label} (H1{hyp})")
            if brk:
                lines.append(f"  Break dates: {brk}")
                lines.append(f"  Break fractions: "
                             f"{tuple(round(f, 4) for f in frac)}")

        lines.append("")
        lines.append("Notes: W(m*) = max(sup F_A(m*), sup F_B(m*)).")
        lines.append("       W_max = max_{1<=m<=m_bar} W(m).")
        lines.append("       Critical values from Martins & Rodrigues (2022, Table 1).")
        lines.append("=" * 78)
        return "\n".join(lines)

    def __repr__(self):
        # Intentionally verbose: repr shows the full publication summary.
        return self.summary()