segmcoint 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
segmcoint/kim2003.py ADDED
@@ -0,0 +1,672 @@
1
+ """
2
+ Segmented cointegration tests from Kim (2003).
3
+
4
+ Implements the inference procedures for segmented cointegration proposed in:
5
+
6
+ Kim, J.-Y. (2003). Inference on Segmented Cointegration.
7
+ Econometric Theory, 19, 620-639.
8
+
9
+ This module provides:
10
+ - Phillips-Perron-Ouliaris type tests: Z_rho(C_T) and Z_t(C_T)
11
+ - Augmented Dickey-Fuller type tests: ADF_rho(C_T) and ADF_t(C_T)
12
+ - Infimum test statistics: Z*_rho, Z*_t, ADF*_rho, ADF*_t
13
+ - Extremum estimator for the noncointegration period
14
+ - Critical value tables (Tables 1 and 2 from the paper)
15
+
16
+ Model Specifications (Kim 2003, Eq. 2.1a-2.1c):
17
+ Case I (model='none'): x_{1t} = beta' x_{2t} + eps_t
18
+ Case II (model='drift'): x_{1t} = alpha + beta' x_{2t} + eps_t
19
+ Case III (model='trend'): x_{1t} = alpha + gamma*t + beta' x_{2t} + eps_t
20
+ """
21
+
22
+ import numpy as np
23
+ import warnings
24
+ from numpy.linalg import inv
25
+ from .utils import (
26
+ ols_residuals,
27
+ ar1_regression,
28
+ adf_regression,
29
+ newey_west_lrv,
30
+ select_lag_bic,
31
+ )
32
+
33
# ============================================================================
# Asymptotic critical values from Kim (2003, Tables 1 and 2).
# All tables assume the search bound ell_bar(T_N) = 0.3 used in the paper.
# Keys: n = number of variables in the cointegrating regression (1..6).
# Each row lists the percentiles of the asymptotic null distribution in the
# order: 0.01, 0.025, 0.05, 0.10, 0.15, 0.95, 0.975, 0.99
# (column order is fixed by _CV_PERCENTILES below).
# ============================================================================

# Table 1: critical values of Z*_rho(C) and ADF*_rho(C).
# Case I: no deterministic terms in the cointegrating regression.
_CV_Zp_CASE_I = {
    1: [-13.00, -10.16, -8.18, -5.68, -4.57, 1.29, 1.64, 2.12],
    2: [-37.20, -32.30, -27.90, -23.70, -20.82, -2.40, -1.29, -0.56],
    3: [-46.63, -41.05, -36.41, -31.61, -28.74, -7.54, -5.81, -4.11],
    4: [-55.89, -48.97, -44.59, -39.43, -36.36, -12.73, -10.85, -9.22],
    5: [-63.94, -58.40, -52.79, -47.78, -44.59, -17.92, -15.86, -13.75],
    6: [-70.39, -64.30, -59.83, -54.22, -51.02, -22.47, -20.60, -18.42],
}

# Case II: intercept included.
_CV_Zp_CASE_II = {
    1: [-20.15, -16.73, -13.96, -11.37, -9.59, -0.13, 0.50, 1.21],
    2: [-87.37, -64.30, -50.75, -39.58, -34.02, -7.77, -6.19, -4.79],
    3: [-105.48, -84.12, -65.53, -51.53, -44.82, -13.57, -11.25, -9.21],
    4: [-122.91, -96.68, -78.66, -61.95, -54.24, -18.65, -16.37, -13.77],
    5: [-130.86, -106.23, -87.07, -69.28, -60.56, -23.79, -21.33, -18.94],
    6: [-134.72, -109.84, -89.89, -73.65, -66.98, -28.72, -25.86, -22.50],
}

# Case III: intercept and linear trend included.
_CV_Zp_CASE_III = {
    1: [-29.17, -24.90, -21.55, -18.22, -16.03, -2.60, -1.67, -0.75],
    2: [-107.65, -80.24, -62.54, -46.59, -38.44, -8.26, -6.42, -4.39],
    3: [-131.66, -101.33, -79.34, -58.96, -49.43, -14.13, -12.05, -9.81],
    4: [-135.59, -109.94, -87.87, -68.31, -59.07, -19.65, -17.64, -15.20],
    5: [-140.29, -115.59, -96.57, -74.32, -65.16, -24.78, -22.25, -19.61],
    6: [-144.09, -118.83, -98.31, -78.46, -70.02, -28.98, -26.27, -23.71],
}

# Table 2: critical values of Z*_t(C) and ADF*_t(C).
# Case I: no deterministic terms.
_CV_Zt_CASE_I = {
    1: [-2.51, -2.20, -1.96, -1.61, -1.43, 1.29, 1.70, 2.17],
    2: [-4.23, -3.95, -3.65, -3.34, -3.15, -0.88, -0.54, -0.23],
    3: [-4.88, -4.49, -4.23, -3.92, -3.73, -1.74, -1.46, -1.15],
    4: [-5.26, -4.89, -4.68, -4.38, -4.19, -2.32, -2.10, -1.84],
    5: [-5.59, -5.33, -5.09, -4.86, -4.70, -2.84, -2.62, -2.33],
    6: [-5.89, -5.68, -5.45, -5.16, -5.00, -3.24, -3.05, -2.81],
}

# Case II: intercept included.
_CV_Zt_CASE_II = {
    1: [-3.49, -3.12, -2.88, -2.58, -2.37, -0.06, 0.31, 0.66],
    2: [-8.84, -7.43, -6.36, -5.31, -4.71, -1.91, -1.62, -1.28],
    3: [-10.05, -8.68, -7.38, -6.22, -5.47, -2.53, -2.28, -2.01],
    4: [-11.04, -9.51, -8.25, -6.86, -6.05, -3.03, -2.78, -2.51],
    5: [-11.33, -10.03, -8.70, -7.21, -6.35, -3.43, -3.20, -2.93],
    6: [-11.78, -10.14, -8.76, -7.33, -6.56, -3.78, -3.58, -3.37],
}

# Case III: intercept and linear trend included.
_CV_Zt_CASE_III = {
    1: [-3.92, -3.66, -3.40, -3.12, -2.94, -0.90, -0.62, -0.30],
    2: [-10.42, -8.78, -7.74, -6.32, -5.52, -1.96, -1.67, -1.27],
    3: [-11.68, -9.98, -8.62, -7.23, -6.20, -2.61, -2.27, -1.91],
    4: [-11.90, -10.37, -9.13, -7.57, -6.62, -3.10, -2.87, -2.61],
    5: [-12.32, -10.80, -9.52, -7.83, -6.86, -3.51, -3.30, -3.05],
    6: [-12.45, -10.98, -9.53, -7.79, -6.91, -3.84, -3.62, -3.38],
}

# Column order of every row above; get_critical_value() indexes rows by
# the position of alpha in this list.
_CV_PERCENTILES = [0.01, 0.025, 0.05, 0.10, 0.15, 0.95, 0.975, 0.99]
97
+
98
+
99
def _get_cv_table(stat_type, model):
    """Return the Kim (2003) critical-value table for a statistic/model pair.

    Returns ``None`` when ``model`` has no tabulated values; raises
    ``ValueError`` for an unrecognized ``stat_type``.
    """
    rho_family = stat_type in ("Zp", "ADFp")
    t_family = stat_type in ("Zt", "ADFt")
    if not (rho_family or t_family):
        raise ValueError(f"Unknown stat_type: {stat_type}")
    if rho_family:
        by_model = {
            "none": _CV_Zp_CASE_I,
            "drift": _CV_Zp_CASE_II,
            "trend": _CV_Zp_CASE_III,
        }
    else:
        by_model = {
            "none": _CV_Zt_CASE_I,
            "drift": _CV_Zt_CASE_II,
            "trend": _CV_Zt_CASE_III,
        }
    return by_model.get(model)
110
+
111
+
112
def get_critical_value(n, alpha, stat_type="Zt", model="drift"):
    """
    Look up an asymptotic critical value from Kim (2003, Tables 1-2).

    The tabulated values assume ell_bar(T_N) = 0.3, the search bound used
    in the paper.

    Parameters
    ----------
    n : int
        Number of variables in the cointegration regression (1 for
        univariate, 2 for bivariate, etc.).
    alpha : float
        Significance level; must be one of the tabulated percentiles
        (0.01, 0.025, 0.05, 0.10, 0.15, 0.95, 0.975, 0.99).
    stat_type : str
        Test statistic type: 'Zp', 'Zt', 'ADFp', 'ADFt'.
    model : str
        Model specification: 'none' (Case I), 'drift' (Case II),
        'trend' (Case III).

    Returns
    -------
    cv : float
        Critical value at the specified significance level.

    Raises
    ------
    ValueError
        If no table exists for the requested model, dimension, or level.
    """
    table = _get_cv_table(stat_type, model)
    if table is None:
        raise ValueError(f"No table for model={model}")
    if n not in table:
        raise ValueError(f"n={n} not in table. Available: {list(table.keys())}")
    if alpha not in _CV_PERCENTILES:
        raise ValueError(
            f"alpha={alpha} not available. Use one of {_CV_PERCENTILES}")
    column = _CV_PERCENTILES.index(alpha)
    return table[n][column]
146
+
147
+
148
+ # ============================================================================
149
+ # Phillips-Perron-Ouliaris type statistics: Z_rho(C_T) and Z_t(C_T)
150
+ # Kim (2003, Eqs. 3.3 and 3.4)
151
+ # ============================================================================
152
+
153
def _compute_Zp_Zt(e, weighted, q=None):
    """
    Phillips-Perron-Ouliaris type statistics for one segmentation.

    Implements Kim (2003, Eqs. 3.3 and 3.4):
        Z_rho(C_T) = T_C * (rho_hat - 1)
            - 0.5*(T_C^2 * sigma_rho^2 / s^2) * (lambda^2 - gamma_0)
        Z_t(C_T) = (gamma_0/lambda^2)^{1/2} * t(C_T)
            - {(lambda^2 - gamma_0)/(2*lambda)} * {T_C * sigma_rho / s}

    Parameters
    ----------
    e : ndarray
        Full residual series.
    weighted : ndarray
        Weight series w_t(C_T); positive entries mark the cointegration
        period C_T.
    q : int or None
        Bandwidth for the long-run variance estimator.

    Returns
    -------
    (float, float)
        (Z_rho, Z_t); (nan, nan) when the statistics cannot be computed.
    """
    n_coint = int(np.sum(weighted > 0))  # T_C: number of C_T observations

    # Weighted AR(1) regression of e_t on e_{t-1}.
    rho, s2, var_rho, _ = ar1_regression(e, weighted=weighted)

    # t-statistic for rho = 1; zero when the variance is degenerate.
    t_rho = (rho - 1.0) / np.sqrt(var_rho) if var_rho > 0 else 0.0

    # Innovations for the long-run variance, v_t = w_t * (e_t - rho*e_{t-1}),
    # restricted to the observations inside C_T.
    w_lag = weighted[1:]
    innovations = (w_lag * (e[1:] - rho * e[:-1]))[w_lag > 0]
    if len(innovations) < 2:
        return np.nan, np.nan

    lam2, gam0 = newey_west_lrv(innovations, q=q)
    if lam2 <= 0 or s2 <= 0:
        return np.nan, np.nan

    # Z_rho (Eq. 3.3): bias-corrected normalized coefficient statistic.
    correction = 0.5 * (n_coint ** 2 * var_rho / s2) * (lam2 - gam0)
    Z_rho = n_coint * (rho - 1.0) - correction

    # Z_t (Eq. 3.4): corrected t-statistic.
    lam = np.sqrt(lam2)
    Z_t = (np.sqrt(gam0) / lam) * t_rho - (
        (lam2 - gam0) / (2.0 * lam)) * (
        n_coint * np.sqrt(var_rho) / np.sqrt(s2))

    return Z_rho, Z_t
215
+
216
+
217
+ # ============================================================================
218
+ # ADF type statistics: ADF_rho(C_T) and ADF_t(C_T)
219
+ # Kim (2003, Eqs. 3.6 and 3.7)
220
+ # ============================================================================
221
+
222
def _compute_ADF(e, weighted, p=1):
    """
    Augmented Dickey-Fuller type statistics for one segmentation.

    Implements Kim (2003, Eqs. 3.6 and 3.7):
        ADF_rho(C_T) = T_C * (lambda_tilde / sigma_epsilon) * (rho_tilde - 1)
        ADF_t(C_T)   = t_tilde_T

    where lambda_tilde/sigma_epsilon = (1 - zeta_1 - ... - zeta_{p-1})^{-1}.

    Parameters
    ----------
    e : ndarray
        Full residual series.
    weighted : ndarray
        Weight series w_t(C_T); positive entries mark C_T.
    p : int
        Lag order for the augmented regression.

    Returns
    -------
    (float, float)
        (ADF_rho, ADF_t); (nan, nan) when the AR polynomial is numerically
        at the unit circle.
    """
    n_coint = int(np.sum(weighted > 0))  # T_C

    rho, t_rho, _sigma, zetas, _ = adf_regression(e, p=p, weighted=weighted)

    # lambda_tilde / sigma_epsilon (expression below Eq. 3.7).
    ratio = 1.0
    if len(zetas) > 0:
        one_minus_sum = 1.0 - np.sum(zetas)
        if abs(one_minus_sum) < 1e-10:
            # Sum of lag coefficients ~ 1: ratio would blow up.
            return np.nan, np.nan
        ratio = 1.0 / one_minus_sum

    # Eq. 3.6 and Eq. 3.7.
    return n_coint * ratio * (rho - 1.0), t_rho
267
+
268
+
269
+ # ============================================================================
270
+ # Segmented cointegration test: infimum statistics
271
+ # Kim (2003, Eqs. 3.13 and 3.14)
272
+ # ============================================================================
273
+
274
def kim_test(y, X, model="drift", max_ell=0.3, step=1,
             q=None, p=None, max_p=12, stat_types=("Zp", "Zt", "ADFp", "ADFt"),
             verbose=False):
    """
    Kim (2003) tests for segmented cointegration.

    Searches over all possible segmentations {N_T} and computes the infimum
    of the test statistics Z_rho(C_T), Z_t(C_T), ADF_rho(C_T), ADF_t(C_T)
    over these segmentations, as described in Kim (2003, Section 3.1).

    The null hypothesis is H_0: rho = 1 for all t (no cointegration).
    The alternative is H_1: segmented cointegration where rho < 1 in C_T
    and rho = 1 in N_T.

    Parameters
    ----------
    y : array_like, shape (T,)
        Dependent variable.
    X : array_like, shape (T,) or (T, K)
        Regressor(s).
    model : str
        Deterministic specification: 'none' (Case I), 'drift' (Case II),
        'trend' (Case III).
    max_ell : float
        Upper bound for the length of the noncointegration period as a
        fraction of T. Denoted ell_bar(T_N) in Kim (2003).
        Critical values in Tables 1-2 are for max_ell = 0.3.
    step : int
        Step size for searching over segmentations (in observations).
    q : int or None
        Bandwidth for long-run variance estimator. If None, uses automatic
        selection.
    p : int or None
        Lag order for ADF statistics. If None, selected by BIC.
    max_p : int
        Maximum lag order for BIC selection.
    stat_types : tuple of str
        Which statistics to compute: any subset of
        ('Zp', 'Zt', 'ADFp', 'ADFt').
    verbose : bool
        If True, print progress information.

    Returns
    -------
    results : KimTestResult
        Object containing test statistics, critical values, break dates,
        and other information.
    """
    y = np.asarray(y, dtype=np.float64).ravel()
    X = np.asarray(X, dtype=np.float64)
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    T = len(y)
    n = X.shape[1] + 1  # Number of variables in cointegration regression

    # Step 1: Estimate cointegrating regression on full sample.
    e_full, beta_hat = ols_residuals(y, X, model=model)

    # Step 2: Select lag order if not provided.
    if p is None:
        p = select_lag_bic(e_full, max_p=max_p)

    # Step 3: Search over all possible segmentations.
    max_len = int(max_ell * T)

    results_dict = {s: {"stat": np.inf, "k0": None, "k1": None}
                    for s in stat_types}

    # Hoisted out of the double loop: which statistic families are wanted.
    want_Z = "Zp" in stat_types or "Zt" in stat_types
    want_ADF = "ADFp" in stat_types or "ADFt" in stat_types

    def _record(name, value, k0, k1):
        # Keep the running infimum (and its segmentation) for one statistic.
        if name in stat_types and np.isfinite(value):
            if value < results_dict[name]["stat"]:
                results_dict[name] = {"stat": value, "k0": k0, "k1": k1}

    n_searched = 0

    for ell_N in range(1, max_len + 1, step):
        for k0 in range(0, T - ell_N + 1, step):
            # Note: k1 = k0 + ell_N <= T always holds here because
            # k0 <= T - ell_N, so no upper-bound check is needed.
            k1 = k0 + ell_N

            # Ensure C_T has at least n observations (Assumption 2).
            T_C = T - ell_N
            if T_C < n:
                continue

            # Weight vector: w_t = 1 for t in C_T, 0 for t in N_T.
            # N_T = {k0+1, ..., k1} (1-indexed) = indices k0..k1-1 (0-indexed).
            w = np.ones(T)
            w[k0:k1] = 0.0

            n_searched += 1

            if want_Z:
                Zp_val, Zt_val = _compute_Zp_Zt(e_full, w, q=q)
                _record("Zp", Zp_val, k0, k1)
                _record("Zt", Zt_val, k0, k1)

            if want_ADF:
                try:
                    ADFp_val, ADFt_val = _compute_ADF(e_full, w, p=p)
                except (ValueError, np.linalg.LinAlgError):
                    ADFp_val, ADFt_val = np.nan, np.nan
                _record("ADFp", ADFp_val, k0, k1)
                _record("ADFt", ADFt_val, k0, k1)

    if verbose:
        print(f"Searched {n_searched} segmentations.")

    # Step 4: Compute standard (non-segmented) tests on full sample.
    w_full = np.ones(T)
    Zp_full, Zt_full = _compute_Zp_Zt(e_full, w_full, q=q)
    try:
        ADFp_full, ADFt_full = _compute_ADF(e_full, w_full, p=p)
    except (ValueError, np.linalg.LinAlgError):
        ADFp_full, ADFt_full = np.nan, np.nan

    full_sample_stats = {
        "Zp": Zp_full, "Zt": Zt_full,
        "ADFp": ADFp_full, "ADFt": ADFt_full
    }

    # Step 5: Collect critical values (Kim 2003, Tables 1-2).
    cvs = {}
    for s in stat_types:
        try:
            cvs[s] = {
                alpha: get_critical_value(n, alpha, stat_type=s, model=model)
                for alpha in [0.01, 0.025, 0.05, 0.10]
            }
        except (ValueError, KeyError):
            cvs[s] = {}

    return KimTestResult(
        stat_types=stat_types,
        infimum_stats={s: results_dict[s]["stat"] for s in stat_types},
        break_k0={s: results_dict[s]["k0"] for s in stat_types},
        break_k1={s: results_dict[s]["k1"] for s in stat_types},
        full_sample_stats=full_sample_stats,
        critical_values=cvs,
        model=model,
        n=n,
        T=T,
        max_ell=max_ell,
        lag_order=p,
        beta_hat=beta_hat,
        residuals=e_full,
    )
439
+
440
+
441
+ # ============================================================================
442
+ # Extremum estimator for the noncointegration period
443
+ # Kim (2003, Eqs. 3.16 and 3.17)
444
+ # ============================================================================
445
+
446
def kim_break_estimator(y, X, model="drift", max_ell=0.3, step=1):
    """
    Extremum estimator for the noncointegration period.

    Implements the estimator from Kim (2003, Eq. 3.16-3.17):
        Lambda_T(tau) = [(tau_1 - tau_0)T]^{-2} * sum_{t in N_T} e_t(C_T)^2
                        / [T_C^{-1} * sum_{t in C_T} e_t(C_T)^2]
        tau_hat = argmax_{tau in T} Lambda_T(tau)

    Parameters
    ----------
    y : array_like, shape (T,)
        Dependent variable.
    X : array_like, shape (T,) or (T, K)
        Regressor(s).
    model : str
        Deterministic specification: 'none', 'drift', 'trend'.
    max_ell : float
        Maximum length of noncointegration period as fraction of T.
    step : int
        Step size for search.

    Returns
    -------
    result : dict
        Dictionary with keys:
        - 'tau_hat': (tau_0_hat, tau_1_hat) estimated break fractions
        - 'k0_hat': estimated start of noncointegration period (0-indexed)
        - 'k1_hat': estimated end of noncointegration period (0-indexed)
        - 'Lambda_max': maximum value of Lambda_T

    Raises
    ------
    ValueError
        If ``model`` is not one of 'none', 'drift', 'trend'.
    """
    y = np.asarray(y, dtype=np.float64).ravel()
    X = np.asarray(X, dtype=np.float64)
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    T = len(y)
    n = X.shape[1] + 1

    # Build the full-sample regressor matrix once (loop-invariant) and
    # validate `model` up front; previously an unknown model left the
    # residual vector undefined and crashed with NameError inside the loop.
    if model == "none":
        Z = X
    elif model == "drift":
        Z = np.column_stack([np.ones(T), X])
    elif model == "trend":
        Z = np.column_stack([np.ones(T), np.arange(1, T + 1), X])
    else:
        raise ValueError(f"Unknown model: {model}")

    max_len = int(max_ell * T)

    best_Lambda = -np.inf
    best_k0 = 0
    best_k1 = 0

    for ell_N in range(1, max_len + 1, step):
        for k0 in range(0, T - ell_N + 1, step):
            k1 = k0 + ell_N
            T_C = T - ell_N
            if T_C < n:
                # C_T must contain at least n observations (Assumption 2).
                continue

            # mask is True on C_T, False on N_T = indices k0..k1-1.
            mask = np.ones(T, dtype=bool)
            mask[k0:k1] = False

            # Estimate beta from C_T only. A single regression suffices;
            # the original code called ols_residuals twice on identical
            # data and discarded the first result.
            _, beta_c = ols_residuals(y[mask], X[mask], model=model)

            # Residuals over ALL periods using the C_T beta estimate.
            e_all = y - Z @ beta_c

            # Lambda_T (Eq. 3.16): normalized ratio of N_T to C_T
            # residual sums of squares.
            sum_sq_N = np.sum(e_all[k0:k1] ** 2)
            sum_sq_C = np.sum(e_all[mask] ** 2)

            if sum_sq_C < 1e-15:
                # Degenerate fit on C_T; ratio undefined.
                continue

            Lambda = (ell_N ** (-2) * sum_sq_N) / (T_C ** (-1) * sum_sq_C)

            if Lambda > best_Lambda:
                best_Lambda = Lambda
                best_k0 = k0
                best_k1 = k1

    return {
        "tau_hat": (best_k0 / T, best_k1 / T),
        "k0_hat": best_k0,
        "k1_hat": best_k1,
        "Lambda_max": best_Lambda,
    }
544
+
545
+
546
+ # ============================================================================
547
+ # Result class
548
+ # ============================================================================
549
+
550
class KimTestResult:
    """
    Container for Kim (2003) segmented cointegration test results.

    Attributes
    ----------
    stat_types : tuple
        Test statistic types computed.
    infimum_stats : dict
        Infimum statistics: Z*_rho, Z*_t, ADF*_rho, ADF*_t.
    break_k0 : dict
        Start of estimated noncointegration period for each statistic.
    break_k1 : dict
        End of estimated noncointegration period for each statistic.
    full_sample_stats : dict
        Full-sample (non-segmented) test statistics.
    critical_values : dict
        Critical values from Kim (2003, Tables 1-2).
    model : str
        Deterministic specification.
    n : int
        Number of variables.
    T : int
        Sample size.
    max_ell : float
        Maximum segmentation length.
    lag_order : int
        ADF lag order used.
    beta_hat : ndarray
        Estimated cointegrating vector.
    residuals : ndarray
        OLS residuals from full-sample regression.
    """

    def __init__(self, stat_types, infimum_stats, break_k0, break_k1,
                 full_sample_stats, critical_values, model, n, T, max_ell,
                 lag_order, beta_hat, residuals):
        # Test results
        self.stat_types = stat_types
        self.infimum_stats = infimum_stats
        self.full_sample_stats = full_sample_stats
        self.critical_values = critical_values
        # Estimated segmentation
        self.break_k0 = break_k0
        self.break_k1 = break_k1
        # Specification and sample information
        self.model = model
        self.n = n
        self.T = T
        self.max_ell = max_ell
        self.lag_order = lag_order
        # First-stage regression output
        self.beta_hat = beta_hat
        self.residuals = residuals

    def significant(self, stat_type="Zt", alpha=0.05):
        """Check if the infimum test rejects H_0 at the given level.

        Returns ``None`` when no critical value is available for the
        requested statistic/level combination.
        """
        threshold = self.critical_values.get(stat_type, {}).get(alpha)
        if threshold is None:
            return None
        return self.infimum_stats[stat_type] < threshold

    def break_dates(self, stat_type="Zt"):
        """Return estimated break dates (0-indexed) for a given statistic."""
        return self.break_k0.get(stat_type), self.break_k1.get(stat_type)

    def break_fractions(self, stat_type="Zt"):
        """Return estimated break fractions tau_0, tau_1 (or None, None)."""
        start, end = self.break_dates(stat_type)
        if start is None or end is None:
            return None, None
        return start / self.T, end / self.T

    def summary(self):
        """
        Produce a formatted summary string suitable for publication.

        Returns
        -------
        s : str
        """
        case_names = {"none": "Case I (no deterministics)",
                      "drift": "Case II (intercept)",
                      "trend": "Case III (intercept + trend)"}

        heavy_rule = "=" * 72
        light_rule = "-" * 72

        out = []
        add = out.append

        add(heavy_rule)
        add("Kim (2003) Segmented Cointegration Test Results")
        add(heavy_rule)
        add(f"Model: {case_names.get(self.model, self.model)}")
        add(f"Sample size (T): {self.T}")
        add(f"Variables (n): {self.n}")
        add(f"Max ell (T_N): {self.max_ell:.2f}")
        add(f"ADF lag order: {self.lag_order}")
        add("")
        add(light_rule)
        add(f"{'Statistic':<12} {'Inf. Value':>12} {'Full Sample':>12}"
            f" {'5% CV':>10} {'Reject H0':>10}"
            f" {'tau_0':>8} {'tau_1':>8}")
        add(light_rule)

        for name in self.stat_types:
            inf_stat = self.infimum_stats.get(name, np.nan)
            full_stat = self.full_sample_stats.get(name, np.nan)
            cv5 = self.critical_values.get(name, {}).get(0.05, np.nan)

            verdict = self.significant(name, 0.05)
            if verdict is None:
                verdict_str = "N/A"
            elif verdict:
                verdict_str = "Yes***"
            else:
                verdict_str = "No"

            frac0, frac1 = self.break_fractions(name)
            frac0_str = "N/A" if frac0 is None else f"{frac0:.3f}"
            frac1_str = "N/A" if frac1 is None else f"{frac1:.3f}"

            add(f"{name + '*':<12} {inf_stat:>12.4f} {full_stat:>12.4f}"
                f" {cv5:>10.4f} {verdict_str:>10}"
                f" {frac0_str:>8} {frac1_str:>8}")

        add(light_rule)
        add("Notes: Infimum statistics are Z*_rho, Z*_t, ADF*_rho, ADF*_t")
        add(" from Kim (2003, Eqs. 3.13-3.14).")
        add(" Critical values from Tables 1-2 for ell_bar(T_N)=0.3.")
        add(" Reject H0 implies segmented cointegration detected.")
        add(heavy_rule)
        return "\n".join(out)

    def __repr__(self):
        return self.summary()