segmcoint 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,333 @@
1
+ """
2
+ Monte Carlo simulation for generating critical values.
3
+
4
+ Implements the simulation procedures described in:
5
+ - Kim (2003, Section 3.1): Response surface method for asymptotic
6
+ critical values of Z*_rho, Z*_t, ADF*_rho, ADF*_t.
7
+ - Martins & Rodrigues (2022, Table 1): Critical values for W(m*) and
8
+ W_max statistics.
9
+
10
+ References
11
+ ----------
12
+ MacKinnon, J.G. (1991). Critical values for cointegration tests.
13
+ In Engle and Granger (eds.), Long-Run Economic Relationships.
14
+ """
15
+
16
+ import numpy as np
17
+ from numpy.linalg import inv
18
+ import warnings
19
+
20
+
21
def simulate_kim_critical_values(n, model="drift", max_ell=0.3,
                                 T=500, n_reps=5000, seed=None):
    """
    Monte Carlo critical values for the Kim (2003) infimum statistics.

    Data are generated under the null of no cointegration (independent
    random walks).  For each replication the infimum of Z*_rho, Z*_t,
    ADF*_rho and ADF*_t over a grid of noncointegration windows is
    recorded, and the empirical quantiles of those infima are returned.
    Following Kim (2003, p.628) / MacKinnon (1991), callers may run this
    at several sample sizes and fit a response surface.

    Parameters
    ----------
    n : int
        Number of variables in the cointegration regression.
    model : str
        Deterministic specification: 'none', 'drift', or 'trend'.
    max_ell : float
        Maximum fraction of the sample allowed in the
        noncointegration window.
    T : int
        Sample size per replication.
    n_reps : int
        Number of Monte Carlo replications.
    seed : int or None
        Seed for the random generator.

    Returns
    -------
    dict
        Keys 'Zp', 'Zt', 'ADFp', 'ADFt'; each value maps a percentile
        level to its simulated critical value.
    """
    from .kim2003 import _compute_Zp_Zt, _compute_ADF
    from .utils import ols_residuals, select_lag_bic

    rng = np.random.default_rng(seed)
    quantile_levels = [0.01, 0.025, 0.05, 0.10, 0.15, 0.95, 0.975, 0.99]

    # One list of per-replication infima per statistic.
    draws = {"Zp": [], "Zt": [], "ADFp": [], "ADFt": []}
    max_len = int(max_ell * T)

    for _ in range(n_reps):
        # n independent random walks under H0.
        innovations = rng.normal(0, 1, size=(T, n))
        walks = np.cumsum(innovations, axis=0)
        y = walks[:, 0]

        if n > 1:
            regressors = walks[:, 1:]
        else:
            # Single-variable case: draw an independent regressor walk.
            regressors = np.cumsum(rng.normal(0, 1, size=(T, 1)), axis=0)

        # Residuals from the cointegrating regression.
        try:
            resid, _ = ols_residuals(y, regressors, model=model)
        except np.linalg.LinAlgError:
            continue

        # Data-driven lag order, capped at 2 for speed.
        lag = min(select_lag_bic(resid, max_p=4), 2)

        best = {"Zp": np.inf, "Zt": np.inf, "ADFp": np.inf, "ADFt": np.inf}

        # Coarse grid over window length and placement (~20 points each).
        ell_step = max(1, max_len // 20)
        for ell in range(1, max_len + 1, ell_step):
            start_step = max(1, (T - ell) // 20)
            for start in range(0, T - ell + 1, start_step):
                # Too few cointegrated observations to estimate.
                if T - ell < n:
                    continue

                # Indicator weights: zero inside the candidate window.
                weights = np.ones(T)
                weights[start:start + ell] = 0.0

                Zp, Zt = _compute_Zp_Zt(resid, weights)
                if np.isfinite(Zp) and Zp < best["Zp"]:
                    best["Zp"] = Zp
                if np.isfinite(Zt) and Zt < best["Zt"]:
                    best["Zt"] = Zt

                try:
                    ADFp, ADFt = _compute_ADF(resid, weights, p=lag)
                except (ValueError, np.linalg.LinAlgError):
                    continue
                if np.isfinite(ADFp) and ADFp < best["ADFp"]:
                    best["ADFp"] = ADFp
                if np.isfinite(ADFt) and ADFt < best["ADFt"]:
                    best["ADFt"] = ADFt

        for key in draws:
            draws[key].append(best[key])

    # Empirical quantiles over the finite draws only.
    results = {}
    for key, values in draws.items():
        finite = np.array([v for v in values if np.isfinite(v)])
        if finite.size:
            results[key] = {q: np.percentile(finite, q * 100)
                            for q in quantile_levels}
        else:
            results[key] = {q: np.nan for q in quantile_levels}

    return results
137
+
138
+
139
def simulate_mr_critical_values(K_plus_1, model="drift", max_breaks=4,
                                epsilon=0.15, T=1000, n_reps=5000,
                                seed=None):
    """
    Monte Carlo critical values for the Martins & Rodrigues (2022)
    Wald-type tests.

    Data are generated under H0 (no cointegration: independent random
    walks); for each replication W(m*) is computed for m* = 1..max_breaks
    together with W_max, and empirical quantiles are returned.

    Parameters
    ----------
    K_plus_1 : int
        Total number of variables (dependent plus regressors).
    model : str
        Deterministic specification: 'none', 'drift', or 'trend'.
    max_breaks : int
        Maximum number of breaks considered.
    epsilon : float
        Trimming parameter for the break-date grid.
    T : int
        Sample size per replication.
    n_reps : int
        Number of Monte Carlo replications.
    seed : int or None
        Seed for the random generator.

    Returns
    -------
    dict
        Keys 1..max_breaks and 'max'; each value maps a significance
        level to its simulated critical value.
    """
    from .martins_rodrigues2022 import (
        _compute_F_statistic, _generate_break_dates_fast)
    from .utils import ols_residuals, select_lag_bic

    rng = np.random.default_rng(seed)
    levels = [0.90, 0.95, 0.975, 0.99]

    stats_by_m = {m: [] for m in range(1, max_breaks + 1)}
    sup_stats = []

    for _ in range(n_reps):
        # K+1 independent random walks under H0.
        shocks = rng.normal(0, 1, size=(T, K_plus_1))
        walks = np.cumsum(shocks, axis=0)
        y, X = walks[:, 0], walks[:, 1:]

        try:
            resid, _ = ols_residuals(y, X, model=model)
        except np.linalg.LinAlgError:
            continue

        # Data-driven lag order, capped at 2 for speed.
        lag = min(select_lag_bic(resid, max_p=4), 2)

        per_rep = {}
        # Grid coarseness is invariant across m*, so compute it once.
        grid_step = max(1, int(T * epsilon / 5))

        for m in range(1, max_breaks + 1):
            sup_A = -np.inf
            sup_B = -np.inf

            for dates in _generate_break_dates_fast(T, m, epsilon,
                                                    grid_step):
                FA = _compute_F_statistic(resid, dates, m, "A", lag)
                FB = _compute_F_statistic(resid, dates, m, "B", lag)
                if np.isfinite(FA) and FA > sup_A:
                    sup_A = FA
                if np.isfinite(FB) and FB > sup_B:
                    sup_B = FB

            stat = max(sup_A, sup_B)
            stats_by_m[m].append(stat)
            per_rep[m] = stat

        sup_stats.append(max(per_rep.values()) if per_rep else np.nan)

    def _quantiles(values):
        # Empirical quantiles over the finite draws only.
        finite = np.array([v for v in values if np.isfinite(v)])
        if finite.size:
            return {q: np.percentile(finite, q * 100) for q in levels}
        return {q: np.nan for q in levels}

    results = {m: _quantiles(stats_by_m[m])
               for m in range(1, max_breaks + 1)}
    results["max"] = _quantiles(sup_stats)

    return results
242
+
243
+
244
def monte_carlo_size_power(T=200, n_reps=2000, rho=0.9,
                           n_break_start=None, n_break_end=None,
                           model="drift", alpha=0.05, test="both",
                           seed=None):
    """
    Monte Carlo size/power study for the Kim and MR tests.

    Replicates the simulation designs of Kim (2003, Section 4,
    Tables 3-5) and Martins & Rodrigues (2022, Section 5, Tables 2-4):
    segmented-cointegration data are drawn, each test is applied, and
    rejection frequencies at level ``alpha`` are reported.

    Parameters
    ----------
    T : int
        Sample size.
    n_reps : int
        Number of Monte Carlo replications.
    rho : float
        AR(1) root in the cointegration regime; rho=1 yields size,
        rho<1 yields power.
    n_break_start : int or None
        Start of the noncointegration period; defaults to int(0.4*T).
    n_break_end : int or None
        End of the noncointegration period; defaults to int(0.6*T).
    model : str
        Deterministic specification.
    alpha : float
        Nominal significance level.
    test : str
        Which test(s) to evaluate: 'kim', 'mr', or 'both'.
    seed : int or None
        Seed for the random generator.

    Returns
    -------
    dict
        Rejection frequencies keyed by test ('kim' and/or 'mr') and
        statistic.
    """
    from .utils import generate_segmented_data
    from .kim2003 import kim_test
    from .martins_rodrigues2022 import mr_test

    rng = np.random.default_rng(seed)

    if n_break_start is None:
        n_break_start = int(0.4 * T)
    if n_break_end is None:
        n_break_end = int(0.6 * T)

    run_kim = test in ("kim", "both")
    run_mr = test in ("mr", "both")

    kim_stats = ("Zp", "Zt", "ADFp", "ADFt")
    kim_counts = dict.fromkeys(kim_stats, 0)
    mr_counts = dict.fromkeys(range(1, 5), 0)
    mr_counts["max"] = 0

    # Same search-grid coarseness for both tests.
    grid_step = max(1, T // 50)

    for _ in range(n_reps):
        # Fresh child seed per replication for reproducibility.
        rep_seed = rng.integers(0, 2**31)

        y, X, _, _ = generate_segmented_data(
            T, beta=1.0, rho=rho, sigma_v=0.1, sigma_u=0.1,
            n_break_start=n_break_start, n_break_end=n_break_end,
            model=model, seed=rep_seed)

        if run_kim:
            try:
                res = kim_test(y, X, model=model, step=grid_step)
                for stat in kim_stats:
                    if res.significant(stat, alpha):
                        kim_counts[stat] += 1
            except Exception:
                # Best-effort: a failed replication is simply skipped.
                pass

        if run_mr:
            try:
                res = mr_test(y, X, model=model, step=grid_step)
                # Iterates 1..4 then 'max', matching insertion order.
                for key in mr_counts:
                    if res.significant(key, alpha):
                        mr_counts[key] += 1
            except Exception:
                # Best-effort: a failed replication is simply skipped.
                pass

    results = {}
    if run_kim:
        results["kim"] = {s: c / n_reps for s, c in kim_counts.items()}
    if run_mr:
        results["mr"] = {k: c / n_reps for k, c in mr_counts.items()}

    return results