segmcoint 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- segmcoint/__init__.py +103 -0
- segmcoint/kim2003.py +672 -0
- segmcoint/martins_rodrigues2022.py +783 -0
- segmcoint/simulation.py +333 -0
- segmcoint/utils.py +433 -0
- segmcoint-1.0.0.dist-info/METADATA +144 -0
- segmcoint-1.0.0.dist-info/RECORD +10 -0
- segmcoint-1.0.0.dist-info/WHEEL +5 -0
- segmcoint-1.0.0.dist-info/licenses/LICENSE +21 -0
- segmcoint-1.0.0.dist-info/top_level.txt +1 -0
segmcoint/simulation.py
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Monte Carlo simulation for generating critical values.
|
|
3
|
+
|
|
4
|
+
Implements the simulation procedures described in:
|
|
5
|
+
- Kim (2003, Section 3.1): Response surface method for asymptotic
|
|
6
|
+
critical values of Z*_rho, Z*_t, ADF*_rho, ADF*_t.
|
|
7
|
+
- Martins & Rodrigues (2022, Table 1): Critical values for W(m*) and
|
|
8
|
+
W_max statistics.
|
|
9
|
+
|
|
10
|
+
References
|
|
11
|
+
----------
|
|
12
|
+
MacKinnon, J.G. (1991). Critical values for cointegration tests.
|
|
13
|
+
In Engle and Granger (eds.), Long-Run Economic Relationships.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import numpy as np
|
|
17
|
+
from numpy.linalg import inv
|
|
18
|
+
import warnings
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def simulate_kim_critical_values(n, model="drift", max_ell=0.3,
                                 T=500, n_reps=5000, seed=None):
    """
    Simulate critical values for Kim (2003) infimum test statistics.

    Generates data under H_0 (no cointegration: all variables are
    independent random walks), computes the infimum statistics
    Z*_rho, Z*_t, ADF*_rho, ADF*_t over a coarse grid of candidate
    noncointegration windows, and returns the empirical distribution
    quantiles.

    Only the single sample size ``T`` is simulated per call. The
    response surface approach of MacKinnon (1991) referred to by
    Kim (2003, p.628) -- critical values at several sample sizes,
    then extrapolation -- is not performed here; call this function
    for several values of ``T`` if that extrapolation is needed.

    Parameters
    ----------
    n : int
        Number of variables in the cointegration regression.
    model : str
        Model specification: 'none', 'drift', 'trend'.
    max_ell : float
        Maximum length of noncointegration period, as a fraction of T.
    T : int
        Sample size for simulation.
    n_reps : int
        Number of Monte Carlo replications.
    seed : int or None
        Random seed.

    Returns
    -------
    results : dict
        Dictionary with keys 'Zp', 'Zt', 'ADFp', 'ADFt', each containing
        a dict of percentile -> critical value. The percentile keys are
        the fractions 0.01, 0.025, 0.05, 0.10, 0.15, 0.95, 0.975, 0.99.
        A statistic with no finite draw maps every percentile to NaN.

    Warns
    -----
    RuntimeWarning
        If one or more replications were skipped because the
        first-stage regression raised ``numpy.linalg.LinAlgError``.
    """
    from .kim2003 import _compute_Zp_Zt, _compute_ADF
    from .utils import ols_residuals, select_lag_bic

    rng = np.random.default_rng(seed)

    percentiles = [0.01, 0.025, 0.05, 0.10, 0.15, 0.95, 0.975, 0.99]
    stat_names = ("Zp", "Zt", "ADFp", "ADFt")
    draws = {name: [] for name in stat_names}

    max_len = int(max_ell * T)
    # Roughly 20 candidate window lengths, whatever max_len is.
    ell_step = max(1, max_len // 20)
    n_skipped = 0

    for _ in range(n_reps):
        # Generate n independent random walks under H0
        u = rng.normal(0, 1, size=(T, n))
        data = np.cumsum(u, axis=0)

        y = data[:, 0]
        if n > 1:
            X_rw = data[:, 1:]
        else:
            # With a single simulated column there is no regressor left,
            # so one extra independent random walk is drawn for it.
            X_rw = np.cumsum(rng.normal(0, 1, size=(T, 1)), axis=0)

        # OLS residuals
        try:
            e, _ = ols_residuals(y, X_rw, model=model)
        except np.linalg.LinAlgError:
            n_skipped += 1
            continue

        # Select lag order (BIC choice capped at 2)
        p = min(select_lag_bic(e, max_p=4), 2)

        # Search for infimum
        best = dict.fromkeys(stat_names, np.inf)

        for ell_N in range(1, max_len + 1, ell_step):
            T_C = T - ell_N
            if T_C < n:
                # T_C only shrinks as ell_N grows, so every remaining
                # window length would be rejected as well.
                break

            for k0 in range(0, T_C + 1, max(1, T_C // 20)):
                # Weight 0 inside the candidate window [k0, k0 + ell_N),
                # weight 1 elsewhere.
                w = np.ones(T)
                w[k0:k0 + ell_N] = 0.0

                Zp, Zt = _compute_Zp_Zt(e, w)
                if np.isfinite(Zp) and Zp < best["Zp"]:
                    best["Zp"] = Zp
                if np.isfinite(Zt) and Zt < best["Zt"]:
                    best["Zt"] = Zt

                try:
                    ADFp, ADFt = _compute_ADF(e, w, p=p)
                    if np.isfinite(ADFp) and ADFp < best["ADFp"]:
                        best["ADFp"] = ADFp
                    if np.isfinite(ADFt) and ADFt < best["ADFt"]:
                        best["ADFt"] = ADFt
                except (ValueError, np.linalg.LinAlgError):
                    # Best effort: a window for which the ADF statistics
                    # cannot be computed is left out of the infimum.
                    pass

        for name in stat_names:
            draws[name].append(best[name])

    if n_skipped:
        warnings.warn(
            "{} of {} replications were skipped because the first-stage "
            "regression raised LinAlgError; quantiles are based on the "
            "remaining replications.".format(n_skipped, n_reps),
            RuntimeWarning,
            stacklevel=2,
        )

    # Compute quantiles
    results = {}
    for name in stat_names:
        # A replication whose grid produced no finite value keeps +inf
        # and is dropped here.
        arr = np.array([v for v in draws[name] if np.isfinite(v)])
        if len(arr) > 0:
            results[name] = {
                q: np.percentile(arr, q * 100) for q in percentiles
            }
        else:
            results[name] = {q: np.nan for q in percentiles}

    return results
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def simulate_mr_critical_values(K_plus_1, model="drift", max_breaks=4,
                                epsilon=0.15, T=1000, n_reps=5000,
                                seed=None):
    """
    Simulate critical values for Martins & Rodrigues (2022) Wald tests.

    Generates data under H_0 (no cointegration), computes W(m*) for
    m* = 1,...,max_breaks and W_max, and returns empirical quantiles.
    For each m*, W(m*) is taken as the larger of the maximised "A" and
    "B" F-statistics over the candidate break dates.

    Parameters
    ----------
    K_plus_1 : int
        Total number of variables.
    model : str
        Model specification: 'none', 'drift', 'trend'.
    max_breaks : int
        Maximum number of breaks.
    epsilon : float
        Trimming parameter.
    T : int
        Sample size.
    n_reps : int
        Number of Monte Carlo replications.
    seed : int or None
        Random seed.

    Returns
    -------
    results : dict
        Dictionary with keys 1, 2, ..., max_breaks, 'max', each containing
        a dict of percentile -> critical value. The percentile keys are
        the fractions 0.90, 0.95, 0.975, 0.99. An entry with no finite
        draw maps every percentile to NaN.

    Warns
    -----
    RuntimeWarning
        If one or more replications were skipped because the
        first-stage regression raised ``numpy.linalg.LinAlgError``.
    """
    from .martins_rodrigues2022 import (
        _compute_F_statistic, _generate_break_dates_fast)
    from .utils import ols_residuals, select_lag_bic

    rng = np.random.default_rng(seed)

    sig_levels = [0.90, 0.95, 0.975, 0.99]
    break_counts = range(1, max_breaks + 1)

    # Grid step handed to the break-date generator; it depends only on
    # T and epsilon, so it is computed once.
    step = max(1, int(T * epsilon / 5))

    def _quantiles(values):
        # Non-finite draws (-inf when no finite F-statistic was found,
        # NaN when max_breaks < 1) are excluded from the quantiles.
        arr = np.array([v for v in values if np.isfinite(v)])
        if len(arr) > 0:
            return {q: np.percentile(arr, q * 100) for q in sig_levels}
        return {q: np.nan for q in sig_levels}

    # Storage
    W_all = {m: [] for m in break_counts}
    W_max_all = []
    n_skipped = 0

    for _ in range(n_reps):
        # Generate K+1 independent random walks under H0
        u = rng.normal(0, 1, size=(T, K_plus_1))
        data = np.cumsum(u, axis=0)

        y = data[:, 0]
        X = data[:, 1:]

        # OLS residuals
        try:
            e, _ = ols_residuals(y, X, model=model)
        except np.linalg.LinAlgError:
            n_skipped += 1
            continue

        # Lag order: BIC choice capped at 2
        p = min(select_lag_bic(e, max_p=4), 2)

        W_rep = {}
        for m_star in break_counts:
            best_FA = -np.inf
            best_FB = -np.inf

            for breaks in _generate_break_dates_fast(
                    T, m_star, epsilon, step):
                FA = _compute_F_statistic(e, breaks, m_star, "A", p)
                FB = _compute_F_statistic(e, breaks, m_star, "B", p)
                if np.isfinite(FA) and FA > best_FA:
                    best_FA = FA
                if np.isfinite(FB) and FB > best_FB:
                    best_FB = FB

            W_m = max(best_FA, best_FB)
            W_all[m_star].append(W_m)
            W_rep[m_star] = W_m

        W_max_all.append(max(W_rep.values()) if W_rep else np.nan)

    if n_skipped:
        warnings.warn(
            "{} of {} replications were skipped because the first-stage "
            "regression raised LinAlgError; quantiles are based on the "
            "remaining replications.".format(n_skipped, n_reps),
            RuntimeWarning,
            stacklevel=2,
        )

    # Compute quantiles
    results = {m_star: _quantiles(W_all[m_star]) for m_star in break_counts}
    results["max"] = _quantiles(W_max_all)

    return results
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def monte_carlo_size_power(T=200, n_reps=2000, rho=0.9,
                           n_break_start=None, n_break_end=None,
                           model="drift", alpha=0.05, test="both",
                           seed=None):
    """
    Perform Monte Carlo size and power analysis.

    Reproduces the simulation design from:
    - Kim (2003, Section 4): Tables 3-5
    - Martins & Rodrigues (2022, Section 5): Tables 2-4

    Each replication draws one data set from ``generate_segmented_data``
    (beta=1.0, sigma_v=0.1, sigma_u=0.1) with its own seed taken from
    the master generator, runs the requested test(s), and counts
    rejections at level ``alpha``.

    Parameters
    ----------
    T : int
        Sample size.
    n_reps : int
        Number of Monte Carlo replications.
    rho : float
        AR(1) root in cointegration regime. rho=1 gives size, rho<1 gives power.
    n_break_start : int or None
        Start of noncointegration period. None defaults to int(0.4*T).
    n_break_end : int or None
        End of noncointegration period. None defaults to int(0.6*T).
    model : str
        Model specification.
    alpha : float
        Nominal significance level.
    test : str
        Which test to evaluate: 'kim', 'mr', or 'both'. Any other value
        runs no test and yields an empty result.
    seed : int or None
        Random seed.

    Returns
    -------
    results : dict
        Rejection frequencies for each test statistic, under the key
        'kim' (statistics 'Zp', 'Zt', 'ADFp', 'ADFt') and/or 'mr'
        (break counts 1-4 and 'max'). Frequencies are rejections divided
        by ``n_reps``; a replication in which a test raised an exception
        therefore counts as a non-rejection for the statistics not yet
        evaluated in that replication.

    Warns
    -----
    RuntimeWarning
        If a test raised an exception in one or more replications. The
        message gives the number of affected replications per test so
        that downward-biased frequencies do not go unnoticed.
    """
    from .utils import generate_segmented_data
    from .kim2003 import kim_test
    from .martins_rodrigues2022 import mr_test

    rng = np.random.default_rng(seed)

    if n_break_start is None:
        n_break_start = int(0.4 * T)
    if n_break_end is None:
        n_break_end = int(0.6 * T)

    kim_rejections = {"Zp": 0, "Zt": 0, "ADFp": 0, "ADFt": 0}
    mr_rejections = {m: 0 for m in range(1, 5)}
    mr_rejections["max"] = 0

    # Loop-invariant settings.
    run_kim = test in ("kim", "both")
    run_mr = test in ("mr", "both")
    grid_step = max(1, T // 50)

    # Replications in which a test raised, per test.
    failures = {"kim": 0, "mr": 0}

    for _rep in range(n_reps):
        seed_i = rng.integers(0, 2**31)

        y, X, _, _ = generate_segmented_data(
            T, beta=1.0, rho=rho, sigma_v=0.1, sigma_u=0.1,
            n_break_start=n_break_start, n_break_end=n_break_end,
            model=model, seed=seed_i)

        if run_kim:
            try:
                res_kim = kim_test(y, X, model=model, step=grid_step)
                for s in ["Zp", "Zt", "ADFp", "ADFt"]:
                    if res_kim.significant(s, alpha):
                        kim_rejections[s] += 1
            except Exception:
                # Best effort: keep the simulation running, but record
                # the failure so it can be reported afterwards.
                failures["kim"] += 1

        if run_mr:
            try:
                res_mr = mr_test(y, X, model=model,
                                 step=grid_step)
                for m in range(1, 5):
                    if res_mr.significant(m, alpha):
                        mr_rejections[m] += 1
                if res_mr.significant("max", alpha):
                    mr_rejections["max"] += 1
            except Exception:
                failures["mr"] += 1

    failed = {name: count for name, count in failures.items() if count}
    if failed:
        detail = ", ".join(
            "{}: {}".format(name, count) for name, count in failed.items())
        warnings.warn(
            "Some of the {} replications raised an exception and were "
            "counted as non-rejections ({}); reported rejection "
            "frequencies may be biased downward.".format(n_reps, detail),
            RuntimeWarning,
            stacklevel=2,
        )

    results = {}
    if run_kim:
        results["kim"] = {
            s: kim_rejections[s] / n_reps for s in kim_rejections}
    if run_mr:
        results["mr"] = {
            s: mr_rejections[s] / n_reps for s in mr_rejections}

    return results
|