panelbox 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. panelbox/__init__.py +67 -0
  2. panelbox/__version__.py +14 -0
  3. panelbox/cli/__init__.py +0 -0
  4. panelbox/cli/{commands}/__init__.py +0 -0
  5. panelbox/core/__init__.py +0 -0
  6. panelbox/core/base_model.py +164 -0
  7. panelbox/core/formula_parser.py +318 -0
  8. panelbox/core/panel_data.py +387 -0
  9. panelbox/core/results.py +366 -0
  10. panelbox/datasets/__init__.py +0 -0
  11. panelbox/datasets/{data}/__init__.py +0 -0
  12. panelbox/gmm/__init__.py +65 -0
  13. panelbox/gmm/difference_gmm.py +645 -0
  14. panelbox/gmm/estimator.py +562 -0
  15. panelbox/gmm/instruments.py +580 -0
  16. panelbox/gmm/results.py +550 -0
  17. panelbox/gmm/system_gmm.py +621 -0
  18. panelbox/gmm/tests.py +535 -0
  19. panelbox/models/__init__.py +11 -0
  20. panelbox/models/dynamic/__init__.py +0 -0
  21. panelbox/models/iv/__init__.py +0 -0
  22. panelbox/models/static/__init__.py +13 -0
  23. panelbox/models/static/fixed_effects.py +516 -0
  24. panelbox/models/static/pooled_ols.py +298 -0
  25. panelbox/models/static/random_effects.py +512 -0
  26. panelbox/report/__init__.py +61 -0
  27. panelbox/report/asset_manager.py +410 -0
  28. panelbox/report/css_manager.py +472 -0
  29. panelbox/report/exporters/__init__.py +15 -0
  30. panelbox/report/exporters/html_exporter.py +440 -0
  31. panelbox/report/exporters/latex_exporter.py +510 -0
  32. panelbox/report/exporters/markdown_exporter.py +446 -0
  33. panelbox/report/renderers/__init__.py +11 -0
  34. panelbox/report/renderers/static/__init__.py +0 -0
  35. panelbox/report/renderers/static_validation_renderer.py +341 -0
  36. panelbox/report/report_manager.py +502 -0
  37. panelbox/report/template_manager.py +337 -0
  38. panelbox/report/transformers/__init__.py +0 -0
  39. panelbox/report/transformers/static/__init__.py +0 -0
  40. panelbox/report/validation_transformer.py +449 -0
  41. panelbox/standard_errors/__init__.py +0 -0
  42. panelbox/templates/__init__.py +0 -0
  43. panelbox/templates/assets/css/base_styles.css +382 -0
  44. panelbox/templates/assets/css/report_components.css +747 -0
  45. panelbox/templates/assets/js/tab-navigation.js +161 -0
  46. panelbox/templates/assets/js/utils.js +276 -0
  47. panelbox/templates/common/footer.html +24 -0
  48. panelbox/templates/common/header.html +44 -0
  49. panelbox/templates/common/meta.html +5 -0
  50. panelbox/templates/validation/interactive/index.html +272 -0
  51. panelbox/templates/validation/interactive/partials/charts.html +58 -0
  52. panelbox/templates/validation/interactive/partials/methodology.html +201 -0
  53. panelbox/templates/validation/interactive/partials/overview.html +146 -0
  54. panelbox/templates/validation/interactive/partials/recommendations.html +101 -0
  55. panelbox/templates/validation/interactive/partials/test_results.html +231 -0
  56. panelbox/utils/__init__.py +0 -0
  57. panelbox/utils/formatting.py +172 -0
  58. panelbox/utils/matrix_ops.py +233 -0
  59. panelbox/utils/statistical.py +173 -0
  60. panelbox/validation/__init__.py +58 -0
  61. panelbox/validation/base.py +175 -0
  62. panelbox/validation/cointegration/__init__.py +0 -0
  63. panelbox/validation/cross_sectional_dependence/__init__.py +13 -0
  64. panelbox/validation/cross_sectional_dependence/breusch_pagan_lm.py +222 -0
  65. panelbox/validation/cross_sectional_dependence/frees.py +297 -0
  66. panelbox/validation/cross_sectional_dependence/pesaran_cd.py +188 -0
  67. panelbox/validation/heteroskedasticity/__init__.py +13 -0
  68. panelbox/validation/heteroskedasticity/breusch_pagan.py +222 -0
  69. panelbox/validation/heteroskedasticity/modified_wald.py +172 -0
  70. panelbox/validation/heteroskedasticity/white.py +208 -0
  71. panelbox/validation/instruments/__init__.py +0 -0
  72. panelbox/validation/robustness/__init__.py +0 -0
  73. panelbox/validation/serial_correlation/__init__.py +13 -0
  74. panelbox/validation/serial_correlation/baltagi_wu.py +220 -0
  75. panelbox/validation/serial_correlation/breusch_godfrey.py +260 -0
  76. panelbox/validation/serial_correlation/wooldridge_ar.py +200 -0
  77. panelbox/validation/specification/__init__.py +16 -0
  78. panelbox/validation/specification/chow.py +273 -0
  79. panelbox/validation/specification/hausman.py +264 -0
  80. panelbox/validation/specification/mundlak.py +331 -0
  81. panelbox/validation/specification/reset.py +273 -0
  82. panelbox/validation/unit_root/__init__.py +0 -0
  83. panelbox/validation/validation_report.py +257 -0
  84. panelbox/validation/validation_suite.py +401 -0
  85. panelbox-0.2.0.dist-info/METADATA +337 -0
  86. panelbox-0.2.0.dist-info/RECORD +90 -0
  87. panelbox-0.2.0.dist-info/WHEEL +5 -0
  88. panelbox-0.2.0.dist-info/entry_points.txt +2 -0
  89. panelbox-0.2.0.dist-info/licenses/LICENSE +21 -0
  90. panelbox-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,621 @@
1
+ """
2
+ System GMM Estimator
3
+ ====================
4
+
5
+ Blundell-Bond (1998) System GMM estimator for dynamic panel data models.
6
+
7
+ Classes
8
+ -------
9
+ SystemGMM : Blundell-Bond System GMM estimator
10
+
11
+ References
12
+ ----------
13
+ .. [1] Blundell, R., & Bond, S. (1998). "Initial Conditions and Moment
14
+ Restrictions in Dynamic Panel Data Models." Journal of Econometrics,
15
+ 87(1), 115-143.
16
+ """
17
+
18
+ from typing import Union, List, Optional, Dict
19
+ import numpy as np
20
+ import pandas as pd
21
+ from panelbox.gmm.difference_gmm import DifferenceGMM
22
+ from panelbox.gmm.results import GMMResults
23
+ from panelbox.gmm.instruments import InstrumentSet, InstrumentBuilder
24
+
25
+
26
+ class SystemGMM(DifferenceGMM):
27
+ """
28
+ Blundell-Bond (1998) System GMM estimator.
29
+
30
+ Combines difference and level equations in a stacked system:
31
+ - Difference equations (instruments: lags of levels)
32
+ - Level equations (instruments: lags of differences)
33
+
34
+ Advantages over Difference GMM:
35
+ - More efficient when series are persistent
36
+ - Better precision for coefficient estimates
37
+ - Additional moment conditions
38
+
39
+ Requires assumption:
40
+ E[Δy_{i,t-1} · η_i] = 0 (initial conditions)
41
+
42
+ Parameters
43
+ ----------
44
+ data : pd.DataFrame
45
+ Panel data in long format
46
+ dep_var : str
47
+ Name of dependent variable
48
+ lags : Union[int, List[int]]
49
+ Lags of dependent variable to include
50
+ id_var : str
51
+ Name of cross-sectional identifier (default: 'id')
52
+ time_var : str
53
+ Name of time variable (default: 'year')
54
+ exog_vars : List[str], optional
55
+ List of strictly exogenous variables
56
+ endogenous_vars : List[str], optional
57
+ List of endogenous variables
58
+ predetermined_vars : List[str], optional
59
+ List of predetermined variables
60
+ time_dummies : bool
61
+ Include time dummies (default: True)
62
+ collapse : bool
63
+ Collapse instruments (default: False)
64
+ two_step : bool
65
+ Use two-step GMM (default: True)
66
+ robust : bool
67
+ Use robust variance with Windmeijer correction (default: True)
68
+ gmm_type : str
69
+ GMM type: 'one_step', 'two_step', 'iterative' (default: 'two_step')
70
+ level_instruments : Dict, optional
71
+ Configuration for level equation instruments
72
+ Example: {'max_lags': 1} uses L.D.y as instrument
73
+
74
+ Attributes
75
+ ----------
76
+ level_instruments : Dict
77
+ Configuration for level equation instruments
78
+
79
+ Examples
80
+ --------
81
+ **When to use System GMM:**
82
+
83
+ System GMM is preferred over Difference GMM when:
84
+ - Variables are highly persistent (AR coefficient near 1)
85
+ - Lagged levels are weak instruments for differences
86
+ - You want more efficient estimates (smaller standard errors)
87
+
88
+ **Basic System GMM with production function:**
89
+
90
+ >>> import pandas as pd
91
+ >>> from panelbox.gmm import SystemGMM
92
+ >>>
93
+ >>> # Load production data
94
+ >>> data = pd.read_csv('production.csv')
95
+ >>>
96
+ >>> # Estimate System GMM
97
+ >>> model = SystemGMM(
98
+ ... data=data,
99
+ ... dep_var='output',
100
+ ... lags=1, # Include output_{t-1}
101
+ ... id_var='firm_id',
102
+ ... time_var='year',
103
+ ... exog_vars=['capital', 'labor'],
104
+ ... collapse=True, # Always recommended
105
+ ... two_step=True,
106
+ ... robust=True,
107
+ ... level_instruments={'max_lags': 1} # Use Δy_{t-1} for level equation
108
+ ... )
109
+ >>>
110
+ >>> results = model.fit()
111
+ >>> print(results.summary())
112
+ >>>
113
+ >>> # Check if more efficient than Difference GMM
114
+ >>> print(f"Standard error: {results.std_errors['L1.output']:.4f}")
115
+
116
+ **Comparing Difference vs System GMM:**
117
+
118
+ >>> from panelbox.gmm import DifferenceGMM, SystemGMM
119
+ >>>
120
+ >>> # Estimate both
121
+ >>> diff_gmm = DifferenceGMM(
122
+ ... data=data,
123
+ ... dep_var='y',
124
+ ... lags=1,
125
+ ... exog_vars=['x1', 'x2'],
126
+ ... collapse=True,
127
+ ... two_step=True
128
+ ... )
129
+ >>> diff_results = diff_gmm.fit()
130
+ >>>
131
+ >>> sys_gmm = SystemGMM(
132
+ ... data=data,
133
+ ... dep_var='y',
134
+ ... lags=1,
135
+ ... exog_vars=['x1', 'x2'],
136
+ ... collapse=True,
137
+ ... two_step=True,
138
+ ... level_instruments={'max_lags': 1}
139
+ ... )
140
+ >>> sys_results = sys_gmm.fit()
141
+ >>>
142
+ >>> # Compare efficiency
143
+ >>> coef_name = 'L1.y'
144
+ >>> diff_se = diff_results.std_errors[coef_name]
145
+ >>> sys_se = sys_results.std_errors[coef_name]
146
+ >>> efficiency_gain = (diff_se - sys_se) / diff_se * 100
147
+ >>> print(f"System GMM SE is {efficiency_gain:.1f}% smaller")
148
+ >>>
149
+ >>> # Check if both are valid
150
+ >>> if sys_results.ar2_test.pvalue > 0.10 and sys_results.hansen_j.pvalue > 0.10:
151
+ ... print("System GMM preferred (more efficient and valid)")
152
+
153
+ **With custom level instruments:**
154
+
155
+ >>> # Control instrument depth for level equation
156
+ >>> model = SystemGMM(
157
+ ... data=data,
158
+ ... dep_var='n',
159
+ ... lags=1,
160
+ ... exog_vars=['w', 'k'],
161
+ ... collapse=True,
162
+ ... level_instruments={'max_lags': 1}
163
+ ... )
164
+ >>> results = model.fit()
165
+
166
+ Notes
167
+ -----
168
+ System combines:
169
+
170
+ Difference equation:
171
+ Δy_{it} = γ Δy_{i,t-1} + β' Δx_{it} + Δε_{it}
172
+ Instruments: lags of levels (y_{i,t-2}, y_{i,t-3}, ...)
173
+
174
+ Level equation:
175
+ y_{it} = γ y_{i,t-1} + β' x_{it} + η_i + ε_{it}
176
+ Instruments: lags of differences (Δy_{i,t-1}, Δy_{i,t-2}, ...)
177
+
178
+ Critical assumption:
179
+ E[Δy_{i,1} · η_i] = 0
180
+ Violated if initial conditions are correlated with fixed effects
181
+
182
+ References
183
+ ----------
184
+ Blundell, R., & Bond, S. (1998). Journal of Econometrics, 87(1), 115-143.
185
+ """
186
+
187
+ def __init__(self,
188
+ data: pd.DataFrame,
189
+ dep_var: str,
190
+ lags: Union[int, List[int]],
191
+ id_var: str = 'id',
192
+ time_var: str = 'year',
193
+ exog_vars: Optional[List[str]] = None,
194
+ endogenous_vars: Optional[List[str]] = None,
195
+ predetermined_vars: Optional[List[str]] = None,
196
+ time_dummies: bool = True,
197
+ collapse: bool = False,
198
+ two_step: bool = True,
199
+ robust: bool = True,
200
+ gmm_type: str = 'two_step',
201
+ level_instruments: Optional[Dict] = None):
202
+ """Initialize System GMM model."""
203
+ # Initialize parent Difference GMM
204
+ super().__init__(
205
+ data=data,
206
+ dep_var=dep_var,
207
+ lags=lags,
208
+ id_var=id_var,
209
+ time_var=time_var,
210
+ exog_vars=exog_vars,
211
+ endogenous_vars=endogenous_vars,
212
+ predetermined_vars=predetermined_vars,
213
+ time_dummies=time_dummies,
214
+ collapse=collapse,
215
+ two_step=two_step,
216
+ robust=robust,
217
+ gmm_type=gmm_type
218
+ )
219
+
220
+ # Level instruments configuration
221
+ self.level_instruments = level_instruments or {'max_lags': 1}
222
+
223
+ def fit(self) -> GMMResults:
224
+ """
225
+ Estimate the System GMM model.
226
+
227
+ Returns
228
+ -------
229
+ GMMResults
230
+ Estimation results
231
+
232
+ Notes
233
+ -----
234
+ Estimation procedure:
235
+ 1. Create difference equations (as in Difference GMM)
236
+ 2. Create level equations
237
+ 3. Stack equations and instruments
238
+ 4. Estimate using stacked system
239
+ 5. Compute specification tests including Diff-in-Hansen
240
+ """
241
+ # Step 1 & 2: Transform data (both differences and levels)
242
+ y_diff, X_diff, y_level, X_level, ids, times = self._transform_data_system()
243
+
244
+ # Step 3: Generate instruments (difference + level)
245
+ # Note: _generate_instruments_system will recreate InstrumentBuilder internally
246
+ Z_diff, Z_level = self._generate_instruments_system()
247
+
248
+ # Step 4: Stack equations
249
+ y_stacked = np.vstack([y_diff, y_level])
250
+ X_stacked = np.vstack([X_diff, X_level])
251
+ Z_stacked = self._stack_instruments(Z_diff, Z_level)
252
+
253
+ # Repeat ids and times for stacked system
254
+ ids_stacked = np.concatenate([ids, ids])
255
+ times_stacked = np.concatenate([times, times])
256
+
257
+ # Step 5: Estimate GMM on stacked system
258
+ if self.gmm_type == 'one_step':
259
+ beta, W, residuals = self.estimator.one_step(y_stacked, X_stacked, Z_stacked)
260
+ vcov = self._compute_one_step_vcov(X_stacked, Z_stacked, residuals, W)
261
+ converged = True
262
+ elif self.gmm_type == 'two_step':
263
+ beta, vcov, W, residuals = self.estimator.two_step(
264
+ y_stacked, X_stacked, Z_stacked, robust=self.robust
265
+ )
266
+ converged = True
267
+ else: # iterative
268
+ beta, vcov, W, converged = self.estimator.iterative(
269
+ y_stacked, X_stacked, Z_stacked
270
+ )
271
+ residuals = y_stacked - X_stacked @ beta
272
+
273
+ # Ensure beta is 1D for pandas Series
274
+ beta = beta.flatten()
275
+
276
+ # Step 6: Compute standard errors and statistics
277
+ std_errors = np.sqrt(np.diag(vcov))
278
+ tvalues = beta / std_errors
279
+ from scipy import stats as scipy_stats
280
+ pvalues = 2 * (1 - scipy_stats.norm.cdf(np.abs(tvalues)))
281
+
282
+ # Step 7: Get variable names
283
+ var_names = self._get_variable_names()
284
+
285
+ # Step 8: Compute specification tests
286
+ n_params = len(beta)
287
+
288
+ # Hansen J-test on full system
289
+ hansen = self.tester.hansen_j_test(
290
+ residuals, Z_stacked, W, n_params
291
+ )
292
+
293
+ # Sargan test
294
+ sargan = self.tester.sargan_test(
295
+ residuals, Z_stacked, n_params
296
+ )
297
+
298
+ # AR tests (on difference residuals only)
299
+ n_diff = len(y_diff)
300
+ residuals_diff_only = residuals[:n_diff]
301
+ ids_diff_only = ids_stacked[:n_diff] # Use stacked ids, first half
302
+
303
+ valid_mask_diff = ~np.isnan(residuals_diff_only.flatten())
304
+ resid_diff_clean = residuals_diff_only.flatten()[valid_mask_diff]
305
+ ids_diff_clean = ids_diff_only[valid_mask_diff]
306
+
307
+ ar1 = self.tester.arellano_bond_ar_test(
308
+ resid_diff_clean, ids_diff_clean, order=1
309
+ )
310
+ ar2 = self.tester.arellano_bond_ar_test(
311
+ resid_diff_clean, ids_diff_clean, order=2
312
+ )
313
+
314
+ # Difference-in-Hansen test for level instruments
315
+ diff_hansen = self._compute_diff_hansen(
316
+ residuals, Z_diff, Z_level, W, n_params
317
+ )
318
+
319
+ # Step 9: Create results object
320
+ valid_mask = ~np.isnan(residuals.flatten())
321
+ self.results = GMMResults(
322
+ params=pd.Series(beta, index=var_names),
323
+ std_errors=pd.Series(std_errors, index=var_names),
324
+ tvalues=pd.Series(tvalues, index=var_names),
325
+ pvalues=pd.Series(pvalues, index=var_names),
326
+ nobs=int(np.sum(valid_mask)),
327
+ n_groups=self.instrument_builder.n_groups,
328
+ n_instruments=Z_stacked.shape[1],
329
+ n_params=n_params,
330
+ hansen_j=hansen,
331
+ sargan=sargan,
332
+ ar1_test=ar1,
333
+ ar2_test=ar2,
334
+ diff_hansen=diff_hansen,
335
+ vcov=vcov,
336
+ weight_matrix=W,
337
+ converged=converged,
338
+ two_step=self.two_step,
339
+ windmeijer_corrected=self.robust and self.two_step,
340
+ model_type='system',
341
+ transformation='fd',
342
+ residuals=residuals
343
+ )
344
+
345
+ self.params = self.results.params
346
+
347
+ # Post-estimation warning for low observation retention
348
+ retention_rate = self.results.nobs / len(self.data)
349
+ if retention_rate < 0.30:
350
+ import warnings
351
+ warnings.warn(
352
+ f"\nLow observation retention: {self.results.nobs}/{len(self.data)} "
353
+ f"({retention_rate*100:.1f}%).\n"
354
+ f"Many observations were dropped due to insufficient valid instruments.\n\n"
355
+ f"Recommendations:\n"
356
+ f" 1. Simplify specification (fewer variables/lags)\n"
357
+ f" 2. Set time_dummies=False (or use linear trend)\n"
358
+ f" 3. Ensure collapse=True (currently: {self.collapse})\n"
359
+ f" 4. Check data for excessive missing values\n"
360
+ f" 5. Consider using DifferenceGMM (more robust for weak instruments)\n\n"
361
+ f"See examples/gmm/unbalanced_panel_guide.py for detailed guidance.",
362
+ UserWarning
363
+ )
364
+
365
+ return self.results
366
+
367
+ def _transform_data_system(self) -> tuple:
368
+ """
369
+ Transform data for System GMM (both differences and levels).
370
+
371
+ Returns
372
+ -------
373
+ y_diff : np.ndarray
374
+ Differenced dependent variable
375
+ X_diff : np.ndarray
376
+ Differenced regressors
377
+ y_level : np.ndarray
378
+ Level dependent variable
379
+ X_level : np.ndarray
380
+ Level regressors
381
+ ids : np.ndarray
382
+ ID variable
383
+ times : np.ndarray
384
+ Time variable
385
+ """
386
+ # Get difference transformation from parent
387
+ y_diff, X_diff, ids, times = super()._transform_data()
388
+
389
+ # Also need levels
390
+ df = self.data.sort_values([self.id_var, self.time_var])
391
+
392
+ # Create lagged dependent variable for levels
393
+ for lag in self.lags:
394
+ lag_name = f'{self.dep_var}_L{lag}'
395
+ df[lag_name] = df.groupby(self.id_var)[self.dep_var].shift(lag)
396
+
397
+ # Build regressor list (same as difference)
398
+ regressors = []
399
+ for lag in self.lags:
400
+ regressors.append(f'{self.dep_var}_L{lag}')
401
+ regressors.extend(self.exog_vars)
402
+ regressors.extend(self.endogenous_vars)
403
+ regressors.extend(self.predetermined_vars)
404
+
405
+ # Add time dummies if requested
406
+ if self.time_dummies:
407
+ time_dummies = pd.get_dummies(df[self.time_var], prefix='year', drop_first=True)
408
+ for col in time_dummies.columns:
409
+ df[col] = time_dummies[col]
410
+ if col not in regressors:
411
+ regressors.append(col)
412
+
413
+ # Extract level data
414
+ y_level = df[self.dep_var].values.reshape(-1, 1)
415
+ X_level = np.column_stack([df[var].values for var in regressors])
416
+
417
+ return y_diff, X_diff, y_level, X_level, ids, times
418
+
419
+ def _generate_instruments_system(self) -> tuple:
420
+ """
421
+ Generate instruments for System GMM.
422
+
423
+ Returns
424
+ -------
425
+ Z_diff : np.ndarray
426
+ Instruments for difference equations
427
+ Z_level : np.ndarray
428
+ Instruments for level equations
429
+ """
430
+ # Difference equation instruments (same as Difference GMM)
431
+ Z_diff = self._generate_instruments()
432
+
433
+ # FIRST: Create ALL differenced variables and add to data
434
+ df = self.data.sort_values([self.id_var, self.time_var]).copy()
435
+
436
+ # Create differences of lagged dependent variable
437
+ for lag in self.lags:
438
+ lag_name = f'{self.dep_var}_L{lag}'
439
+ if lag_name in df.columns:
440
+ df[f'{lag_name}_diff'] = df.groupby(self.id_var)[lag_name].diff()
441
+ self.data[f'{lag_name}_diff'] = df[f'{lag_name}_diff']
442
+
443
+ # Create differences of predetermined/endogenous variables
444
+ for var in self.predetermined_vars + self.endogenous_vars:
445
+ if var in df.columns:
446
+ df[f'{var}_diff'] = df.groupby(self.id_var)[var].diff()
447
+ self.data[f'{var}_diff'] = df[f'{var}_diff']
448
+
449
+ # SECOND: Recreate InstrumentBuilder with updated data
450
+ self.instrument_builder = InstrumentBuilder(self.data, self.id_var, self.time_var)
451
+
452
+ # THIRD: Generate level instruments using the differenced variables
453
+ instrument_sets_level = []
454
+
455
+ # For lagged dependent variable in levels, use differences as instruments
456
+ for lag in self.lags:
457
+ lag_name = f'{self.dep_var}_L{lag}'
458
+
459
+ # Use lagged differences as instruments for levels
460
+ max_lags_level = self.level_instruments.get('max_lags', 1)
461
+ Z_level_lag = self.instrument_builder.create_gmm_style_instruments(
462
+ var=f'{lag_name}_diff',
463
+ min_lag=0, # Can use contemporaneous difference
464
+ max_lag=max_lags_level,
465
+ equation='level',
466
+ collapse=self.collapse
467
+ )
468
+ instrument_sets_level.append(Z_level_lag)
469
+
470
+ # For exogenous variables in levels, use themselves
471
+ for var in self.exog_vars:
472
+ Z_level_exog = self.instrument_builder.create_iv_style_instruments(
473
+ var=var,
474
+ min_lag=0,
475
+ max_lag=0,
476
+ equation='level'
477
+ )
478
+ instrument_sets_level.append(Z_level_exog)
479
+
480
+ # For predetermined/endogenous in levels, use lagged differences
481
+ for var in self.predetermined_vars + self.endogenous_vars:
482
+ # Variable differences already created above
483
+ max_lags_level = self.level_instruments.get('max_lags', 1)
484
+ Z_level_var = self.instrument_builder.create_gmm_style_instruments(
485
+ var=f'{var}_diff',
486
+ min_lag=1,
487
+ max_lag=max_lags_level,
488
+ equation='level',
489
+ collapse=self.collapse
490
+ )
491
+ instrument_sets_level.append(Z_level_var)
492
+
493
+ # Combine level instruments
494
+ if instrument_sets_level:
495
+ Z_level = self.instrument_builder.combine_instruments(*instrument_sets_level)
496
+ else:
497
+ # No level-specific instruments, use empty matrix
498
+ Z_level = InstrumentSet(
499
+ Z=np.empty((len(self.data), 0)),
500
+ variable_names=[],
501
+ instrument_names=[],
502
+ equation='level',
503
+ style='mixed',
504
+ collapsed=False
505
+ )
506
+
507
+ return Z_diff, Z_level
508
+
509
+ def _stack_instruments(self,
510
+ Z_diff: InstrumentSet,
511
+ Z_level: InstrumentSet) -> np.ndarray:
512
+ """
513
+ Stack instruments for System GMM.
514
+
515
+ Creates block-diagonal matrix:
516
+ [ Z_diff 0 ]
517
+ [ 0 Z_level ]
518
+
519
+ Parameters
520
+ ----------
521
+ Z_diff : InstrumentSet
522
+ Difference equation instruments
523
+ Z_level : InstrumentSet
524
+ Level equation instruments
525
+
526
+ Returns
527
+ -------
528
+ np.ndarray
529
+ Stacked instrument matrix
530
+ """
531
+ n_obs = Z_diff.n_obs
532
+
533
+ # Create block diagonal matrix
534
+ n_instruments_total = Z_diff.n_instruments + Z_level.n_instruments
535
+
536
+ Z_stacked = np.zeros((2 * n_obs, n_instruments_total))
537
+
538
+ # Fill difference block
539
+ Z_stacked[:n_obs, :Z_diff.n_instruments] = Z_diff.Z
540
+
541
+ # Fill level block
542
+ Z_stacked[n_obs:, Z_diff.n_instruments:] = Z_level.Z
543
+
544
+ return Z_stacked
545
+
546
+ def _compute_diff_hansen(self,
547
+ residuals: np.ndarray,
548
+ Z_diff: InstrumentSet,
549
+ Z_level: InstrumentSet,
550
+ W_full: np.ndarray,
551
+ n_params: int):
552
+ """
553
+ Compute Difference-in-Hansen test for level instruments.
554
+
555
+ Tests the validity of level equation instruments by comparing
556
+ Hansen J statistics with and without level instruments.
557
+
558
+ Parameters
559
+ ----------
560
+ residuals : np.ndarray
561
+ Residuals from full system
562
+ Z_diff : InstrumentSet
563
+ Difference instruments
564
+ Z_level : InstrumentSet
565
+ Level instruments
566
+ W_full : np.ndarray
567
+ Weight matrix from full system
568
+ n_params : int
569
+ Number of parameters
570
+
571
+ Returns
572
+ -------
573
+ TestResult
574
+ Difference-in-Hansen test result
575
+ """
576
+ # Full system instruments
577
+ Z_full = self._stack_instruments(Z_diff, Z_level)
578
+
579
+ # Subset system (difference only)
580
+ n_obs = Z_diff.n_obs
581
+ Z_subset = np.zeros((2 * n_obs, Z_diff.n_instruments))
582
+ Z_subset[:n_obs, :] = Z_diff.Z
583
+ # Level equations get same instruments as difference (for subset comparison)
584
+ Z_subset[n_obs:, :] = Z_diff.Z
585
+
586
+ # Compute weight matrix for subset
587
+ # (simplified - in practice should re-estimate)
588
+ W_subset = W_full[:Z_diff.n_instruments, :Z_diff.n_instruments]
589
+
590
+ # Compute Difference-in-Hansen test
591
+ diff_hansen = self.tester.difference_in_hansen(
592
+ residuals=residuals,
593
+ Z_full=Z_full,
594
+ Z_subset=Z_subset,
595
+ W_full=W_full,
596
+ W_subset=W_subset,
597
+ n_params=n_params,
598
+ subset_name='level instruments'
599
+ )
600
+
601
+ return diff_hansen
602
+
603
+ def summary(self) -> str:
604
+ """
605
+ Print model summary.
606
+
607
+ Returns
608
+ -------
609
+ str
610
+ Summary string
611
+ """
612
+ if self.results is None:
613
+ raise ValueError("Model has not been fit yet. Call fit() first.")
614
+
615
+ return self.results.summary(title='System GMM (Blundell-Bond)')
616
+
617
+ def __repr__(self) -> str:
618
+ """Representation of the model."""
619
+ status = "fitted" if self.results is not None else "not fitted"
620
+ return (f"SystemGMM(dep_var='{self.dep_var}', lags={self.lags}, "
621
+ f"status='{status}')")