aigroup-econ-mcp 1.4.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. PKG-INFO +344 -322
  2. README.md +335 -320
  3. __init__.py +1 -1
  4. aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
  5. aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
  6. cli.py +4 -0
  7. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
  8. econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
  9. econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
  10. econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
  11. econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
  13. econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
  14. econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
  15. econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
  16. econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
  17. econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
  18. econometrics/causal_inference/__init__.py +66 -0
  19. econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
  20. econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
  21. econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
  22. econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
  23. econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
  24. econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
  25. econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
  26. econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
  27. econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
  28. econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
  29. econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
  30. econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
  31. econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
  32. econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
  33. econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
  34. econometrics/distribution_analysis/__init__.py +28 -0
  35. econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
  36. econometrics/distribution_analysis/time_series_decomposition.py +152 -0
  37. econometrics/distribution_analysis/variance_decomposition.py +179 -0
  38. econometrics/missing_data/__init__.py +18 -0
  39. econometrics/missing_data/imputation_methods.py +219 -0
  40. econometrics/nonparametric/__init__.py +35 -0
  41. econometrics/nonparametric/gam_model.py +117 -0
  42. econometrics/nonparametric/kernel_regression.py +161 -0
  43. econometrics/nonparametric/quantile_regression.py +249 -0
  44. econometrics/nonparametric/spline_regression.py +100 -0
  45. econometrics/spatial_econometrics/__init__.py +68 -0
  46. econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
  47. econometrics/spatial_econometrics/gwr_simple.py +154 -0
  48. econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
  49. econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
  50. econometrics/spatial_econometrics/spatial_regression.py +315 -0
  51. econometrics/spatial_econometrics/spatial_weights.py +226 -0
  52. econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
  53. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
  54. econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
  55. econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
  56. econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
  57. econometrics/statistical_inference/__init__.py +21 -0
  58. econometrics/statistical_inference/bootstrap_methods.py +162 -0
  59. econometrics/statistical_inference/permutation_test.py +177 -0
  60. econometrics/survival_analysis/__init__.py +18 -0
  61. econometrics/survival_analysis/survival_models.py +259 -0
  62. econometrics/tests/causal_inference_tests/__init__.py +3 -0
  63. econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
  64. econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
  65. econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
  66. econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
  67. econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
  68. econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
  69. econometrics/未开发大类优先级分析.md +544 -0
  70. pyproject.toml +9 -2
  71. server.py +15 -1
  72. tools/__init__.py +75 -1
  73. tools/causal_inference_adapter.py +658 -0
  74. tools/distribution_analysis_adapter.py +121 -0
  75. tools/gwr_simple_adapter.py +54 -0
  76. tools/machine_learning_adapter.py +567 -0
  77. tools/mcp_tool_groups/__init__.py +15 -1
  78. tools/mcp_tool_groups/causal_inference_tools.py +643 -0
  79. tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
  80. tools/mcp_tool_groups/machine_learning_tools.py +422 -0
  81. tools/mcp_tool_groups/microecon_tools.py +325 -0
  82. tools/mcp_tool_groups/missing_data_tools.py +117 -0
  83. tools/mcp_tool_groups/nonparametric_tools.py +225 -0
  84. tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
  85. tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
  86. tools/mcp_tools_registry.py +13 -3
  87. tools/microecon_adapter.py +412 -0
  88. tools/missing_data_adapter.py +73 -0
  89. tools/nonparametric_adapter.py +190 -0
  90. tools/spatial_econometrics_adapter.py +318 -0
  91. tools/statistical_inference_adapter.py +90 -0
  92. tools/survival_analysis_adapter.py +46 -0
  93. aigroup_econ_mcp-1.4.3.dist-info/METADATA +0 -710
  94. aigroup_econ_mcp-1.4.3.dist-info/RECORD +0 -92
  95. {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
  96. {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/entry_points.txt +0 -0
  97. {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,418 @@
1
+ """
2
+ 测试所有因果识别策略方法
3
+ """
4
+
5
+ import numpy as np
6
+ import unittest
7
+ from econometrics.causal_inference.causal_identification_strategy import *
8
+
9
+
10
class TestAllCausalMethods(unittest.TestCase):
    """Smoke tests for the causal identification strategy estimators.

    Every test seeds NumPy's global RNG with 42, simulates a small data set,
    calls one estimator, and checks that the expected result fields are
    populated. The estimates are not compared against the simulated effect
    sizes.
    """

    @staticmethod
    def _panel_index(n_entities, n_periods):
        """Return ``(entity_ids, time_periods)`` labels for a balanced panel.

        Rows are ordered entity by entity; each entity contributes
        ``n_periods`` consecutive rows.
        """
        entity_ids = [f"entity_{i}" for i in range(n_entities) for _ in range(n_periods)]
        time_periods = [f"period_{t}" for _ in range(n_entities) for t in range(n_periods)]
        return entity_ids, time_periods

    def _assert_core_statistics(self, result):
        """Assert that ``estimate``, ``std_error`` and ``p_value`` are set on ``result``."""
        self.assertIsNotNone(result.estimate)
        self.assertIsNotNone(result.std_error)
        self.assertIsNotNone(result.p_value)

    def test_instrumental_variables_2sls(self):
        """Smoke-test two-stage least squares with a single instrument."""
        np.random.seed(42)
        n = 100

        # Instrument.
        z = np.random.normal(0, 1, n)

        # Endogenous regressor: it shares the shock e1 with the outcome below.
        e1 = np.random.normal(0, 1, n)
        x = 1 + 0.5 * z + e1

        # Outcome; the 0.3 * e1 term is what introduces the endogeneity.
        e2 = np.random.normal(0, 1, n)
        y = 2 + 1.5 * x + e2 + 0.3 * e1

        result = instrumental_variables_2sls(
            y=y.tolist(),
            x=x.reshape(-1, 1).tolist(),
            instruments=z.reshape(-1, 1).tolist(),
        )

        self._assert_core_statistics(result)
        self.assertGreater(result.n_observations, 0)

    def test_control_function_approach(self):
        """Smoke-test the control function approach with two exogenous variables."""
        np.random.seed(42)
        n = 100

        # Exogenous variables.
        z1 = np.random.normal(0, 1, n)
        z2 = np.random.normal(0, 1, n)

        # Endogenous regressor: it shares the shock e1 with the outcome below.
        e1 = np.random.normal(0, 1, n)
        x = 1 + 0.5 * z1 + 0.3 * z2 + e1

        # Outcome; the 0.3 * e1 term is what introduces the endogeneity.
        e2 = np.random.normal(0, 1, n)
        y = 2 + 1.5 * x + e2 + 0.3 * e1

        result = control_function_approach(
            y=y.tolist(),
            x=x.tolist(),
            z=np.column_stack([z1, z2]).tolist(),
        )

        self._assert_core_statistics(result)
        self.assertGreater(result.n_observations, 0)

    def test_fixed_effects_model(self):
        """Smoke-test the fixed effects model on a balanced 20 x 10 panel."""
        np.random.seed(42)
        n_entities = 20
        n_periods = 10
        n = n_entities * n_periods

        entity_ids, time_periods = self._panel_index(n_entities, n_periods)

        # Two regressors per row.
        x = np.random.normal(0, 1, (n, 2)).tolist()

        # Outcome with an entity-specific intercept shift; the noise is drawn
        # row by row, in row order.
        entity_effects = np.random.normal(0, 1, n_entities)
        y = [
            1 + 2 * row[0] + 1.5 * row[1] + entity_effects[i // n_periods] + np.random.normal(0, 0.5)
            for i, row in enumerate(x)
        ]

        result = fixed_effects_model(
            y=y,
            x=x,
            entity_ids=entity_ids,
            time_periods=time_periods,
        )

        self._assert_core_statistics(result)
        self.assertEqual(result.n_observations, n)

    def test_random_effects_model(self):
        """Smoke-test the random effects model on a balanced 20 x 10 panel."""
        np.random.seed(42)
        n_entities = 20
        n_periods = 10
        n = n_entities * n_periods

        entity_ids, time_periods = self._panel_index(n_entities, n_periods)

        # Two regressors per row.
        x = np.random.normal(0, 1, (n, 2)).tolist()

        # Outcome with an entity-specific random intercept; the noise is drawn
        # row by row, in row order.
        entity_effects = np.random.normal(0, 1, n_entities)
        y = [
            1 + 2 * row[0] + 1.5 * row[1] + entity_effects[i // n_periods] + np.random.normal(0, 0.5)
            for i, row in enumerate(x)
        ]

        result = random_effects_model(
            y=y,
            x=x,
            entity_ids=entity_ids,
            time_periods=time_periods,
        )

        self._assert_core_statistics(result)
        self.assertEqual(result.n_observations, n)

    def test_first_difference_model(self):
        """Smoke-test the first-difference model on a balanced 20 x 10 panel."""
        np.random.seed(42)
        n_entities = 20
        n_periods = 10
        n = n_entities * n_periods

        # Only entity labels are passed to the estimator in this test.
        entity_ids, _ = self._panel_index(n_entities, n_periods)

        # The regressor is a cumulative sum over the whole stacked sample.
        x = np.cumsum(np.random.normal(0, 1, n))
        y = 2 + 1.5 * x + np.random.normal(0, 1, n)

        result = first_difference_model(
            y=y.tolist(),
            x=x.tolist(),
            entity_ids=entity_ids,
        )

        self._assert_core_statistics(result)
        self.assertGreater(result.n_observations, 0)

    def test_hausman_test(self):
        """Smoke-test the Hausman test on a panel without entity effects.

        A numerical failure inside the estimator is reported as a skipped
        test. Assertion failures and unrelated errors are no longer swallowed.
        """
        np.random.seed(42)
        n_entities = 20
        n_periods = 10
        n = n_entities * n_periods

        entity_ids, time_periods = self._panel_index(n_entities, n_periods)

        # Two regressors per row.
        x = np.random.normal(0, 1, (n, 2)).tolist()

        # Outcome without entity effects; the noise is drawn row by row.
        y = [1 + 2 * row[0] + 1.5 * row[1] + np.random.normal(0, 1) for row in x]

        try:
            result = hausman_test(
                y=y,
                x=x,
                entity_ids=entity_ids,
                time_periods=time_periods,
            )
        except (np.linalg.LinAlgError, ArithmeticError, ValueError) as exc:
            # Numerical problems are tolerated, but reported as a skip so they
            # stay visible in the test report.
            self.skipTest(f"Hausman test hit a numerical problem: {exc}")

        # Kept outside the try block so a failed assertion fails the test.
        self.assertIsNotNone(result.hausman_statistic)
        self.assertIsNotNone(result.p_value)

    def test_difference_in_differences(self):
        """Smoke-test difference-in-differences on a simulated 2 x 2 design."""
        np.random.seed(42)

        # Group indicator (0 = control, 1 = treated), 100 rows each.
        treatment = np.concatenate([np.zeros(100), np.ones(100)]).tolist()

        # Period indicator (0 = before, 1 = after), 50 rows per cell.
        time_period = np.concatenate([np.zeros(50), np.ones(50), np.zeros(50), np.ones(50)]).tolist()

        # Every cell has mean 10 except treated-after, which has mean 12.
        outcome = [
            np.random.normal(12 if (group == 1 and period == 1) else 10, 1)
            for group, period in zip(treatment, time_period)
        ]

        result = difference_in_differences(
            treatment=treatment,
            time_period=time_period,
            outcome=outcome,
        )

        self._assert_core_statistics(result)
        self.assertGreater(result.n_observations, 0)

    def test_triple_difference(self):
        """Smoke-test triple differences on a simulated 2 x 2 x 2 design."""
        np.random.seed(42)
        n = 400

        # Repeating indicator patterns covering all eight cells.
        treatment_group = np.tile([0, 0, 1, 1], n // 4).tolist()
        time_period = np.tile([0, 1, 0, 1], n // 4).tolist()
        cohort_group = np.tile([0, 0, 0, 0, 1, 1, 1, 1], n // 8).tolist()

        # Only the cell with all three indicators set has mean 12; the rest have mean 10.
        outcome = [
            np.random.normal(12 if (group == 1 and period == 1 and cohort == 1) else 10, 1)
            for group, period, cohort in zip(treatment_group, time_period, cohort_group)
        ]

        result = triple_difference(
            outcome=outcome,
            treatment_group=treatment_group,
            time_period=time_period,
            cohort_group=cohort_group,
        )

        self._assert_core_statistics(result)
        self.assertGreater(result.n_observations, 0)

    def test_regression_discontinuity(self):
        """Smoke-test regression discontinuity with a jump of 1.0 at the cutoff."""
        np.random.seed(42)
        n = 200
        cutoff = 0.0

        # Running variable.
        running_variable = np.random.uniform(-1, 1, n).tolist()

        # Linear outcome with a +1.0 jump for rows at or above the cutoff.
        outcome = []
        for r in running_variable:
            value = 2 + 1.5 * r + np.random.normal(0, 0.5)
            if r >= cutoff:
                value += 1.0
            outcome.append(value)

        result = regression_discontinuity(
            running_variable=running_variable,
            outcome=outcome,
            cutoff=cutoff,
            bandwidth=0.5,
        )

        self._assert_core_statistics(result)
        self.assertGreater(result.n_observations, 0)
        self.assertEqual(result.discontinuity_location, cutoff)

    def test_propensity_score_matching(self):
        """Smoke-test propensity score matching with two covariates."""
        np.random.seed(42)
        n = 200

        # Covariates.
        x1 = np.random.normal(0, 1, n)
        x2 = np.random.normal(0, 1, n)
        covariates = np.column_stack([x1, x2]).tolist()

        # Treatment is assigned with a logistic propensity in the covariates.
        pscore = 1 / (1 + np.exp(-(0.5 * x1 + 0.3 * x2)))
        treated = (np.random.uniform(0, 1, n) < pscore).astype(int)

        # Outcome with a treatment coefficient of 1.5.
        outcome = 2 + 1.5 * treated + 0.8 * x1 + 0.5 * x2 + np.random.normal(0, 1, n)

        result = propensity_score_matching(
            treatment=treated.tolist(),
            outcome=outcome.tolist(),
            covariates=covariates,
        )

        self.assertIsNotNone(result.ate)
        self.assertIsNotNone(result.std_error)
        self.assertIsNotNone(result.p_value)
        self.assertGreater(result.n_observations, 0)

    def test_mediation_analysis(self):
        """Smoke-test mediation analysis with one mediator and two covariates."""
        np.random.seed(42)
        n = 200

        # Treatment variable.
        treatment = np.random.normal(0, 1, n)

        # Covariates.
        x1 = np.random.normal(0, 1, n)
        x2 = np.random.normal(0, 1, n)
        covariates = np.column_stack([x1, x2]).tolist()

        # Mediator depends on the treatment and the covariates.
        mediator = 1 + 0.8 * treatment + 0.3 * x1 + 0.2 * x2 + np.random.normal(0, 1, n)

        # Outcome depends on the treatment, the mediator and the covariates.
        outcome = (2 + 1.2 * treatment + 0.7 * mediator +
                   0.4 * x1 + 0.3 * x2 + np.random.normal(0, 1, n))

        result = mediation_analysis(
            outcome=outcome.tolist(),
            treatment=treatment.tolist(),
            mediator=mediator.tolist(),
            covariates=covariates,
        )

        self.assertIsNotNone(result.direct_effect)
        self.assertIsNotNone(result.indirect_effect)
        self.assertIsNotNone(result.total_effect)
        self.assertGreater(result.n_observations, 0)

    def test_moderation_analysis(self):
        """Smoke-test moderation analysis with a predictor-by-moderator interaction."""
        np.random.seed(42)
        n = 200

        # Predictor and moderator.
        predictor = np.random.normal(0, 1, n)
        moderator = np.random.normal(0, 1, n)

        # Covariates.
        x1 = np.random.normal(0, 1, n)
        x2 = np.random.normal(0, 1, n)
        covariates = np.column_stack([x1, x2]).tolist()

        # Outcome with main effects and an interaction coefficient of 0.5.
        outcome = (2 + 1.2 * predictor + 0.8 * moderator +
                   0.5 * predictor * moderator +
                   0.3 * x1 + 0.2 * x2 + np.random.normal(0, 1, n))

        result = moderation_analysis(
            outcome=outcome.tolist(),
            predictor=predictor.tolist(),
            moderator=moderator.tolist(),
            covariates=covariates,
        )

        self.assertIsNotNone(result.main_effect)
        self.assertIsNotNone(result.moderator_effect)
        self.assertIsNotNone(result.interaction_effect)
        self.assertGreater(result.n_observations, 0)
415
+
416
+
417
+ if __name__ == "__main__":  # Run this module's tests when it is executed directly as a script.
418
+ unittest.main()
@@ -0,0 +1,202 @@
1
+ """
2
+ 因果识别策略模块测试
3
+ """
4
+
5
+ import numpy as np
6
+ import unittest
7
+ from econometrics.causal_inference.causal_identification_strategy import (
8
+ instrumental_variables_2sls,
9
+ difference_in_differences,
10
+ regression_discontinuity,
11
+ fixed_effects_model,
12
+ random_effects_model
13
+ )
14
+
15
+
16
class TestCausalIdentificationStrategy(unittest.TestCase):
    """Smoke tests for 2SLS, DID, RDD, fixed effects and random effects.

    Each test seeds NumPy's global RNG with 42, simulates data, runs one
    estimator and checks that the reported fields are populated.
    """

    def test_instrumental_variables_2sls(self):
        """Run 2SLS with one instrument and check the reported fields."""
        np.random.seed(42)
        sample_size = 100

        instrument = np.random.normal(0, 1, sample_size)
        first_stage_shock = np.random.normal(0, 1, sample_size)
        endogenous = 1 + 0.5 * instrument + first_stage_shock
        outcome_shock = np.random.normal(0, 1, sample_size)
        # Reusing first_stage_shock in the outcome makes the regressor endogenous.
        response = 2 + 1.5 * endogenous + outcome_shock + 0.3 * first_stage_shock

        result = instrumental_variables_2sls(
            y=response.tolist(),
            x=endogenous.reshape(-1, 1).tolist(),
            instruments=instrument.reshape(-1, 1).tolist(),
        )

        for field in ("estimate", "std_error", "p_value"):
            self.assertIsNotNone(getattr(result, field))
        self.assertGreater(result.n_observations, 0)

    def test_difference_in_differences(self):
        """Run DID on a 2 x 2 design whose treated-after cell has mean 12."""
        np.random.seed(42)

        # Group indicator: 100 control rows (0) followed by 100 treated rows (1).
        treatment = np.concatenate([np.zeros(100), np.ones(100)]).tolist()
        # Period indicator: 50 before (0) and 50 after (1) rows within each group.
        time_period = np.concatenate([np.zeros(50), np.ones(50), np.zeros(50), np.ones(50)]).tolist()

        # All cells have mean 10 except treated-after, whose mean is 12.
        outcome = [
            np.random.normal(12 if (treated == 1 and post == 1) else 10, 1)
            for treated, post in zip(treatment, time_period)
        ]

        result = difference_in_differences(
            treatment=treatment,
            time_period=time_period,
            outcome=outcome,
        )

        for field in ("estimate", "std_error", "p_value"):
            self.assertIsNotNone(getattr(result, field))
        self.assertGreater(result.n_observations, 0)

    def test_regression_discontinuity(self):
        """Run RDD on data with a +1.0 jump at the cutoff."""
        np.random.seed(42)
        cutoff = 0.0
        running_variable = np.random.uniform(-1, 1, 200).tolist()

        def simulate(score):
            # Linear trend plus noise; rows at or above the cutoff get +1.0.
            baseline = 2 + 1.5 * score + np.random.normal(0, 0.5)
            return baseline + 1.0 if score >= cutoff else baseline

        outcome = [simulate(score) for score in running_variable]

        result = regression_discontinuity(
            running_variable=running_variable,
            outcome=outcome,
            cutoff=cutoff,
            bandwidth=0.5,
        )

        for field in ("estimate", "std_error", "p_value"):
            self.assertIsNotNone(getattr(result, field))
        self.assertGreater(result.n_observations, 0)
        self.assertEqual(result.discontinuity_location, cutoff)

    def test_fixed_effects_model(self):
        """Run the fixed effects model on a balanced 20 x 10 panel."""
        np.random.seed(42)
        n_entities, n_periods = 20, 10
        n = n_entities * n_periods

        # Rows are stacked entity by entity, n_periods rows each.
        entity_ids = [f"entity_{row // n_periods}" for row in range(n)]
        time_periods = [f"period_{row % n_periods}" for row in range(n)]

        x = np.random.normal(0, 1, (n, 2)).tolist()

        # Entity-specific intercept shifts, then one noise draw per row.
        entity_effects = np.random.normal(0, 1, n_entities)
        y = [
            1 + 2 * first + 1.5 * second + entity_effects[row // n_periods] + np.random.normal(0, 0.5)
            for row, (first, second) in enumerate(x)
        ]

        result = fixed_effects_model(
            y=y,
            x=x,
            entity_ids=entity_ids,
            time_periods=time_periods,
        )

        for field in ("estimate", "std_error", "p_value"):
            self.assertIsNotNone(getattr(result, field))
        self.assertEqual(result.n_observations, n)
        self.assertEqual(result.model_type, "FE")

    def test_random_effects_model(self):
        """Run the random effects model on a balanced 20 x 10 panel."""
        np.random.seed(42)
        n_entities, n_periods = 20, 10
        n = n_entities * n_periods

        # Rows are stacked entity by entity, n_periods rows each.
        entity_ids = [f"entity_{row // n_periods}" for row in range(n)]
        time_periods = [f"period_{row % n_periods}" for row in range(n)]

        x = np.random.normal(0, 1, (n, 2)).tolist()

        # Entity-specific random intercepts, then one noise draw per row.
        entity_effects = np.random.normal(0, 1, n_entities)
        y = [
            1 + 2 * first + 1.5 * second + entity_effects[row // n_periods] + np.random.normal(0, 0.5)
            for row, (first, second) in enumerate(x)
        ]

        result = random_effects_model(
            y=y,
            x=x,
            entity_ids=entity_ids,
            time_periods=time_periods,
        )

        for field in ("estimate", "std_error", "p_value"):
            self.assertIsNotNone(getattr(result, field))
        self.assertEqual(result.n_observations, n)
        self.assertEqual(result.model_type, "RE")
199
+
200
+
201
+ if __name__ == "__main__":  # Run this module's tests when it is executed directly as a script.
202
+ unittest.main()
@@ -0,0 +1,53 @@
1
+ """
2
+ 双重差分法测试
3
+ """
4
+
5
+ import numpy as np
6
+ import unittest
7
+ from econometrics.causal_inference.causal_identification_strategy import difference_in_differences
8
+
9
+
10
class TestDifferenceInDifferences(unittest.TestCase):
    """Smoke test for the difference-in-differences estimator."""

    def test_difference_in_differences(self):
        """Run DID on a simulated 2 x 2 design and check the reported fields."""
        np.random.seed(42)

        # Group indicator: 100 control rows (0) followed by 100 treated rows (1).
        treatment = np.concatenate([np.zeros(100), np.ones(100)]).tolist()
        # Period indicator: 50 before (0) and 50 after (1) rows within each group.
        time_period = np.concatenate([np.zeros(50), np.ones(50), np.zeros(50), np.ones(50)]).tolist()

        # Mean outcome per (group, period) cell: the control group stays at 10,
        # the treated group moves from 10 to 12, so the simulated effect is 2.
        cell_means = {(0, 0): 10, (0, 1): 10, (1, 0): 10, (1, 1): 12}
        outcome = [
            np.random.normal(cell_means[(group, period)], 1)
            for group, period in zip(treatment, time_period)
        ]

        result = difference_in_differences(
            treatment=treatment,
            time_period=time_period,
            outcome=outcome,
        )

        reported = (result.estimate, result.std_error, result.p_value)
        for value in reported:
            self.assertIsNotNone(value)
        self.assertGreater(result.n_observations, 0)
50
+
51
+
52
+ if __name__ == "__main__":  # Run this module's tests when it is executed directly as a script.
53
+ unittest.main()