aigroup-econ-mcp 1.4.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only.
Files changed (97)
  1. PKG-INFO +344 -322
  2. README.md +335 -320
  3. __init__.py +1 -1
  4. aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
  5. aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
  6. cli.py +4 -0
  7. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
  8. econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
  9. econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
  10. econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
  11. econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
  13. econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
  14. econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
  15. econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
  16. econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
  17. econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
  18. econometrics/causal_inference/__init__.py +66 -0
  19. econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
  20. econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
  21. econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
  22. econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
  23. econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
  24. econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
  25. econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
  26. econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
  27. econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
  28. econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
  29. econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
  30. econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
  31. econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
  32. econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
  33. econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
  34. econometrics/distribution_analysis/__init__.py +28 -0
  35. econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
  36. econometrics/distribution_analysis/time_series_decomposition.py +152 -0
  37. econometrics/distribution_analysis/variance_decomposition.py +179 -0
  38. econometrics/missing_data/__init__.py +18 -0
  39. econometrics/missing_data/imputation_methods.py +219 -0
  40. econometrics/nonparametric/__init__.py +35 -0
  41. econometrics/nonparametric/gam_model.py +117 -0
  42. econometrics/nonparametric/kernel_regression.py +161 -0
  43. econometrics/nonparametric/quantile_regression.py +249 -0
  44. econometrics/nonparametric/spline_regression.py +100 -0
  45. econometrics/spatial_econometrics/__init__.py +68 -0
  46. econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
  47. econometrics/spatial_econometrics/gwr_simple.py +154 -0
  48. econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
  49. econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
  50. econometrics/spatial_econometrics/spatial_regression.py +315 -0
  51. econometrics/spatial_econometrics/spatial_weights.py +226 -0
  52. econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
  53. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
  54. econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
  55. econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
  56. econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
  57. econometrics/statistical_inference/__init__.py +21 -0
  58. econometrics/statistical_inference/bootstrap_methods.py +162 -0
  59. econometrics/statistical_inference/permutation_test.py +177 -0
  60. econometrics/survival_analysis/__init__.py +18 -0
  61. econometrics/survival_analysis/survival_models.py +259 -0
  62. econometrics/tests/causal_inference_tests/__init__.py +3 -0
  63. econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
  64. econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
  65. econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
  66. econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
  67. econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
  68. econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
  69. econometrics/未开发大类优先级分析.md (priority analysis of not-yet-implemented categories) +544 -0
  70. pyproject.toml +9 -2
  71. server.py +15 -1
  72. tools/__init__.py +75 -1
  73. tools/causal_inference_adapter.py +658 -0
  74. tools/distribution_analysis_adapter.py +121 -0
  75. tools/gwr_simple_adapter.py +54 -0
  76. tools/machine_learning_adapter.py +567 -0
  77. tools/mcp_tool_groups/__init__.py +15 -1
  78. tools/mcp_tool_groups/causal_inference_tools.py +643 -0
  79. tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
  80. tools/mcp_tool_groups/machine_learning_tools.py +422 -0
  81. tools/mcp_tool_groups/microecon_tools.py +325 -0
  82. tools/mcp_tool_groups/missing_data_tools.py +117 -0
  83. tools/mcp_tool_groups/nonparametric_tools.py +225 -0
  84. tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
  85. tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
  86. tools/mcp_tools_registry.py +13 -3
  87. tools/microecon_adapter.py +412 -0
  88. tools/missing_data_adapter.py +73 -0
  89. tools/nonparametric_adapter.py +190 -0
  90. tools/spatial_econometrics_adapter.py +318 -0
  91. tools/statistical_inference_adapter.py +90 -0
  92. tools/survival_analysis_adapter.py +46 -0
  93. aigroup_econ_mcp-1.4.3.dist-info/METADATA +0 -710
  94. aigroup_econ_mcp-1.4.3.dist-info/RECORD +0 -92
  95. {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
  96. {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/entry_points.txt +0 -0
  97. {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,282 @@
+ """
+ Limited dependent variable models module
+ Implemented on top of existing libraries such as statsmodels
+ """
+
+ import numpy as np
+ import pandas as pd
+ from scipy import stats
+ try:
+     import statsmodels.api as sm
+     from statsmodels.regression.linear_model import OLS
+     HAS_STATSMODELS = True
+ except ImportError:
+     HAS_STATSMODELS = False
+     OLS = None
+
+ try:
+     from statsmodels.base.model import GenericLikelihoodModel
+     HAS_GENERIC_MODEL = True
+ except ImportError:
+     HAS_GENERIC_MODEL = False
+
+
+ class _PlaceholderModel:
+     def __init__(self, *args, **kwargs):
+         if not HAS_STATSMODELS:
+             raise ImportError("statsmodels is required: pip install statsmodels")
+
+     def fit(self, *args, **kwargs):
+         pass
+
+
+ class TobitModel:
+     """
+     Tobit model (censored regression)
+     statsmodels has no built-in Tobit model, so this is an implementation based on GenericLikelihoodModel
+     """
+
+     def __init__(self, lower_bound=0, upper_bound=None):
+         """
+         Initialize the Tobit model
+
+         Parameters:
+             lower_bound: lower censoring threshold, defaults to 0
+             upper_bound: upper censoring threshold, defaults to None (no upper bound)
+         """
+         if not HAS_STATSMODELS:
+             raise ImportError("statsmodels is required: pip install statsmodels")
+
+         if not HAS_GENERIC_MODEL:
+             raise ImportError("statsmodels GenericLikelihoodModel support is required")
+
+         self.lower_bound = lower_bound
+         self.upper_bound = upper_bound
+         self.model_ = None
+         self.results_ = None
+         self.fitted_ = False
+
+     def fit(self, X, y):
+         """Fit the Tobit model"""
+         X = np.array(X)
+         y = np.array(y, dtype=np.float64)  # float dtype so the log-likelihood below can assign -inf and log terms
+
+         # Add a constant term
+         X_with_const = sm.add_constant(X)
+
+         # Define the Tobit likelihood
+         class TobitLikelihoodModel(GenericLikelihoodModel):
+             def __init__(self, endog, exog, lower_bound=0, upper_bound=None, **kwds):
+                 self.lower_bound = lower_bound
+                 self.upper_bound = upper_bound
+                 super(TobitLikelihoodModel, self).__init__(endog, exog, **kwds)
+
+             def loglikeobs(self, params):
+                 # Split coefficients and sigma
+                 beta = params[:-1]
+                 sigma = params[-1]
+
+                 if sigma <= 0:
+                     return np.full_like(self.endog, -np.inf)
+
+                 # Linear prediction
+                 xb = np.dot(self.exog, beta)
+                 z = (self.endog - xb) / sigma
+
+                 # Per-observation log-likelihood
+                 if self.upper_bound is None:
+                     # Lower censoring only
+                     censored = self.endog <= self.lower_bound
+                     uncensored = ~censored
+
+                     ll = np.zeros_like(self.endog)
+                     # Log-likelihood of censored observations
+                     ll[censored] = stats.norm.logcdf((self.lower_bound - xb[censored]) / sigma)
+                     # Log-likelihood of uncensored observations
+                     ll[uncensored] = -0.5 * np.log(2 * np.pi * sigma**2) - 0.5 * z[uncensored]**2
+                 else:
+                     # Censoring on both sides
+                     left_censored = self.endog <= self.lower_bound
+                     right_censored = self.endog >= self.upper_bound
+                     uncensored = ~(left_censored | right_censored)
+
+                     ll = np.zeros_like(self.endog)
+                     # Log-likelihood of left-censored observations
+                     ll[left_censored] = stats.norm.logcdf((self.lower_bound - xb[left_censored]) / sigma)
+                     # Log-likelihood of right-censored observations
+                     ll[right_censored] = stats.norm.logsf((self.upper_bound - xb[right_censored]) / sigma)
+                     # Log-likelihood of uncensored observations
+                     ll[uncensored] = -0.5 * np.log(2 * np.pi * sigma**2) - 0.5 * z[uncensored]**2
+
+                 return ll
+
+         # Build and fit the model
+         self.model_ = TobitLikelihoodModel(
+             endog=y,
+             exog=X_with_const,
+             lower_bound=self.lower_bound,
+             upper_bound=self.upper_bound
+         )
+
+         # Starting parameters
+         n_features = X_with_const.shape[1]
+         initial_params = np.concatenate([
+             np.zeros(n_features),  # beta
+             [np.std(y[y > self.lower_bound]) if self.upper_bound is None else np.std(y)]  # sigma
+         ])
+
+         self.results_ = self.model_.fit(start_params=initial_params, method='bfgs', disp=0)
+         self.fitted_ = True
+         return self
+
+     def predict(self, X):
+         """Predict expected values"""
+         if not self.fitted_:
+             raise ValueError("Model has not been fitted")
+         X = np.array(X)
+         X_with_const = sm.add_constant(X)
+
+         # Compute predictions manually
+         beta = self.results_.params[:-1]  # exclude the sigma parameter
+         sigma = self.results_.params[-1]
+
+         xb = np.dot(X_with_const, beta)
+
+         if self.upper_bound is None:
+             # Lower censoring only
+             z = (self.lower_bound - xb) / sigma
+             lambda_val = stats.norm.pdf(z) / np.clip(1 - stats.norm.cdf(z), 1e-10, 1)
+             return xb + sigma * lambda_val
+         else:
+             # Censoring on both sides
+             z_lower = (self.lower_bound - xb) / sigma
+             z_upper = (self.upper_bound - xb) / sigma
+
+             lambda_lower = stats.norm.pdf(z_lower) / np.clip(stats.norm.cdf(z_upper) - stats.norm.cdf(z_lower), 1e-10, 1)
+             lambda_upper = stats.norm.pdf(z_upper) / np.clip(stats.norm.cdf(z_upper) - stats.norm.cdf(z_lower), 1e-10, 1)
+
+             return xb + sigma * (lambda_lower - lambda_upper)
+
+     def predict_linear(self, X):
+         """Return the linear prediction"""
+         if not self.fitted_:
+             raise ValueError("Model has not been fitted")
+         X = np.array(X)
+         X_with_const = sm.add_constant(X)
+         xb = np.dot(X_with_const, self.results_.params[:-1])  # exclude the sigma parameter
+         return xb
+
+     def summary(self):
+         """Return the model summary"""
+         if not self.fitted_:
+             raise ValueError("Model has not been fitted")
+         return self.results_.summary()
+
+
+ class HeckmanModel:
+     """
+     Heckman two-step selection model (implemented with statsmodels)
+     """
+
+     def __init__(self):
+         self.selection_model_ = None
+         self.selection_results_ = None
+         self.outcome_model_ = None
+         self.outcome_results_ = None
+         self.fitted_ = False
+
+     def fit(self, X_select, Z, y, s):
+         """
+         Fit the Heckman model
+
+         Parameters:
+             X_select: explanatory variables of the selection equation
+             Z: explanatory variables of the outcome equation
+             y: outcome variable (observed only for selected samples)
+             s: selection indicator (1 = selected, 0 = not selected)
+         """
+         if not HAS_STATSMODELS:
+             raise ImportError("statsmodels is required: pip install statsmodels")
+
+         X_select = np.array(X_select)
+         Z = np.array(Z)
+         y = np.array(y)
+         s = np.array(s)
+
+         # Stage 1: Probit estimation of the selection equation
+         X_select_with_const = sm.add_constant(X_select)
+         self.selection_model_ = sm.Probit(s, X_select_with_const)
+         self.selection_results_ = self.selection_model_.fit(disp=0)
+
+         # Compute the inverse Mills ratio
+         X_select_linpred = np.dot(X_select_with_const, self.selection_results_.params)
+         mills_ratio = stats.norm.pdf(X_select_linpred) / np.clip(stats.norm.cdf(X_select_linpred), 1e-10, 1 - 1e-10)
+         # Zero out the Mills ratio for unselected samples
+         mills_ratio = mills_ratio * s
+
+         # Stage 2: OLS on the outcome equation augmented with the inverse Mills ratio
+         Z_with_mills = np.column_stack([Z, mills_ratio])
+         Z_with_mills_const = sm.add_constant(Z_with_mills)
+
+         # Regress on the selected samples only
+         selected_mask = s == 1
+         Z_selected = Z_with_mills_const[selected_mask]
+         y_selected = y[selected_mask]
+
+         self.outcome_model_ = OLS(y_selected, Z_selected)
+         self.outcome_results_ = self.outcome_model_.fit()
+
+         self.fitted_ = True
+         return self
+
+     def predict(self, X_select, Z):
+         """Predict outcome values"""
+         if not self.fitted_:
+             raise ValueError("Model has not been fitted")
+
+         X_select = np.array(X_select)
+         Z = np.array(Z)
+
+         # Add a constant term
+         X_select_with_const = sm.add_constant(X_select)
+
+         # Compute the inverse Mills ratio
+         X_select_linpred = np.dot(X_select_with_const, self.selection_results_.params)
+         mills_ratio = stats.norm.pdf(X_select_linpred) / np.clip(stats.norm.cdf(X_select_linpred), 1e-10, 1 - 1e-10)
+
+         # Build the prediction matrix: Z + inverse Mills ratio + constant
+         Z_with_mills = np.column_stack([Z, mills_ratio])
+         Z_with_mills_const = sm.add_constant(Z_with_mills)
+
+         # Outcome-equation prediction
+         outcome_pred = self.outcome_results_.predict(Z_with_mills_const)
+
+         return outcome_pred
+
+     def summary(self):
+         """Return the model summaries"""
+         if not self.fitted_:
+             raise ValueError("Model has not been fitted")
+         return {
+             'selection_summary': self.selection_results_.summary(),
+             'outcome_summary': self.outcome_results_.summary()
+         }
+
+
+ # Fall back to the placeholder when statsmodels is unavailable
+ if not HAS_STATSMODELS:
+     TobitModel = _PlaceholderModel
+     HeckmanModel = _PlaceholderModel
+
+ def multinomial_logit():
+     """
+     Placeholder for a multinomial logit model
+     """
+     pass
+
+
+ def nested_logit():
+     """
+     Placeholder for a nested logit model
+     """
+     pass
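For orientation, here is a minimal usage sketch of the two classes added above, run on synthetic data. It is illustrative only: the import path follows file 56 in the list above, and the data-generating choices (coefficients, sample size, seed) are arbitrary assumptions, not part of the package.

    import numpy as np
    from econometrics.specific_data_modeling.micro_discrete_limited_data.limited_dependent_variable_models import (
        TobitModel,
        HeckmanModel,
    )

    rng = np.random.default_rng(0)
    n = 500
    x = rng.normal(size=(n, 2))

    # Tobit: latent outcome censored from below at 0
    y_star = 1.0 + x @ np.array([0.5, -0.3]) + rng.normal(scale=0.8, size=n)
    tobit = TobitModel(lower_bound=0).fit(x, np.clip(y_star, 0, None))
    print(tobit.summary())

    # Heckman: outcome observed only when the selection index is positive
    w = rng.normal(size=(n, 2))
    s = (0.3 + w @ np.array([1.0, 0.5]) + rng.normal(size=n) > 0).astype(int)
    y = 2.0 + 0.7 * x[:, 0] + rng.normal(size=n)
    heckman = HeckmanModel().fit(X_select=w, Z=x[:, [0]], y=y, s=s)
    print(heckman.summary()['outcome_summary'])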
@@ -0,0 +1,21 @@
+ """
+ Statistical inference techniques module
+ Provides resampling, simulation, and asymptotic inference methods
+ """
+
+ from .bootstrap_methods import (
+     bootstrap_inference,
+     BootstrapResult
+ )
+
+ from .permutation_test import (
+     permutation_test,
+     PermutationTestResult
+ )
+
+ __all__ = [
+     'bootstrap_inference',
+     'BootstrapResult',
+     'permutation_test',
+     'PermutationTestResult'
+ ]
@@ -0,0 +1,162 @@
+ """
+ Bootstrap resampling inference methods
+ Implements several bootstrap methods on top of scipy.stats
+ """
+
+ from typing import Callable, List, Optional, Tuple, Union
+ from pydantic import BaseModel, Field
+ import numpy as np
+
+ try:
+     from scipy import stats
+     SCIPY_AVAILABLE = True
+ except ImportError:
+     SCIPY_AVAILABLE = False
+     stats = None
+
+
+ class BootstrapResult(BaseModel):
+     """Bootstrap inference result"""
+     statistic: float = Field(..., description="Point estimate of the statistic")
+     bootstrap_mean: float = Field(..., description="Bootstrap mean")
+     bootstrap_std: float = Field(..., description="Bootstrap standard error")
+     confidence_interval: Tuple[float, float] = Field(..., description="Confidence interval")
+     bias: float = Field(..., description="Bias estimate")
+     confidence_level: float = Field(..., description="Confidence level")
+     n_bootstrap: int = Field(..., description="Number of bootstrap resamples")
+     method: str = Field(..., description="Bootstrap method")
+     bootstrap_distribution: List[float] = Field(..., description="Bootstrap statistic distribution (first 100 values)")
+     summary: str = Field(..., description="Summary text")
+
+
+ def bootstrap_inference(
+     data: List[float],
+     statistic_func: Union[str, Callable] = "mean",
+     n_bootstrap: int = 1000,
+     confidence_level: float = 0.95,
+     method: str = "percentile",
+     random_state: Optional[int] = None
+ ) -> BootstrapResult:
+     """
+     Bootstrap confidence-interval estimation
+
+     Args:
+         data: sample data
+         statistic_func: statistic to bootstrap - "mean", "median",
+             "std" (standard deviation), "var" (variance), or a callable
+         n_bootstrap: number of bootstrap resamples
+         confidence_level: confidence level
+         method: confidence-interval method - "percentile", "normal" (normal approximation), "basic"
+         random_state: random seed
+
+     Returns:
+         BootstrapResult: bootstrap inference result
+
+     Raises:
+         ImportError: scipy is not installed
+         ValueError: invalid input data
+     """
+     if not SCIPY_AVAILABLE:
+         raise ImportError("scipy is not installed. Run: pip install scipy")
+
+     # Input validation
+     if not data:
+         raise ValueError("data must not be empty")
+
+     # Data preparation
+     data_arr = np.array(data, dtype=np.float64)
+     n = len(data_arr)
+
+     # Set the random seed
+     if random_state is not None:
+         np.random.seed(random_state)
+
+     # Resolve the statistic function
+     if statistic_func == "mean":
+         stat_fn = np.mean
+     elif statistic_func == "median":
+         stat_fn = np.median
+     elif statistic_func == "std":
+         stat_fn = lambda x: np.std(x, ddof=1)
+     elif statistic_func == "var":
+         stat_fn = lambda x: np.var(x, ddof=1)
+     elif callable(statistic_func):
+         stat_fn = statistic_func
+     else:
+         raise ValueError(f"Unsupported statistic: {statistic_func}")
+
+     # Statistic on the original sample
+     original_stat = float(stat_fn(data_arr))
+
+     # Bootstrap resampling
+     bootstrap_stats = []
+     for _ in range(n_bootstrap):
+         # Sample with replacement
+         bootstrap_sample = np.random.choice(data_arr, size=n, replace=True)
+         bootstrap_stat = stat_fn(bootstrap_sample)
+         bootstrap_stats.append(bootstrap_stat)
+
+     bootstrap_stats = np.array(bootstrap_stats)
+
+     # Bootstrap summary statistics
+     bootstrap_mean = float(bootstrap_stats.mean())
+     bootstrap_std = float(bootstrap_stats.std(ddof=1))
+     bias = bootstrap_mean - original_stat
+
+     # Confidence interval
+     alpha = 1 - confidence_level
+
+     if method == "percentile":
+         # Percentile method
+         lower_percentile = alpha / 2 * 100
+         upper_percentile = (1 - alpha / 2) * 100
+         ci_lower = float(np.percentile(bootstrap_stats, lower_percentile))
+         ci_upper = float(np.percentile(bootstrap_stats, upper_percentile))
+     elif method == "normal":
+         # Normal approximation
+         z_score = stats.norm.ppf(1 - alpha / 2)
+         ci_lower = original_stat - z_score * bootstrap_std
+         ci_upper = original_stat + z_score * bootstrap_std
+     elif method == "basic":
+         # Basic bootstrap
+         lower_percentile = alpha / 2 * 100
+         upper_percentile = (1 - alpha / 2) * 100
+         ci_lower = 2 * original_stat - float(np.percentile(bootstrap_stats, upper_percentile))
+         ci_upper = 2 * original_stat - float(np.percentile(bootstrap_stats, lower_percentile))
+     else:
+         raise ValueError(f"Unsupported confidence-interval method: {method}")
+
+     # Keep the first 100 bootstrap statistics (for display)
+     bootstrap_dist_sample = bootstrap_stats[:min(100, len(bootstrap_stats))].tolist()
+
+     # Build the summary
+     summary = f"""Bootstrap inference:
+ - Sample size: {n}
+ - Bootstrap resamples: {n_bootstrap}
+ - Statistic: {statistic_func}
+ - CI method: {method}
+
+ Estimates:
+ - Statistic: {original_stat:.4f}
+ - Bootstrap mean: {bootstrap_mean:.4f}
+ - Bootstrap std. error: {bootstrap_std:.4f}
+ - Bias: {bias:.4f}
+
+ {int(confidence_level*100)}% confidence interval:
+ - Lower: {ci_lower:.4f}
+ - Upper: {ci_upper:.4f}
+ - Width: {ci_upper - ci_lower:.4f}
+ """
+
+     return BootstrapResult(
+         statistic=original_stat,
+         bootstrap_mean=bootstrap_mean,
+         bootstrap_std=bootstrap_std,
+         confidence_interval=(ci_lower, ci_upper),
+         bias=bias,
+         confidence_level=confidence_level,
+         n_bootstrap=n_bootstrap,
+         method=method,
+         bootstrap_distribution=bootstrap_dist_sample,
+         summary=summary
+     )
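A quick sanity-check call for bootstrap_inference, shown as a sketch: the literal data values are made up for illustration, and the import uses the econometrics.statistical_inference re-export from the __init__.py hunk above.

    from econometrics.statistical_inference import bootstrap_inference

    # 95% percentile CI for the mean, reproducible via random_state
    result = bootstrap_inference(
        data=[2.1, 2.5, 1.9, 3.2, 2.8, 2.4, 2.0, 3.0],
        statistic_func="mean",
        n_bootstrap=2000,
        confidence_level=0.95,
        method="percentile",
        random_state=42,
    )
    print(result.statistic)            # point estimate on the original sample
    print(result.confidence_interval)  # (lower, upper)
    print(result.summary)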
@@ -0,0 +1,177 @@
+ """
+ Permutation test
+ Nonparametric hypothesis testing
+ Implemented on top of scipy.stats
+ """
+
+ from typing import List, Optional
+ from pydantic import BaseModel, Field
+ import numpy as np
+
+ try:
+     from scipy import stats
+     SCIPY_AVAILABLE = True
+ except ImportError:
+     SCIPY_AVAILABLE = False
+     stats = None
+
+
+ class PermutationTestResult(BaseModel):
+     """Permutation test result"""
+     statistic: float = Field(..., description="Observed statistic")
+     p_value: float = Field(..., description="P-value")
+     null_distribution_mean: float = Field(..., description="Mean of the null distribution")
+     null_distribution_std: float = Field(..., description="Standard deviation of the null distribution")
+     n_permutations: int = Field(..., description="Number of permutations")
+     alternative: str = Field(..., description="Alternative hypothesis")
+     test_type: str = Field(..., description="Test type")
+     n_sample_a: int = Field(..., description="Size of sample A")
+     n_sample_b: int = Field(..., description="Size of sample B")
+     permutation_distribution: List[float] = Field(..., description="Permutation distribution (first 100 values)")
+     summary: str = Field(..., description="Summary text")
+
+
+ def permutation_test(
+     sample_a: List[float],
+     sample_b: List[float],
+     test_type: str = "mean_difference",
+     alternative: str = "two-sided",
+     n_permutations: int = 10000,
+     random_state: Optional[int] = None
+ ) -> PermutationTestResult:
+     """
+     Two-sample permutation test
+
+     Args:
+         sample_a: sample A
+         sample_b: sample B
+         test_type: test type - "mean_difference",
+             "median_difference",
+             "variance_ratio"
+         alternative: alternative hypothesis - "two-sided", "less", "greater"
+         n_permutations: number of permutations
+         random_state: random seed
+
+     Returns:
+         PermutationTestResult: permutation test result
+
+     Raises:
+         ImportError: scipy is not installed
+         ValueError: invalid input data
+     """
+     if not SCIPY_AVAILABLE:
+         raise ImportError("scipy is not installed. Run: pip install scipy")
+
+     # Input validation
+     if not sample_a or not sample_b:
+         raise ValueError("Neither sample may be empty")
+
+     # Data preparation
+     a = np.array(sample_a, dtype=np.float64)
+     b = np.array(sample_b, dtype=np.float64)
+
+     n_a = len(a)
+     n_b = len(b)
+
+     # Set the random seed
+     if random_state is not None:
+         np.random.seed(random_state)
+
+     # Pool the data
+     combined = np.concatenate([a, b])
+     n_total = len(combined)
+
+     # Resolve the statistic function
+     if test_type == "mean_difference":
+         def stat_func(x, y):
+             return np.mean(x) - np.mean(y)
+     elif test_type == "median_difference":
+         def stat_func(x, y):
+             return np.median(x) - np.median(y)
+     elif test_type == "variance_ratio":
+         def stat_func(x, y):
+             return np.var(x, ddof=1) / np.var(y, ddof=1) if np.var(y, ddof=1) > 0 else 0
+     else:
+         raise ValueError(f"Unsupported test type: {test_type}")
+
+     # Observed statistic
+     observed_stat = stat_func(a, b)
+
+     # Run the permutations
+     perm_stats = []
+     for _ in range(n_permutations):
+         # Random permutation of the pooled data
+         perm = np.random.permutation(combined)
+         perm_a = perm[:n_a]
+         perm_b = perm[n_a:]
+         perm_stat = stat_func(perm_a, perm_b)
+         perm_stats.append(perm_stat)
+
+     perm_stats = np.array(perm_stats)
+
+     # P-value
+     if alternative == "two-sided":
+         p_value = np.mean(np.abs(perm_stats) >= np.abs(observed_stat))
+     elif alternative == "greater":
+         p_value = np.mean(perm_stats >= observed_stat)
+     elif alternative == "less":
+         p_value = np.mean(perm_stats <= observed_stat)
+     else:
+         raise ValueError(f"Unsupported alternative hypothesis: {alternative}")
+
+     # Moments of the null distribution
+     null_mean = float(perm_stats.mean())
+     null_std = float(perm_stats.std(ddof=1))
+
+     # Keep the first 100 permutation statistics
+     perm_dist_sample = perm_stats[:min(100, len(perm_stats))].tolist()
+
+     # Assess significance
+     if p_value < 0.01:
+         significance = "highly significant"
+     elif p_value < 0.05:
+         significance = "significant"
+     elif p_value < 0.10:
+         significance = "marginally significant"
+     else:
+         significance = "not significant"
+
+     # Build the summary
+     test_names = {
+         "mean_difference": "mean difference",
+         "median_difference": "median difference",
+         "variance_ratio": "variance ratio"
+     }
+
+     summary = f"""Permutation test:
+ - Test type: {test_names.get(test_type, test_type)}
+ - Alternative hypothesis: {alternative}
+ - Permutations: {n_permutations}
+
+ Sample information:
+ - Sample A: n={n_a}, mean={a.mean():.4f}
+ - Sample B: n={n_b}, mean={b.mean():.4f}
+
+ Test results:
+ - Observed statistic: {observed_stat:.4f}
+ - P-value: {p_value:.4f}
+ - Significance: {significance}
+
+ Null distribution:
+ - Mean: {null_mean:.4f}
+ - Std. dev.: {null_std:.4f}
+ """
+
+     return PermutationTestResult(
+         statistic=float(observed_stat),
+         p_value=float(p_value),
+         null_distribution_mean=null_mean,
+         null_distribution_std=null_std,
+         n_permutations=n_permutations,
+         alternative=alternative,
+         test_type=test_type,
+         n_sample_a=n_a,
+         n_sample_b=n_b,
+         permutation_distribution=perm_dist_sample,
+         summary=summary
+     )
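An analogous sketch for permutation_test, again importing via the package __init__ shown earlier; the two small samples are made-up illustration data:

    from econometrics.statistical_inference import permutation_test

    # Two-sided permutation test for a difference in means
    result = permutation_test(
        sample_a=[5.1, 4.9, 5.6, 5.3, 5.0, 5.4],
        sample_b=[4.5, 4.8, 4.4, 4.9, 4.6, 4.7],
        test_type="mean_difference",
        alternative="two-sided",
        n_permutations=5000,
        random_state=0,
    )
    print(result.statistic, result.p_value)
    print(result.summary)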
@@ -0,0 +1,18 @@
+ """
+ Survival analysis module
+ Analyzes time-to-event data
+ """
+
+ from .survival_models import (
+     kaplan_meier_estimation_simple,
+     cox_regression_simple,
+     KaplanMeierResult,
+     CoxRegressionResult
+ )
+
+ __all__ = [
+     'kaplan_meier_estimation_simple',
+     'cox_regression_simple',
+     'KaplanMeierResult',
+     'CoxRegressionResult'
+ ]
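The signatures of kaplan_meier_estimation_simple and cox_regression_simple live in survival_models.py, which this excerpt does not show, so rather than guessing them the sketch below only imports the exported names and introspects:

    import inspect
    from econometrics.survival_analysis import (
        kaplan_meier_estimation_simple,
        cox_regression_simple,
    )

    # Print the actual call signatures instead of assuming them
    print(inspect.signature(kaplan_meier_estimation_simple))
    print(inspect.signature(cox_regression_simple))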