aigroup-econ-mcp 1.3.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198):
  1. .gitignore +253 -0
  2. PKG-INFO +732 -0
  3. README.md +687 -0
  4. __init__.py +14 -0
  5. aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
  6. aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
  7. aigroup_econ_mcp-2.0.1.dist-info/entry_points.txt +2 -0
  8. aigroup_econ_mcp-2.0.1.dist-info/licenses/LICENSE +21 -0
  9. cli.py +32 -0
  10. econometrics/README.md +18 -0
  11. econometrics/__init__.py +191 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
  13. econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
  14. econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
  15. econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
  16. econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
  17. econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
  18. econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
  19. econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
  20. econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
  21. econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
  22. econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
  23. econometrics/basic_parametric_estimation/__init__.py +31 -0
  24. econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
  25. econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
  26. econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
  27. econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
  28. econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
  29. econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
  30. econometrics/causal_inference/__init__.py +66 -0
  31. econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
  32. econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
  33. econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
  34. econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
  35. econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
  36. econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
  37. econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
  38. econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
  39. econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
  40. econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
  41. econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
  42. econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
  43. econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
  44. econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
  45. econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
  46. econometrics/distribution_analysis/__init__.py +28 -0
  47. econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
  48. econometrics/distribution_analysis/time_series_decomposition.py +152 -0
  49. econometrics/distribution_analysis/variance_decomposition.py +179 -0
  50. econometrics/missing_data/__init__.py +18 -0
  51. econometrics/missing_data/imputation_methods.py +219 -0
  52. econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
  53. econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
  54. econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
  55. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
  56. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +149 -0
  57. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
  58. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
  59. econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
  60. econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +286 -0
  61. econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
  62. econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
  63. econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
  64. econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
  65. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
  66. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
  67. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
  68. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
  69. econometrics/nonparametric/__init__.py +35 -0
  70. econometrics/nonparametric/gam_model.py +117 -0
  71. econometrics/nonparametric/kernel_regression.py +161 -0
  72. econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
  73. econometrics/nonparametric/quantile_regression.py +249 -0
  74. econometrics/nonparametric/spline_regression.py +100 -0
  75. econometrics/spatial_econometrics/__init__.py +68 -0
  76. econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
  77. econometrics/spatial_econometrics/gwr_simple.py +154 -0
  78. econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
  79. econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
  80. econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
  81. econometrics/spatial_econometrics/spatial_regression.py +315 -0
  82. econometrics/spatial_econometrics/spatial_weights.py +226 -0
  83. econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
  84. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
  85. econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
  86. econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
  87. econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
  88. econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
  89. econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
  90. econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
  91. econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
  92. econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
  93. econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
  94. econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
  95. econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
  96. econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
  97. econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
  98. econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
  99. econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
  100. econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
  101. econometrics/statistical_inference/__init__.py +21 -0
  102. econometrics/statistical_inference/bootstrap_methods.py +162 -0
  103. econometrics/statistical_inference/permutation_test.py +177 -0
  104. econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
  105. econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
  106. econometrics/survival_analysis/__init__.py +18 -0
  107. econometrics/survival_analysis/survival_models.py +259 -0
  108. econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
  109. econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
  110. econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
  111. econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
  112. econometrics/tests/causal_inference_tests/__init__.py +3 -0
  113. econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
  114. econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
  115. econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
  116. econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
  117. econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
  118. econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
  119. econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
  120. econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
  121. econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
  122. econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
  123. econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
  124. econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
  125. econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
  126. econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
  127. econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
  128. econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
  129. econometrics//321/206/320/254/320/272/321/205/342/225/235/320/220/321/205/320/237/320/241/321/205/320/264/320/267/321/207/342/226/222/342/225/227/321/204/342/225/235/320/250/321/205/320/225/320/230/321/207/342/225/221/320/267/321/205/320/230/320/226/321/206/320/256/320/240.md +544 -0
  130. prompts/__init__.py +0 -0
  131. prompts/analysis_guides.py +43 -0
  132. pyproject.toml +85 -0
  133. resources/MCP_MASTER_GUIDE.md +422 -0
  134. resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
  135. resources/__init__.py +0 -0
  136. server.py +97 -0
  137. tools/README.md +88 -0
  138. tools/__init__.py +119 -0
  139. tools/causal_inference_adapter.py +658 -0
  140. tools/data_loader.py +213 -0
  141. tools/decorators.py +38 -0
  142. tools/distribution_analysis_adapter.py +121 -0
  143. tools/econometrics_adapter.py +286 -0
  144. tools/gwr_simple_adapter.py +54 -0
  145. tools/machine_learning_adapter.py +567 -0
  146. tools/mcp_tool_groups/__init__.py +15 -0
  147. tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
  148. tools/mcp_tool_groups/causal_inference_tools.py +643 -0
  149. tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
  150. tools/mcp_tool_groups/machine_learning_tools.py +422 -0
  151. tools/mcp_tool_groups/microecon_tools.py +325 -0
  152. tools/mcp_tool_groups/missing_data_tools.py +117 -0
  153. tools/mcp_tool_groups/model_specification_tools.py +402 -0
  154. tools/mcp_tool_groups/nonparametric_tools.py +225 -0
  155. tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
  156. tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
  157. tools/mcp_tool_groups/time_series_tools.py +494 -0
  158. tools/mcp_tools_registry.py +124 -0
  159. tools/microecon_adapter.py +412 -0
  160. tools/missing_data_adapter.py +73 -0
  161. tools/model_specification_adapter.py +369 -0
  162. tools/nonparametric_adapter.py +190 -0
  163. tools/output_formatter.py +563 -0
  164. tools/spatial_econometrics_adapter.py +318 -0
  165. tools/statistical_inference_adapter.py +90 -0
  166. tools/survival_analysis_adapter.py +46 -0
  167. tools/time_series_panel_data_adapter.py +858 -0
  168. tools/time_series_panel_data_tools.py +65 -0
  169. aigroup_econ_mcp/__init__.py +0 -19
  170. aigroup_econ_mcp/cli.py +0 -82
  171. aigroup_econ_mcp/config.py +0 -561
  172. aigroup_econ_mcp/server.py +0 -452
  173. aigroup_econ_mcp/tools/__init__.py +0 -19
  174. aigroup_econ_mcp/tools/base.py +0 -470
  175. aigroup_econ_mcp/tools/cache.py +0 -533
  176. aigroup_econ_mcp/tools/data_loader.py +0 -195
  177. aigroup_econ_mcp/tools/file_parser.py +0 -1027
  178. aigroup_econ_mcp/tools/machine_learning.py +0 -60
  179. aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
  180. aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
  181. aigroup_econ_mcp/tools/ml_models.py +0 -54
  182. aigroup_econ_mcp/tools/ml_regularization.py +0 -186
  183. aigroup_econ_mcp/tools/monitoring.py +0 -555
  184. aigroup_econ_mcp/tools/optimized_example.py +0 -229
  185. aigroup_econ_mcp/tools/panel_data.py +0 -619
  186. aigroup_econ_mcp/tools/regression.py +0 -214
  187. aigroup_econ_mcp/tools/statistics.py +0 -154
  188. aigroup_econ_mcp/tools/time_series.py +0 -698
  189. aigroup_econ_mcp/tools/timeout.py +0 -283
  190. aigroup_econ_mcp/tools/tool_descriptions.py +0 -410
  191. aigroup_econ_mcp/tools/tool_handlers.py +0 -1016
  192. aigroup_econ_mcp/tools/tool_registry.py +0 -478
  193. aigroup_econ_mcp/tools/validation.py +0 -482
  194. aigroup_econ_mcp-1.3.3.dist-info/METADATA +0 -525
  195. aigroup_econ_mcp-1.3.3.dist-info/RECORD +0 -30
  196. aigroup_econ_mcp-1.3.3.dist-info/entry_points.txt +0 -2
  197. /aigroup_econ_mcp-1.3.3.dist-info/licenses/LICENSE → /LICENSE +0 -0
  198. {aigroup_econ_mcp-1.3.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
@@ -1,214 +0,0 @@
1
- """
2
- 回归分析工具
3
- """
4
-
5
- import numpy as np
6
- import pandas as pd
7
- import statsmodels.api as sm
8
- from typing import List, Dict, Any, Optional
9
- from pydantic import BaseModel
10
-
11
-
12
class OLSResult(BaseModel):
    """Result of an OLS regression fit."""
    # Per-variable statistics keyed by variable name. perform_ols_regression
    # stores the keys "coef", "std_err", "t_value", "p_value", "ci_lower"
    # and "ci_upper" for each variable.
    coefficients: Dict[str, Dict[str, float]]
    # R-squared of the fitted model.
    rsquared: float
    # Adjusted R-squared of the fitted model.
    rsquared_adj: float
    # F statistic of the fitted model (taken from ``fvalue``).
    f_statistic: float
    # p-value associated with the F statistic.
    f_pvalue: float
    # Akaike information criterion reported by the fit.
    aic: float
    # Bayesian information criterion reported by the fit.
    bic: float
    # Number of observations used in the fit.
    n_obs: int
22
-
23
-
24
class DiagnosticTests(BaseModel):
    """Results of the model diagnostic tests."""
    # Jarque-Bera normality test on the residuals: statistic and p-value.
    jb_statistic: float
    jb_pvalue: float
    # Breusch-Pagan heteroskedasticity test: statistic and p-value.
    bp_statistic: float
    bp_pvalue: float
    # Durbin-Watson serial-correlation statistic of the residuals.
    dw_statistic: float
    # Variance inflation factor per regressor, keyed by variable name
    # (as produced by calculate_vif).
    vif: Dict[str, float]
32
-
33
-
34
def perform_ols_regression(
    y: List[float],
    X: List[List[float]],
    feature_names: Optional[List[str]] = None,
    add_constant: bool = True
) -> OLSResult:
    """Fit an OLS regression with statsmodels and package the results.

    Args:
        y: Observations of the dependent variable.
        X: Regressor values; rows are observations and columns are
            regressors (the layout handed to ``sm.OLS``).
        feature_names: Optional names for the columns of ``X``. A column
            without a supplied name is reported as ``x{i}``, where ``i`` is
            the position of its coefficient in the fitted parameter vector.
        add_constant: When true, ``sm.add_constant`` is applied to ``X`` and
            the first coefficient is reported as ``"const"``.

    Returns:
        An ``OLSResult`` holding the goodness-of-fit measures and, for each
        coefficient, its estimate, standard error, t value, p-value and
        confidence-interval bounds.
    """
    # Prepare the data.
    X_matrix = np.array(X)
    y_vector = np.array(y)

    if add_constant:
        X_matrix = sm.add_constant(X_matrix)

    # Fit the model.
    model = sm.OLS(y_vector, X_matrix).fit()

    # The parameter vector only starts with a constant when one was added;
    # without this offset the names would be shifted by one position (and the
    # first coefficient would receive the *last* name) when add_constant is
    # False.
    name_offset = 1 if add_constant else 0
    names = list(feature_names) if feature_names is not None else []

    params = np.asarray(model.params)
    std_errors = np.asarray(model.bse)
    t_values = np.asarray(model.tvalues)
    p_values = np.asarray(model.pvalues)
    conf_int = np.asarray(model.conf_int())

    # Collect the per-coefficient details.
    coefficients: Dict[str, Dict[str, float]] = {}
    for i, coef in enumerate(params):
        feature_idx = i - name_offset
        if add_constant and i == 0:
            var_name = "const"
        elif feature_idx < len(names):
            var_name = names[feature_idx]
        else:
            # No name supplied for this column: fall back to a positional one
            # instead of raising IndexError.
            var_name = f"x{i}"
        coefficients[var_name] = {
            "coef": float(coef),
            "std_err": float(std_errors[i]),
            "t_value": float(t_values[i]),
            "p_value": float(p_values[i]),
            "ci_lower": float(conf_int[i][0]),
            "ci_upper": float(conf_int[i][1])
        }

    return OLSResult(
        coefficients=coefficients,
        rsquared=float(model.rsquared),
        rsquared_adj=float(model.rsquared_adj),
        f_statistic=float(model.fvalue),
        f_pvalue=float(model.f_pvalue),
        aic=float(model.aic),
        bic=float(model.bic),
        # statsmodels reports nobs as a float; the result field is an int.
        n_obs=int(model.nobs)
    )
77
-
78
-
79
def calculate_vif(X: List[List[float]], feature_names: Optional[List[str]] = None) -> Dict[str, float]:
    """Compute the variance inflation factor (VIF) of every regressor.

    Each column is regressed on all remaining columns (constant included) and
    its VIF is ``1 / (1 - R^2)`` of that auxiliary regression, or infinity
    when the auxiliary R-squared is not below 1.

    Args:
        X: Regressor values; rows are observations, columns are regressors.
        feature_names: Optional column names; defaults to ``x0``, ``x1``, ...

    Returns:
        Mapping from variable name to its VIF.
    """
    raw = np.array(X)

    # The auxiliary regressions are run on the design matrix with a constant.
    design = sm.add_constant(raw)

    if feature_names is None:
        feature_names = [f"x{k}" for k in range(raw.shape[1])]

    factors = {}
    # Column 0 is the constant term, so start at 1.
    for col in range(1, design.shape[1]):
        idx = col - 1
        label = feature_names[idx] if idx < len(feature_names) else f"x{idx}"

        # Treat the current column as the response and the rest as regressors.
        target = design[:, col]
        others = np.delete(design, col, axis=1)
        r2 = sm.OLS(target, others).fit().rsquared

        factors[label] = 1 / (1 - r2) if r2 < 1 else float('inf')

    return factors
112
-
113
-
114
def run_diagnostic_tests(
    y: List[float],
    X: List[List[float]],
    residuals: Optional[List[float]] = None
) -> DiagnosticTests:
    """Run the standard diagnostic tests for a linear model.

    Args:
        y: Observations of the dependent variable. Only used to fit the
            model when ``residuals`` is not supplied.
        X: Regressor values; rows are observations, columns are regressors.
        residuals: Optional precomputed residuals. When omitted, an OLS model
            with a constant is fitted to obtain them.

    Returns:
        A ``DiagnosticTests`` with the Jarque-Bera, Breusch-Pagan and
        Durbin-Watson results plus the VIF of every regressor.
    """
    features = np.array(X)
    response = np.array(y)

    # Fit the model only when the caller did not hand in residuals.
    if residuals is None:
        fitted = sm.OLS(response, sm.add_constant(features)).fit()
        residuals = fitted.resid

    # Jarque-Bera normality test.
    jb_value, jb_prob, _, _ = sm.stats.stattools.jarque_bera(residuals)

    # Breusch-Pagan heteroskedasticity test against the design with a constant.
    design = sm.add_constant(features)
    bp_value, bp_prob, _, _ = sm.stats.diagnostic.het_breuschpagan(residuals, design)

    # Durbin-Watson serial-correlation statistic.
    dw_value = sm.stats.stattools.durbin_watson(residuals)

    # Variance inflation factors.
    vif_by_name = calculate_vif(features)

    return DiagnosticTests(
        jb_statistic=jb_value,
        jb_pvalue=jb_prob,
        bp_statistic=bp_value,
        bp_pvalue=bp_prob,
        dw_statistic=dw_value,
        vif=vif_by_name
    )
150
-
151
-
152
def _stepwise_report(model, names):
    """Build the result dict of stepwise_regression for a fitted model."""
    return {
        "selected_features": names,
        "model_summary": {
            "rsquared": model.rsquared,
            "rsquared_adj": model.rsquared_adj,
            "aic": model.aic,
            "bic": model.bic,
            "f_statistic": model.fvalue,
            "f_pvalue": model.f_pvalue
        },
        # Each coefficient maps to an (estimate, p-value) pair.
        "coefficients": dict(zip(
            ["const"] + names,
            zip(model.params, model.pvalues)
        ))
    }


def stepwise_regression(
    y: List[float],
    X: List[List[float]],
    feature_names: List[str],
    direction: str = "both",
    alpha_in: float = 0.05,
    alpha_out: float = 0.10
) -> Dict[str, Any]:
    """Simplified stepwise regression.

    This is not an iterative stepwise search: the full model is fitted once,
    the regressors whose p-value is below ``alpha_in`` are kept, and a second
    model is fitted on those alone. If no regressor qualifies, the full model
    is reported instead.

    Args:
        y: Observations of the dependent variable.
        X: Regressor values; rows are observations, columns are regressors.
        feature_names: Names of the columns of ``X``.
        direction: Accepted for interface compatibility; never read.
        alpha_in: p-value threshold below which a regressor is kept.
        alpha_out: Accepted for interface compatibility; never read.

    Returns:
        Dict with ``selected_features``, ``model_summary`` and
        ``coefficients`` (name -> (estimate, p-value)).
    """
    features = np.array(X)
    response = np.array(y)

    # Fit the model with every regressor plus a constant.
    full_fit = sm.OLS(response, sm.add_constant(features)).fit()

    # Keep the regressors that are significant at alpha_in; the leading
    # p-value belongs to the constant and is skipped.
    kept_names = []
    kept_columns = []
    for column, p_val in enumerate(full_fit.pvalues[1:]):
        if p_val < alpha_in:
            kept_names.append(feature_names[column])
            kept_columns.append(column)

    if not kept_columns:
        # Nothing significant: report the full model.
        return _stepwise_report(full_fit, feature_names)

    # Refit using only the significant regressors.
    reduced_design = sm.add_constant(features[:, kept_columns])
    reduced_fit = sm.OLS(response, reduced_design).fit()
    return _stepwise_report(reduced_fit, kept_names)
@@ -1,154 +0,0 @@
1
- """
2
- 统计分析工具
3
- """
4
-
5
- import numpy as np
6
- import pandas as pd
7
- from scipy import stats
8
- from typing import Dict, List, Any
9
- from pydantic import BaseModel
10
- import statsmodels.api as sm
11
-
12
-
13
class DescriptiveStats(BaseModel):
    """Descriptive statistics of a single variable."""
    # Arithmetic mean.
    mean: float
    # Median.
    median: float
    # Standard deviation (calculate_descriptive_stats fills it via np.std).
    std: float
    # Smallest and largest observed values.
    min: float
    max: float
    # Skewness (filled via scipy.stats.skew).
    skewness: float
    # Kurtosis (filled via scipy.stats.kurtosis).
    kurtosis: float
    # Number of observations.
    count: int
23
-
24
-
25
class CorrelationResult(BaseModel):
    """Result of a correlation analysis."""
    # Nested mapping variable -> variable -> correlation coefficient.
    correlation_matrix: Dict[str, Dict[str, float]]
    # Name of the correlation method that was used (e.g. "pearson").
    method: str
29
-
30
-
31
def calculate_descriptive_stats(data: Dict[str, List[float]]) -> Dict[str, Dict[str, Any]]:
    """Compute descriptive statistics for several variables at once.

    Args:
        data: Mapping from variable name to its observations.

    Returns:
        Mapping from variable name to a dict with the fields of
        ``DescriptiveStats`` (mean, median, std, min, max, skewness,
        kurtosis, count).
    """
    summary = {}
    for name, observations in data.items():
        # Work on a float NumPy array rather than going through pandas.
        values = np.array(observations, dtype=float)

        measures = {
            "mean": float(np.mean(values)),
            "median": float(np.median(values)),
            "std": float(np.std(values)),
            "min": float(np.min(values)),
            "max": float(np.max(values)),
            "skewness": float(stats.skew(values)),
            "kurtosis": float(stats.kurtosis(values)),
            "count": len(values),
        }
        # Validate through the model, then hand back a plain dict.
        summary[name] = DescriptiveStats(**measures).dict()
    return summary
51
-
52
-
53
def calculate_correlation_matrix(
    data: Dict[str, List[float]],
    method: str = "pearson"
) -> CorrelationResult:
    """Compute the correlation matrix of the given variables.

    Args:
        data: Mapping from variable name to its observations.
        method: Correlation method forwarded to ``DataFrame.corr``.

    Returns:
        A ``CorrelationResult`` with the matrix as nested dicts and the
        method that was used.
    """
    frame = pd.DataFrame(data)
    matrix_as_dict = frame.corr(method=method).to_dict()
    return CorrelationResult(correlation_matrix=matrix_as_dict, method=method)
65
-
66
-
67
def _build_test_report(test_name, statistic, p_value, alpha, **extra):
    """Assemble the result dict shared by every test, using built-in types.

    SciPy returns NumPy scalars; ``p_value < alpha`` on those yields a
    ``numpy.bool_``, which is not a ``bool`` and cannot be JSON-serialised,
    so everything is converted here.
    """
    p_value = float(p_value)
    report = {
        "test_type": test_name,
        "statistic": float(statistic),
        "p_value": p_value,
    }
    report.update(extra)
    report["significant"] = bool(p_value < alpha)
    report["alpha"] = alpha
    return report


def perform_hypothesis_test(
    data1: List[float],
    data2: List[float] = None,
    test_type: str = "t_test",
    alpha: float = 0.05
) -> Dict[str, Any]:
    """Run a hypothesis test and report whether it is significant.

    Args:
        data1: First sample.
        data2: Optional second sample. Required for ``"f_test"``; for
            ``"t_test"`` it switches from a one-sample to a two-sample test.
        test_type: One of ``"t_test"``, ``"f_test"``, ``"chi_square"``,
            ``"adf"``.
        alpha: Significance level the p-value is compared against.

    Returns:
        Dict with ``test_type``, ``statistic``, ``p_value``, ``significant``
        and ``alpha`` (plus ``critical_values`` for the ADF test). Numeric
        results are Python floats and ``significant`` is a Python bool.

    Raises:
        ValueError: If ``test_type`` is unknown, or ``"f_test"`` is requested
            without ``data2``.
    """
    if test_type == "t_test":
        if data2 is None:
            # One-sample t-test against a population mean of 0.
            t_stat, p_value = stats.ttest_1samp(data1, 0)
            test_name = "单样本t检验"
        else:
            # Two-sample t-test on independent samples.
            t_stat, p_value = stats.ttest_ind(data1, data2)
            test_name = "双样本t检验"
        return _build_test_report(test_name, t_stat, p_value, alpha)

    if test_type == "f_test":
        if data2 is None:
            raise ValueError("F检验需要两组数据")
        # NOTE: this is scipy.stats.f_oneway, i.e. a one-way ANOVA F-test
        # comparing the group means. It is not a variance-homogeneity test.
        f_stat, p_value = stats.f_oneway(data1, data2)
        return _build_test_report("F检验", f_stat, p_value, alpha)

    if test_type == "chi_square":
        # Simplified: data1 is passed to chisquare as the observed
        # frequencies with the default expected frequencies.
        chi2_stat, p_value = stats.chisquare(data1)
        return _build_test_report("卡方检验", chi2_stat, p_value, alpha)

    if test_type == "adf":
        # Augmented Dickey-Fuller unit-root test; imported lazily so the
        # other tests do not need statsmodels' time-series module.
        from statsmodels.tsa.stattools import adfuller
        adf_result = adfuller(data1)
        return _build_test_report(
            "ADF单位根检验",
            adf_result[0],
            adf_result[1],
            alpha,
            critical_values=adf_result[4],
        )

    raise ValueError(f"不支持的检验类型: {test_type}")
133
-
134
-
135
def normality_test(data: List[float]) -> Dict[str, Any]:
    """Test a sample for normality with Shapiro-Wilk and Kolmogorov-Smirnov.

    Args:
        data: The sample to test.

    Returns:
        Dict with the keys ``"shapiro_wilk"`` and ``"kolmogorov_smirnov"``,
        each holding ``statistic``, ``p_value`` and ``normal`` (true when the
        p-value exceeds 0.05). Values are Python floats and bools rather than
        NumPy scalars, so the result can be JSON-serialised.
    """
    # Shapiro-Wilk test.
    shapiro_stat, shapiro_p = stats.shapiro(data)

    # Kolmogorov-Smirnov test against a normal distribution whose mean and
    # standard deviation are estimated from the same sample.
    # NOTE(review): estimating the parameters from the data makes the KS
    # p-value only approximate.
    ks_stat, ks_p = stats.kstest(data, 'norm', args=(np.mean(data), np.std(data)))

    shapiro_p = float(shapiro_p)
    ks_p = float(ks_p)

    return {
        "shapiro_wilk": {
            "statistic": float(shapiro_stat),
            "p_value": shapiro_p,
            "normal": bool(shapiro_p > 0.05)
        },
        "kolmogorov_smirnov": {
            "statistic": float(ks_stat),
            "p_value": ks_p,
            "normal": bool(ks_p > 0.05)
        }
    }