aigroup-econ-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of aigroup-econ-mcp has been flagged as possibly problematic.

@@ -0,0 +1,220 @@
+ """
+ Regression analysis tools
+ """
+
+ import numpy as np
+ import pandas as pd
+ import statsmodels.api as sm
+ from statsmodels.stats.diagnostic import het_breuschpagan
+ from statsmodels.stats.stattools import jarque_bera, durbin_watson
+ from typing import List, Dict, Any, Optional
+ from pydantic import BaseModel
+
+
+ class OLSResult(BaseModel):
+     """OLS regression results"""
+     coefficients: Dict[str, Dict[str, float]]
+     rsquared: float
+     rsquared_adj: float
+     f_statistic: float
+     f_pvalue: float
+     aic: float
+     bic: float
+     n_obs: int
+
+
+ class DiagnosticTests(BaseModel):
+     """Model diagnostic results"""
+     jb_statistic: float
+     jb_pvalue: float
+     bp_statistic: float
+     bp_pvalue: float
+     dw_statistic: float
+     vif: Dict[str, float]
+
+
+ def perform_ols_regression(
+     y: List[float],
+     X: List[List[float]],
+     feature_names: Optional[List[str]] = None,
+     add_constant: bool = True
+ ) -> OLSResult:
+     """Run an OLS regression"""
+     # Prepare the data
+     X_matrix = np.array(X)
+     y_vector = np.array(y)
+
+     if add_constant:
+         X_matrix = sm.add_constant(X_matrix)
+
+     # Fit the model
+     model = sm.OLS(y_vector, X_matrix).fit()
+
+     # Build the result object
+     result = OLSResult(
+         coefficients={},
+         rsquared=model.rsquared,
+         rsquared_adj=model.rsquared_adj,
+         f_statistic=model.fvalue,
+         f_pvalue=model.f_pvalue,
+         aic=model.aic,
+         bic=model.bic,
+         n_obs=int(model.nobs)
+     )
+
+     # Add per-coefficient details
+     conf_int = model.conf_int()
+     offset = 1 if add_constant else 0
+     for i, coef in enumerate(model.params):
+         if add_constant and i == 0:
+             var_name = "const"
+         else:
+             var_name = feature_names[i - offset] if feature_names else f"x{i - offset}"
+         result.coefficients[var_name] = {
+             "coef": coef,
+             "std_err": model.bse[i],
+             "t_value": model.tvalues[i],
+             "p_value": model.pvalues[i],
+             "ci_lower": conf_int[i][0],
+             "ci_upper": conf_int[i][1]
+         }
+
+     return result
+
+
+ def calculate_vif(X: List[List[float]], feature_names: Optional[List[str]] = None) -> Dict[str, float]:
+     """Compute variance inflation factors (VIF)"""
+     X_matrix = np.array(X)
+
+     # Add a constant term for the auxiliary regressions
+     X_with_const = sm.add_constant(X_matrix)
+
+     if feature_names is None:
+         feature_names = [f"x{i}" for i in range(X_matrix.shape[1])]
+
+     # Compute the VIF for each regressor
+     vif_values = {}
+
+     for i in range(1, X_with_const.shape[1]):  # skip the constant
+         var_name = feature_names[i-1] if i-1 < len(feature_names) else f"x{i-1}"
+
+         # Regress the current variable on all the others
+         y_temp = X_with_const[:, i]
+         X_temp = np.delete(X_with_const, i, axis=1)
+
+         # Fit the auxiliary regression
+         aux_model = sm.OLS(y_temp, X_temp).fit()
+         r_squared = aux_model.rsquared
+
+         # VIF = 1 / (1 - R^2)
+         if r_squared < 1:
+             vif = 1 / (1 - r_squared)
+         else:
+             vif = float('inf')
+
+         vif_values[var_name] = vif
+
+     return vif_values
+
+
+ def run_diagnostic_tests(
+     y: List[float],
+     X: List[List[float]],
+     residuals: Optional[List[float]] = None
+ ) -> DiagnosticTests:
+     """Run model diagnostic tests"""
+     X_matrix = np.array(X)
+     y_vector = np.array(y)
+
+     # Fit the model to obtain residuals if none were supplied
+     if residuals is None:
+         X_with_const = sm.add_constant(X_matrix)
+         model = sm.OLS(y_vector, X_with_const).fit()
+         residuals = model.resid
+
+     # Jarque-Bera normality test
+     jb_stat, jb_p_value, _, _ = jarque_bera(residuals)
+
+     # Breusch-Pagan heteroskedasticity test
+     X_with_const = sm.add_constant(X_matrix)
+     bp_stat, bp_p_value, _, _ = het_breuschpagan(residuals, X_with_const)
+
+     # Durbin-Watson test for serial correlation
+     dw_stat = durbin_watson(residuals)
+
+     # Variance inflation factors
+     vif_values = calculate_vif(X_matrix)
+
+     return DiagnosticTests(
+         jb_statistic=jb_stat,
+         jb_pvalue=jb_p_value,
+         bp_statistic=bp_stat,
+         bp_pvalue=bp_p_value,
+         dw_statistic=dw_stat,
+         vif=vif_values
+     )
+
+
+ def stepwise_regression(
+     y: List[float],
+     X: List[List[float]],
+     feature_names: List[str],
+     direction: str = "both",
+     alpha_in: float = 0.05,
+     alpha_out: float = 0.10
+ ) -> Dict[str, Any]:
+     """Stepwise regression (simplified version)"""
+     X_matrix = np.array(X)
+     y_vector = np.array(y)
+
+     # For simplicity, fit the full model once; a true stepwise search
+     # (using direction and alpha_out) needs a more involved implementation
+     X_with_const = sm.add_constant(X_matrix)
+     final_model = sm.OLS(y_vector, X_with_const).fit()
+
+     # Collect the significant variables (p-value < alpha_in)
+     significant_features = []
+     significant_indices = []
+
+     for i, p_val in enumerate(final_model.pvalues[1:], 1):  # skip the constant
+         if p_val < alpha_in:
+             significant_features.append(feature_names[i-1])
+             significant_indices.append(i)
+
+     # If any variables are significant, refit using only those
+     if significant_indices:
+         X_significant = sm.add_constant(X_matrix[:, [i-1 for i in significant_indices]])
+         significant_model = sm.OLS(y_vector, X_significant).fit()
+
+         return {
+             "selected_features": significant_features,
+             "model_summary": {
+                 "rsquared": significant_model.rsquared,
+                 "rsquared_adj": significant_model.rsquared_adj,
+                 "aic": significant_model.aic,
+                 "bic": significant_model.bic,
+                 "f_statistic": significant_model.fvalue,
+                 "f_pvalue": significant_model.f_pvalue
+             },
+             "coefficients": dict(zip(
+                 ["const"] + significant_features,
+                 zip(significant_model.params, significant_model.pvalues)
+             ))
+         }
+     else:
+         # Otherwise return the full model
+         return {
+             "selected_features": feature_names,
+             "model_summary": {
+                 "rsquared": final_model.rsquared,
+                 "rsquared_adj": final_model.rsquared_adj,
+                 "aic": final_model.aic,
+                 "bic": final_model.bic,
+                 "f_statistic": final_model.fvalue,
+                 "f_pvalue": final_model.f_pvalue
+             },
+             "coefficients": dict(zip(
+                 ["const"] + feature_names,
+                 zip(final_model.params, final_model.pvalues)
+             ))
+         }
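
For orientation, here is a minimal usage sketch for the regression helpers above. The import path aigroup_econ_mcp.tools.regression is an assumption about the wheel's module layout, and the toy data are made up:

    # Hypothetical usage; module path and data are illustrative only
    from aigroup_econ_mcp.tools.regression import perform_ols_regression, run_diagnostic_tests

    y = [2.1, 2.9, 4.2, 5.1, 5.8, 7.2]
    X = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]

    result = perform_ols_regression(y, X, feature_names=["x1"])
    print(result.rsquared, result.coefficients["x1"]["p_value"])

    diagnostics = run_diagnostic_tests(y, X)
    print(diagnostics.dw_statistic)  # values near 2 suggest no serial correlation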
@@ -0,0 +1,134 @@
+ """
+ Statistical analysis tools
+ """
+
+ import numpy as np
+ import pandas as pd
+ from scipy import stats
+ from typing import Dict, List, Any, Optional
+ from pydantic import BaseModel
+
+
+ class DescriptiveStats(BaseModel):
+     """Descriptive statistics results"""
+     mean: float
+     median: float
+     std: float
+     min: float
+     max: float
+     skewness: float
+     kurtosis: float
+     count: int
+
+
+ class CorrelationResult(BaseModel):
+     """Correlation analysis results"""
+     correlation_matrix: Dict[str, Dict[str, float]]
+     method: str
+
+
+ def calculate_descriptive_stats(data: List[float]) -> DescriptiveStats:
+     """Compute descriptive statistics"""
+     series = pd.Series(data)
+
+     return DescriptiveStats(
+         mean=series.mean(),
+         median=series.median(),
+         std=series.std(),
+         min=series.min(),
+         max=series.max(),
+         skewness=series.skew(),
+         kurtosis=series.kurtosis(),
+         count=len(series)
+     )
+
+
+ def calculate_correlation_matrix(
+     data: Dict[str, List[float]],
+     method: str = "pearson"
+ ) -> CorrelationResult:
+     """Compute a correlation matrix"""
+     df = pd.DataFrame(data)
+     corr_matrix = df.corr(method=method)
+
+     return CorrelationResult(
+         correlation_matrix=corr_matrix.to_dict(),
+         method=method
+     )
+
+
+ def perform_hypothesis_test(
+     data1: List[float],
+     data2: Optional[List[float]] = None,
+     test_type: str = "t_test",
+     alpha: float = 0.05
+ ) -> Dict[str, Any]:
+     """Run a hypothesis test"""
+     if test_type == "t_test":
+         if data2 is None:
+             # One-sample t-test against a mean of zero
+             t_stat, p_value = stats.ttest_1samp(data1, 0)
+             test_name = "One-sample t-test"
+         else:
+             # Two-sample t-test
+             t_stat, p_value = stats.ttest_ind(data1, data2)
+             test_name = "Two-sample t-test"
+
+         return {
+             "test_type": test_name,
+             "statistic": t_stat,
+             "p_value": p_value,
+             "significant": p_value < alpha,
+             "alpha": alpha
+         }
+
+     elif test_type == "f_test":
+         # F-test via one-way ANOVA (compares group means, not variances)
+         if data2 is None:
+             raise ValueError("The F-test requires two samples")
+
+         f_stat, p_value = stats.f_oneway(data1, data2)
+         return {
+             "test_type": "F-test (one-way ANOVA)",
+             "statistic": f_stat,
+             "p_value": p_value,
+             "significant": p_value < alpha,
+             "alpha": alpha
+         }
+
+     elif test_type == "chi_square":
+         # Chi-square goodness-of-fit test
+         # Simplified: data1 is treated as observed frequencies
+         chi2_stat, p_value = stats.chisquare(data1)
+         return {
+             "test_type": "Chi-square test",
+             "statistic": chi2_stat,
+             "p_value": p_value,
+             "significant": p_value < alpha,
+             "alpha": alpha
+         }
+
+     else:
+         raise ValueError(f"Unsupported test type: {test_type}")
+
+
+ def normality_test(data: List[float]) -> Dict[str, Any]:
+     """Normality tests"""
+     # Shapiro-Wilk test
+     shapiro_stat, shapiro_p = stats.shapiro(data)
+
+     # Kolmogorov-Smirnov test (approximate: parameters estimated from the data)
+     ks_stat, ks_p = stats.kstest(data, 'norm', args=(np.mean(data), np.std(data)))
+
+     return {
+         "shapiro_wilk": {
+             "statistic": shapiro_stat,
+             "p_value": shapiro_p,
+             "normal": shapiro_p > 0.05
+         },
+         "kolmogorov_smirnov": {
+             "statistic": ks_stat,
+             "p_value": ks_p,
+             "normal": ks_p > 0.05
+         }
+     }
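
A similar sketch for the statistics helpers, again with an assumed import path and made-up samples:

    # Hypothetical usage; module path and data are illustrative only
    from aigroup_econ_mcp.tools.statistics import (
        calculate_descriptive_stats,
        perform_hypothesis_test,
    )

    sample_a = [5.1, 4.9, 6.2, 5.8, 5.5, 5.0]
    sample_b = [6.3, 6.1, 7.0, 6.8, 6.5, 6.9]

    stats_a = calculate_descriptive_stats(sample_a)
    print(stats_a.mean, stats_a.std)

    t_result = perform_hypothesis_test(sample_a, sample_b, test_type="t_test")
    print(t_result["p_value"], t_result["significant"])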
@@ -0,0 +1,247 @@
+ """
+ Time series analysis tools
+ """
+
+ import numpy as np
+ import pandas as pd
+ from typing import List, Dict, Any, Optional, Tuple
+ from pydantic import BaseModel
+ import statsmodels.api as sm
+ from statsmodels.tsa.stattools import adfuller, kpss, acf, pacf
+ from statsmodels.tsa.arima.model import ARIMA
+ from statsmodels.tsa.statespace.sarimax import SARIMAX
+ from statsmodels.tsa.seasonal import seasonal_decompose
+
+
+ class StationarityTest(BaseModel):
+     """Stationarity test results"""
+     adf_statistic: float
+     adf_pvalue: float
+     adf_critical_values: Dict[str, float]
+     kpss_statistic: float
+     kpss_pvalue: float
+     is_stationary: bool
+
+
+ class ACFPACFResult(BaseModel):
+     """Autocorrelation analysis results"""
+     acf_values: List[float]
+     pacf_values: List[float]
+     acf_confidence: List[Tuple[float, float]]
+     pacf_confidence: List[Tuple[float, float]]
+
+
+ class ARIMAResult(BaseModel):
+     """ARIMA model results"""
+     order: Tuple[int, int, int]
+     aic: float
+     bic: float
+     coefficients: Dict[str, float]
+     fitted_values: List[float]
+     residuals: List[float]
+     forecast: Optional[List[float]] = None
+
+
+ def check_stationarity(data: List[float], max_lags: Optional[int] = None) -> StationarityTest:
+     """Stationarity tests (ADF and KPSS)"""
+     series = pd.Series(data)
+
+     # ADF test
+     adf_result = adfuller(series, maxlag=max_lags, autolag='AIC')
+     adf_stat, adf_pvalue = adf_result[0], adf_result[1]
+     adf_critical = adf_result[4]
+
+     # KPSS test
+     kpss_result = kpss(series, regression='c', nlags='auto')
+     kpss_stat, kpss_pvalue = kpss_result[0], kpss_result[1]
+
+     # Stationary if ADF rejects a unit root and KPSS does not reject stationarity
+     is_stationary = (adf_pvalue < 0.05) and (kpss_pvalue > 0.05)
+
+     return StationarityTest(
+         adf_statistic=adf_stat,
+         adf_pvalue=adf_pvalue,
+         adf_critical_values=adf_critical,
+         kpss_statistic=kpss_stat,
+         kpss_pvalue=kpss_pvalue,
+         is_stationary=is_stationary
+     )
+
+
+ def calculate_acf_pacf(
+     data: List[float],
+     nlags: int = 20,
+     alpha: float = 0.05
+ ) -> ACFPACFResult:
+     """Compute the autocorrelation and partial autocorrelation functions"""
+     series = pd.Series(data)
+
+     # With alpha set, acf/pacf return (values, confidence_intervals)
+     acf_values = acf(series, nlags=nlags, alpha=alpha)
+     pacf_values = pacf(series, nlags=nlags, alpha=alpha)
+
+     # Build the confidence intervals
+     acf_conf = []
+     pacf_conf = []
+
+     for i in range(len(acf_values[1])):
+         acf_conf.append((acf_values[1][i][0], acf_values[1][i][1]))
+         pacf_conf.append((pacf_values[1][i][0], pacf_values[1][i][1]))
+
+     return ACFPACFResult(
+         acf_values=acf_values[0].tolist(),
+         pacf_values=pacf_values[0].tolist(),
+         acf_confidence=acf_conf,
+         pacf_confidence=pacf_conf
+     )
+
+
+ def fit_arima_model(
+     data: List[float],
+     order: Tuple[int, int, int] = (1, 1, 1),
+     seasonal_order: Tuple[int, int, int, int] = (0, 0, 0, 0)
+ ) -> ARIMAResult:
+     """Fit an ARIMA model"""
+     series = pd.Series(data)
+
+     try:
+         if seasonal_order != (0, 0, 0, 0):
+             # Seasonal ARIMA
+             model = SARIMAX(series, order=order, seasonal_order=seasonal_order)
+         else:
+             # Plain ARIMA
+             model = ARIMA(series, order=order)
+
+         fitted_model = model.fit()
+
+         return ARIMAResult(
+             order=order,
+             aic=fitted_model.aic,
+             bic=fitted_model.bic,
+             coefficients=fitted_model.params.to_dict(),
+             fitted_values=fitted_model.fittedvalues.tolist(),
+             residuals=fitted_model.resid.tolist()
+         )
+
+     except Exception as e:
+         raise ValueError(f"ARIMA model fitting failed: {str(e)}")
+
+
+ def find_best_arima_order(
+     data: List[float],
+     max_p: int = 3,
+     max_d: int = 2,
+     max_q: int = 3,
+     seasonal: bool = False,
+     max_P: int = 1,
+     max_D: int = 1,
+     max_Q: int = 1,
+     m: int = 12
+ ) -> Dict[str, Any]:
+     """Grid-search the ARIMA order that minimizes AIC"""
+     series = pd.Series(data)
+     best_aic = float('inf')
+     best_order = (0, 0, 0)
+     best_seasonal_order = (0, 0, 0, 0)
+     best_model = None
+
+     # Non-seasonal ARIMA
+     if not seasonal:
+         for p in range(max_p + 1):
+             for d in range(max_d + 1):
+                 for q in range(max_q + 1):
+                     try:
+                         model = ARIMA(series, order=(p, d, q))
+                         fitted_model = model.fit()
+                         if fitted_model.aic < best_aic:
+                             best_aic = fitted_model.aic
+                             best_order = (p, d, q)
+                             best_model = fitted_model
+                     except Exception:
+                         continue
+
+     # Seasonal ARIMA
+     else:
+         for p in range(max_p + 1):
+             for d in range(max_d + 1):
+                 for q in range(max_q + 1):
+                     for P in range(max_P + 1):
+                         for D in range(max_D + 1):
+                             for Q in range(max_Q + 1):
+                                 try:
+                                     seasonal_order = (P, D, Q, m)
+                                     model = SARIMAX(series, order=(p, d, q), seasonal_order=seasonal_order)
+                                     fitted_model = model.fit()
+                                     if fitted_model.aic < best_aic:
+                                         best_aic = fitted_model.aic
+                                         best_order = (p, d, q)
+                                         best_seasonal_order = seasonal_order
+                                         best_model = fitted_model
+                                 except Exception:
+                                     continue
+
+     if best_model is None:
+         raise ValueError("No suitable ARIMA model could be found")
+
+     return {
+         "best_order": best_order,
+         "best_seasonal_order": best_seasonal_order if seasonal else None,
+         "best_aic": best_aic,
+         "best_bic": best_model.bic,
+         "coefficients": best_model.params.to_dict(),
+         "model_summary": str(best_model.summary())
+     }
+
+
+ def decompose_time_series(
+     data: List[float],
+     model: str = "additive",
+     period: Optional[int] = None
+ ) -> Dict[str, List[float]]:
+     """Decompose a time series into trend, seasonal, and residual components"""
+     series = pd.Series(data)
+
+     # For a plain list of values an explicit period is needed;
+     # without one, seasonal_decompose requires a DatetimeIndex
+     decomposition = seasonal_decompose(series, model=model, period=period, extrapolate_trend='freq')
+
+     return {
+         "trend": decomposition.trend.fillna(0).tolist(),
+         "seasonal": decomposition.seasonal.fillna(0).tolist(),
+         "residual": decomposition.resid.fillna(0).tolist(),
+         "observed": decomposition.observed.tolist()
+     }
+
+
+ def forecast_arima(
+     data: List[float],
+     order: Tuple[int, int, int] = (1, 1, 1),
+     steps: int = 10,
+     seasonal_order: Tuple[int, int, int, int] = (0, 0, 0, 0)
+ ) -> Dict[str, Any]:
+     """Forecast with an ARIMA model"""
+     series = pd.Series(data)
+
+     try:
+         if seasonal_order != (0, 0, 0, 0):
+             model = SARIMAX(series, order=order, seasonal_order=seasonal_order)
+         else:
+             model = ARIMA(series, order=order)
+
+         fitted_model = model.fit()
+
+         # Forecast and confidence intervals in one call
+         pred = fitted_model.get_forecast(steps=steps)
+         forecast_values = pred.predicted_mean.tolist()
+         conf_int = pred.conf_int()
+
+         return {
+             "forecast": forecast_values,
+             "conf_int_lower": conf_int.iloc[:, 0].tolist(),
+             "conf_int_upper": conf_int.iloc[:, 1].tolist(),
+             "model_aic": fitted_model.aic,
+             "model_bic": fitted_model.bic
+         }
+
+     except Exception as e:
+         raise ValueError(f"ARIMA forecasting failed: {str(e)}")