aigroup-econ-mcp 1.3.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. .gitignore +253 -0
  2. PKG-INFO +732 -0
  3. README.md +687 -0
  4. __init__.py +14 -0
  5. aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
  6. aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
  7. aigroup_econ_mcp-2.0.1.dist-info/entry_points.txt +2 -0
  8. aigroup_econ_mcp-2.0.1.dist-info/licenses/LICENSE +21 -0
  9. cli.py +32 -0
  10. econometrics/README.md +18 -0
  11. econometrics/__init__.py +191 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
  13. econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
  14. econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
  15. econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
  16. econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
  17. econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
  18. econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
  19. econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
  20. econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
  21. econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
  22. econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
  23. econometrics/basic_parametric_estimation/__init__.py +31 -0
  24. econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
  25. econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
  26. econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
  27. econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
  28. econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
  29. econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
  30. econometrics/causal_inference/__init__.py +66 -0
  31. econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
  32. econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
  33. econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
  34. econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
  35. econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
  36. econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
  37. econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
  38. econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
  39. econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
  40. econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
  41. econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
  42. econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
  43. econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
  44. econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
  45. econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
  46. econometrics/distribution_analysis/__init__.py +28 -0
  47. econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
  48. econometrics/distribution_analysis/time_series_decomposition.py +152 -0
  49. econometrics/distribution_analysis/variance_decomposition.py +179 -0
  50. econometrics/missing_data/__init__.py +18 -0
  51. econometrics/missing_data/imputation_methods.py +219 -0
  52. econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
  53. econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
  54. econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
  55. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
  56. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +149 -0
  57. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
  58. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
  59. econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
  60. econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +286 -0
  61. econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
  62. econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
  63. econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
  64. econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
  65. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
  66. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
  67. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
  68. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
  69. econometrics/nonparametric/__init__.py +35 -0
  70. econometrics/nonparametric/gam_model.py +117 -0
  71. econometrics/nonparametric/kernel_regression.py +161 -0
  72. econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
  73. econometrics/nonparametric/quantile_regression.py +249 -0
  74. econometrics/nonparametric/spline_regression.py +100 -0
  75. econometrics/spatial_econometrics/__init__.py +68 -0
  76. econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
  77. econometrics/spatial_econometrics/gwr_simple.py +154 -0
  78. econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
  79. econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
  80. econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
  81. econometrics/spatial_econometrics/spatial_regression.py +315 -0
  82. econometrics/spatial_econometrics/spatial_weights.py +226 -0
  83. econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
  84. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
  85. econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
  86. econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
  87. econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
  88. econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
  89. econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
  90. econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
  91. econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
  92. econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
  93. econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
  94. econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
  95. econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
  96. econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
  97. econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
  98. econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
  99. econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
  100. econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
  101. econometrics/statistical_inference/__init__.py +21 -0
  102. econometrics/statistical_inference/bootstrap_methods.py +162 -0
  103. econometrics/statistical_inference/permutation_test.py +177 -0
  104. econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
  105. econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
  106. econometrics/survival_analysis/__init__.py +18 -0
  107. econometrics/survival_analysis/survival_models.py +259 -0
  108. econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
  109. econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
  110. econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
  111. econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
  112. econometrics/tests/causal_inference_tests/__init__.py +3 -0
  113. econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
  114. econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
  115. econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
  116. econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
  117. econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
  118. econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
  119. econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
  120. econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
  121. econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
  122. econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
  123. econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
  124. econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
  125. econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
  126. econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
  127. econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
  128. econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
  129. econometrics//321/206/320/254/320/272/321/205/342/225/235/320/220/321/205/320/237/320/241/321/205/320/264/320/267/321/207/342/226/222/342/225/227/321/204/342/225/235/320/250/321/205/320/225/320/230/321/207/342/225/221/320/267/321/205/320/230/320/226/321/206/320/256/320/240.md +544 -0
  130. prompts/__init__.py +0 -0
  131. prompts/analysis_guides.py +43 -0
  132. pyproject.toml +85 -0
  133. resources/MCP_MASTER_GUIDE.md +422 -0
  134. resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
  135. resources/__init__.py +0 -0
  136. server.py +97 -0
  137. tools/README.md +88 -0
  138. tools/__init__.py +119 -0
  139. tools/causal_inference_adapter.py +658 -0
  140. tools/data_loader.py +213 -0
  141. tools/decorators.py +38 -0
  142. tools/distribution_analysis_adapter.py +121 -0
  143. tools/econometrics_adapter.py +286 -0
  144. tools/gwr_simple_adapter.py +54 -0
  145. tools/machine_learning_adapter.py +567 -0
  146. tools/mcp_tool_groups/__init__.py +15 -0
  147. tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
  148. tools/mcp_tool_groups/causal_inference_tools.py +643 -0
  149. tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
  150. tools/mcp_tool_groups/machine_learning_tools.py +422 -0
  151. tools/mcp_tool_groups/microecon_tools.py +325 -0
  152. tools/mcp_tool_groups/missing_data_tools.py +117 -0
  153. tools/mcp_tool_groups/model_specification_tools.py +402 -0
  154. tools/mcp_tool_groups/nonparametric_tools.py +225 -0
  155. tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
  156. tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
  157. tools/mcp_tool_groups/time_series_tools.py +494 -0
  158. tools/mcp_tools_registry.py +124 -0
  159. tools/microecon_adapter.py +412 -0
  160. tools/missing_data_adapter.py +73 -0
  161. tools/model_specification_adapter.py +369 -0
  162. tools/nonparametric_adapter.py +190 -0
  163. tools/output_formatter.py +563 -0
  164. tools/spatial_econometrics_adapter.py +318 -0
  165. tools/statistical_inference_adapter.py +90 -0
  166. tools/survival_analysis_adapter.py +46 -0
  167. tools/time_series_panel_data_adapter.py +858 -0
  168. tools/time_series_panel_data_tools.py +65 -0
  169. aigroup_econ_mcp/__init__.py +0 -19
  170. aigroup_econ_mcp/cli.py +0 -82
  171. aigroup_econ_mcp/config.py +0 -561
  172. aigroup_econ_mcp/server.py +0 -452
  173. aigroup_econ_mcp/tools/__init__.py +0 -19
  174. aigroup_econ_mcp/tools/base.py +0 -470
  175. aigroup_econ_mcp/tools/cache.py +0 -533
  176. aigroup_econ_mcp/tools/data_loader.py +0 -195
  177. aigroup_econ_mcp/tools/file_parser.py +0 -1027
  178. aigroup_econ_mcp/tools/machine_learning.py +0 -60
  179. aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
  180. aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
  181. aigroup_econ_mcp/tools/ml_models.py +0 -54
  182. aigroup_econ_mcp/tools/ml_regularization.py +0 -186
  183. aigroup_econ_mcp/tools/monitoring.py +0 -555
  184. aigroup_econ_mcp/tools/optimized_example.py +0 -229
  185. aigroup_econ_mcp/tools/panel_data.py +0 -619
  186. aigroup_econ_mcp/tools/regression.py +0 -214
  187. aigroup_econ_mcp/tools/statistics.py +0 -154
  188. aigroup_econ_mcp/tools/time_series.py +0 -698
  189. aigroup_econ_mcp/tools/timeout.py +0 -283
  190. aigroup_econ_mcp/tools/tool_descriptions.py +0 -410
  191. aigroup_econ_mcp/tools/tool_handlers.py +0 -1016
  192. aigroup_econ_mcp/tools/tool_registry.py +0 -478
  193. aigroup_econ_mcp/tools/validation.py +0 -482
  194. aigroup_econ_mcp-1.3.3.dist-info/METADATA +0 -525
  195. aigroup_econ_mcp-1.3.3.dist-info/RECORD +0 -30
  196. aigroup_econ_mcp-1.3.3.dist-info/entry_points.txt +0 -2
  197. /aigroup_econ_mcp-1.3.3.dist-info/licenses/LICENSE → /LICENSE +0 -0
  198. {aigroup_econ_mcp-1.3.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,127 @@
1
+ """
2
+ 加权最小二乘法 (Weighted Least Squares, WLS) 模型实现
3
+ """
4
+
5
+ from typing import List, Dict, Any, Optional
6
+ from dataclasses import dataclass
7
+ from pydantic import BaseModel, Field
8
+ import numpy as np
9
+ import pandas as pd
10
+ from scipy import stats
11
+ import statsmodels.api as sm
12
+
13
+ from tools.decorators import with_file_support_decorator as econometric_tool, validate_input
14
+
15
+
16
class WLSResult(BaseModel):
    """Result of a weighted least squares (WLS) regression."""
    # Estimated regression coefficients (intercept first when one is fitted).
    coefficients: List[float] = Field(..., description="回归系数")
    # Standard errors of the coefficient estimates.
    std_errors: List[float] = Field(..., description="系数标准误")
    # t statistics for the individual coefficients.
    t_values: List[float] = Field(..., description="t统计量")
    # Two-sided p-values of the t statistics.
    p_values: List[float] = Field(..., description="p值")
    # Lower bounds of the coefficient confidence intervals.
    conf_int_lower: List[float] = Field(..., description="置信区间下界")
    # Upper bounds of the coefficient confidence intervals.
    conf_int_upper: List[float] = Field(..., description="置信区间上界")
    # Coefficient of determination.
    r_squared: float = Field(..., description="R方")
    # R-squared adjusted for the number of regressors.
    adj_r_squared: float = Field(..., description="调整R方")
    # Overall F statistic of the regression.
    f_statistic: float = Field(..., description="F统计量")
    # p-value of the overall F statistic.
    f_p_value: float = Field(..., description="F统计量p值")
    # Number of observations used in the fit.
    n_obs: int = Field(..., description="观测数量")
    # Regressor names, aligned index-by-index with `coefficients`.
    feature_names: List[str] = Field(..., description="特征名称")
    # Observation weights that were supplied to the fit.
    weights: List[float] = Field(..., description="使用的权重")
31
+
32
+
33
@econometric_tool("wls_regression")
@validate_input(data_type="econometric")
def wls_regression(
    y_data: List[float],
    x_data: List[List[float]],
    weights: List[float],
    feature_names: Optional[List[str]] = None,
    constant: bool = True,
    confidence_level: float = 0.95
) -> WLSResult:
    """
    Weighted least squares (WLS) regression.

    Args:
        y_data: Dependent-variable observations.
        x_data: Regressor matrix (one row per observation).
        weights: One strictly positive weight per observation.
        feature_names: Optional regressor names; "const" is prepended when an
            intercept is fitted.
        constant: Whether to prepend an intercept column.
        confidence_level: Confidence level for the coefficient intervals.

    Returns:
        WLSResult: Estimates, inference statistics and fit diagnostics.

    Raises:
        ValueError: On inconsistent dimensions, non-positive weights, or a
            failed model fit.
    """
    # Convert inputs to float64 arrays.
    y = np.asarray(y_data, dtype=np.float64)
    X = np.asarray(x_data, dtype=np.float64)
    w = np.asarray(weights, dtype=np.float64)

    # Dimension checks: weights and y must align with the rows of X.
    if len(w) != len(y):
        raise ValueError("权重数量必须与观测值数量相同")
    if X.shape[0] != len(y):
        # Previously unchecked: a mismatch only surfaced as a cryptic
        # statsmodels error inside the fit.
        raise ValueError(f"因变量长度({len(y)})与自变量行数({X.shape[0]})不一致")

    # WLS requires strictly positive weights.
    if np.any(w <= 0):
        raise ValueError("所有权重必须为正数")

    # Optionally prepend an intercept and build the feature-name list.
    if constant:
        X = sm.add_constant(X)
        if feature_names:
            feature_names = ["const"] + feature_names
        else:
            feature_names = [f"x{i}" for i in range(X.shape[1])]
    else:
        if not feature_names:
            feature_names = [f"x{i}" for i in range(X.shape[1])]
    # Previously unchecked: wrong-length names silently misaligned the output.
    if len(feature_names) != X.shape[1]:
        raise ValueError(f"特征名称数量({len(feature_names)})与变量数量({X.shape[1]})不一致")

    # Need more observations than parameters for the fit to be identified.
    n, k = X.shape
    if n <= k:
        raise ValueError(f"观测数量({n})必须大于变量数量({k})")

    # Fit the WLS model via statsmodels.
    try:
        results = sm.WLS(y, X, weights=w).fit()
    except Exception as e:
        raise ValueError(f"无法拟合WLS模型: {str(e)}")

    # Point estimates and per-coefficient inference.
    coefficients = results.params.tolist()
    std_errors = results.bse.tolist()
    t_values = results.tvalues.tolist()
    p_values = results.pvalues.tolist()

    # Confidence intervals at the requested level. np.asarray accepts both the
    # ndarray and DataFrame forms conf_int can return.
    alpha = 1 - confidence_level
    conf_int = np.asarray(results.conf_int(alpha=alpha))
    conf_int_lower = conf_int[:, 0].tolist()
    conf_int_upper = conf_int[:, 1].tolist()

    # Goodness-of-fit measures.
    r_squared = float(results.rsquared)
    adj_r_squared = float(results.rsquared_adj)

    # The F statistic can be NaN (e.g. intercept-only models); substitute
    # neutral values so the result model stays valid.
    f_statistic = float(results.fvalue) if not np.isnan(results.fvalue) else 0.0
    f_p_value = float(results.f_pvalue) if not np.isnan(results.f_pvalue) else 1.0

    return WLSResult(
        coefficients=coefficients,
        std_errors=std_errors,
        t_values=t_values,
        p_values=p_values,
        conf_int_lower=conf_int_lower,
        conf_int_upper=conf_int_upper,
        r_squared=r_squared,
        adj_r_squared=adj_r_squared,
        f_statistic=f_statistic,
        f_p_value=f_p_value,
        n_obs=int(results.nobs),
        feature_names=feature_names,
        weights=weights
    )
@@ -0,0 +1,35 @@
1
"""
Nonparametric and semiparametric methods.

These estimators relax the linear / parametric functional-form assumptions
of classical regression models.
"""

from .kernel_regression import (
    kernel_regression,
    KernelRegressionResult
)

from .quantile_regression import (
    quantile_regression,
    QuantileRegressionResult
)

from .spline_regression import (
    spline_regression,
    SplineRegressionResult
)

from .gam_model import (
    gam_model,
    GAMResult
)

# Public API of the nonparametric subpackage.
# NOTE(review): quantile_regression.py also defines multi_quantile_regression,
# which is not re-exported here — confirm whether that is intentional.
__all__ = [
    'kernel_regression',
    'KernelRegressionResult',
    'quantile_regression',
    'QuantileRegressionResult',
    'spline_regression',
    'SplineRegressionResult',
    'gam_model',
    'GAMResult'
]
@@ -0,0 +1,117 @@
1
+ """
2
+ 广义可加模型 (Generalized Additive Model - GAM)
3
+ 基于 pygam 库实现
4
+ """
5
+
6
+ from typing import List, Optional
7
+ from pydantic import BaseModel, Field
8
+ import numpy as np
9
+
10
+ try:
11
+ from pygam import LinearGAM, LogisticGAM, s, f
12
+ PYGAM_AVAILABLE = True
13
+ except ImportError:
14
+ PYGAM_AVAILABLE = False
15
+ LinearGAM = None
16
+
17
+
18
class GAMResult(BaseModel):
    """Result of a generalized additive model (GAM) fit."""
    # In-sample fitted values.
    fitted_values: List[float] = Field(..., description="拟合值")
    # Residuals (observed minus fitted).
    residuals: List[float] = Field(..., description="残差")
    # Model deviance reported by pygam.
    deviance: float = Field(..., description="偏差")
    # Akaike information criterion.
    aic: float = Field(..., description="AIC信息准则")
    # Small-sample corrected AIC.
    aicc: float = Field(..., description="AICc信息准则")
    # Pseudo R² (explained deviance).
    r_squared: float = Field(..., description="伪R²")
    # Number of spline basis functions per feature.
    n_splines: List[int] = Field(..., description="每个特征的样条数")
    # "regression" or "classification".
    problem_type: str = Field(..., description="问题类型")
    # Number of observations used in the fit.
    n_observations: int = Field(..., description="观测数量")
    # Human-readable summary of the fit.
    summary: str = Field(..., description="摘要信息")
30
+
31
+
32
def gam_model(
    y_data: List[float],
    x_data: List[List[float]],
    problem_type: str = "regression",
    n_splines: int = 10,
    lam: float = 0.6
) -> GAMResult:
    """
    Generalized additive model (GAM) via pygam.

    Args:
        y_data: Dependent variable.
        x_data: Regressors as a 2-D list (rows = observations).
        problem_type: "regression" (LinearGAM) or "classification"
            (LogisticGAM).
        n_splines: Number of spline basis functions per feature.
        lam: Smoothing parameter (lambda) applied to every term.

    Returns:
        GAMResult: Fitted values, residuals and fit statistics.

    Raises:
        ImportError: pygam is not installed.
        ValueError: Unsupported problem_type.
    """
    if not PYGAM_AVAILABLE:
        raise ImportError("pygam库未安装。请运行: pip install pygam")

    # Data preparation.
    y = np.array(y_data, dtype=np.float64)
    X = np.array(x_data, dtype=np.float64)

    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n, k = X.shape

    # Build one smooth term per feature and combine them additively.
    # BUG FIX: the previous code re-created the model inside the loop
    # (gam = LinearGAM(s(i, ...))), so with k > 1 only the LAST feature's
    # spline was ever fitted; the classification branch only ever used s(0).
    terms = s(0, n_splines=n_splines, lam=lam)
    for i in range(1, k):
        terms = terms + s(i, n_splines=n_splines, lam=lam)

    if problem_type == "regression":
        gam = LinearGAM(terms)
    elif problem_type == "classification":
        gam = LogisticGAM(terms)
    else:
        raise ValueError(f"不支持的问题类型: {problem_type}")

    # Fit and compute in-sample predictions.
    gam.fit(X, y)
    y_pred = gam.predict(X)
    residuals = y - y_pred

    # Fit statistics reported by pygam.
    deviance = float(gam.statistics_['deviance'])
    aic = float(gam.statistics_['AIC'])
    aicc = float(gam.statistics_['AICc'])

    # Pseudo R² as explained deviance.
    r_squared = float(gam.statistics_['pseudo_r2']['explained_deviance'])

    # Every feature uses the same number of splines.
    n_splines_list = [n_splines] * k

    summary = f"""广义可加模型 (GAM):
- 观测数量: {n}
- 特征数量: {k}
- 问题类型: {problem_type}
- 样条数: {n_splines}
- 平滑参数: {lam}
- 偏差: {deviance:.4f}
- AIC: {aic:.2f}
- AICc: {aicc:.2f}
- 伪R²: {r_squared:.4f}
"""

    return GAMResult(
        fitted_values=y_pred.tolist(),
        residuals=residuals.tolist(),
        deviance=deviance,
        aic=aic,
        aicc=aicc,
        r_squared=r_squared,
        n_splines=n_splines_list,
        problem_type=problem_type,
        n_observations=n,
        summary=summary
    )
@@ -0,0 +1,161 @@
1
+ """
2
+ 核回归 (Kernel Regression)
3
+ 基于 statsmodels.nonparametric 库实现
4
+ """
5
+
6
+ from typing import List, Optional, Tuple
7
+ from pydantic import BaseModel, Field
8
+ import numpy as np
9
+
10
+ try:
11
+ from statsmodels.nonparametric.kernel_regression import KernelReg
12
+ STATSMODELS_AVAILABLE = True
13
+ except ImportError:
14
+ STATSMODELS_AVAILABLE = False
15
+ KernelReg = None
16
+
17
+
18
class KernelRegressionResult(BaseModel):
    """Result of a kernel regression fit."""
    # In-sample fitted values.
    fitted_values: List[float] = Field(..., description="拟合值")
    # Residuals (observed minus fitted).
    residuals: List[float] = Field(..., description="残差")
    # Bandwidth used for each regressor.
    bandwidth: List[float] = Field(..., description="带宽参数")
    # Kernel label echoed from the call (informational).
    kernel_type: str = Field(..., description="核函数类型")
    # Number of observations used in the fit.
    n_observations: int = Field(..., description="观测数量")
    # Number of regressors.
    n_predictors: int = Field(..., description="预测变量数量")
    # R² computed from residual vs. total sum of squares.
    r_squared: float = Field(..., description="R²统计量")
    # Approximate AIC; None when it cannot be computed.
    aic: Optional[float] = Field(None, description="AIC信息准则")
    # Human-readable summary of the fit.
    summary: str = Field(..., description="摘要信息")
29
+
30
+
31
def kernel_regression(
    y_data: List[float],
    x_data: List[List[float]],
    kernel_type: str = "gaussian",
    bandwidth: Optional[List[float]] = None,
    bandwidth_method: str = "cv_ls",
    variable_type: Optional[str] = None
) -> KernelRegressionResult:
    """
    Kernel (local-linear) regression via statsmodels' KernelReg.

    Args:
        y_data: Dependent variable.
        x_data: Regressors as a 2-D list (rows = observations).
        kernel_type: Requested kernel label. NOTE(review): this value is only
            echoed into the result; KernelReg is constructed with its default
            kernels regardless — confirm before relying on it.
        bandwidth: One bandwidth per regressor; if None it is selected
            automatically by `bandwidth_method`.
        bandwidth_method: Bandwidth selection rule — "cv_ls" (least-squares
            cross-validation), "aic", or "normal_reference".
        variable_type: Per-variable type string ('c' continuous, 'u' unordered
            categorical, 'o' ordered categorical), e.g. "cco" for three
            variables; None means all continuous.

    Returns:
        KernelRegressionResult: Fitted values, residuals, bandwidths and fit
        statistics.

    Raises:
        ImportError: statsmodels is not installed.
        ValueError: Invalid input data or failed model construction.
    """
    if not STATSMODELS_AVAILABLE:
        raise ImportError(
            "statsmodels库未安装。请运行: pip install statsmodels"
        )

    # Basic input validation.
    if not y_data or not x_data:
        raise ValueError("y_data和x_data不能为空")

    # Data preparation.
    y = np.array(y_data, dtype=np.float64)
    X = np.array(x_data, dtype=np.float64)

    # Promote a single regressor to a column matrix.
    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n = len(y)
    k = X.shape[1]

    if len(y) != X.shape[0]:
        raise ValueError(f"因变量长度({len(y)})与自变量长度({X.shape[0]})不一致")

    # Variable-type string: default is all-continuous.
    if variable_type is None:
        var_type = 'c' * k
    else:
        var_type = variable_type
        if len(var_type) != k:
            raise ValueError(f"variable_type长度({len(var_type)})与自变量数量({k})不一致")

    # Build the local-linear kernel regression, with automatic or fixed
    # bandwidths.
    try:
        if bandwidth is None:
            kr = KernelReg(
                endog=y,
                exog=X,
                var_type=var_type,
                reg_type='ll',  # local-linear regression
                bw=bandwidth_method
            )
        else:
            if len(bandwidth) != k:
                raise ValueError(f"bandwidth长度({len(bandwidth)})与自变量数量({k})不一致")
            kr = KernelReg(
                endog=y,
                exog=X,
                var_type=var_type,
                reg_type='ll',
                bw=np.array(bandwidth)
            )
    except Exception as e:
        raise ValueError(f"核回归模型构建失败: {str(e)}")

    # In-sample fitted values (KernelReg.fit also returns marginal effects,
    # which are discarded here).
    fitted_values, _ = kr.fit(X)
    fitted_values = fitted_values.flatten()

    residuals = y - fitted_values

    # Selected/supplied bandwidths as a plain list.
    bw = kr.bw.tolist() if hasattr(kr.bw, 'tolist') else [float(kr.bw)]

    # R² from residual vs. total sum of squares.
    ss_res = np.sum(residuals ** 2)
    ss_tot = np.sum((y - np.mean(y)) ** 2)
    r_squared = float(1 - ss_res / ss_tot) if ss_tot > 0 else 0.0

    # Approximate AIC from the Gaussian log-likelihood. Undefined for a
    # perfect fit (ss_res == 0 would take log(0)); previously this was
    # wrapped in a bare `except:` that could also mask unrelated errors.
    if ss_res > 0:
        log_likelihood = -0.5 * n * (np.log(2 * np.pi) + np.log(ss_res / n) + 1)
        aic = float(2 * k - 2 * log_likelihood)
    else:
        aic = None

    summary = f"""核回归分析:
- 观测数量: {n}
- 预测变量: {k}
- 核函数: {kernel_type}
- 带宽: {[f'{b:.4f}' for b in bw]}
- 带宽方法: {bandwidth_method}
- R²: {r_squared:.4f}
"""
    if aic is not None:
        summary += f"- AIC: {aic:.2f}\n"

    return KernelRegressionResult(
        fitted_values=fitted_values.tolist(),
        residuals=residuals.tolist(),
        bandwidth=bw,
        kernel_type=kernel_type,
        n_observations=n,
        n_predictors=k,
        r_squared=r_squared,
        aic=aic,
        summary=summary
    )
@@ -0,0 +1,249 @@
1
+ """
2
+ 分位数回归 (Quantile Regression)
3
+ 基于 statsmodels.regression.quantile_regression 库实现
4
+ """
5
+
6
+ from typing import List, Optional, Dict
7
+ from pydantic import BaseModel, Field
8
+ import numpy as np
9
+
10
+ try:
11
+ import statsmodels.api as sm
12
+ from statsmodels.regression.quantile_regression import QuantReg
13
+ STATSMODELS_AVAILABLE = True
14
+ except ImportError:
15
+ STATSMODELS_AVAILABLE = False
16
+ QuantReg = None
17
+
18
+
19
class QuantileRegressionResult(BaseModel):
    """Result of a single-quantile regression fit."""
    # Quantile level tau in (0, 1) that was estimated.
    quantile: float = Field(..., description="分位数水平")
    # Estimated coefficients (intercept first).
    coefficients: List[float] = Field(..., description="回归系数")
    # Standard errors of the coefficients.
    std_errors: List[float] = Field(..., description="标准误")
    # t statistics for the individual coefficients.
    t_values: List[float] = Field(..., description="t统计量")
    # Two-sided p-values of the t statistics.
    p_values: List[float] = Field(..., description="p值")
    # Lower bounds of the coefficient confidence intervals.
    conf_int_lower: List[float] = Field(..., description="置信区间下界")
    # Upper bounds of the coefficient confidence intervals.
    conf_int_upper: List[float] = Field(..., description="置信区间上界")
    # Names aligned with `coefficients` ("const" plus regressor names).
    feature_names: List[str] = Field(..., description="特征名称")
    # Koenker-Machado pseudo R².
    pseudo_r_squared: float = Field(..., description="伪R²")
    # Number of observations used in the fit.
    n_observations: int = Field(..., description="观测数量")
    # Human-readable summary of the fit.
    summary: str = Field(..., description="摘要信息")
32
+
33
+
34
class MultiQuantileResult(BaseModel):
    """Result of estimating the same regression at several quantile levels."""
    # Quantile levels that were estimated.
    quantiles: List[float] = Field(..., description="分位数水平列表")
    # Coefficient vectors keyed by quantile label (e.g. "τ=0.5").
    coefficients_by_quantile: Dict[str, List[float]] = Field(..., description="各分位数的系数")
    # Names aligned with each coefficient vector ("const" first).
    feature_names: List[str] = Field(..., description="特征名称")
    # Number of observations used in the fits.
    n_observations: int = Field(..., description="观测数量")
    # Human-readable summary of the fits.
    summary: str = Field(..., description="摘要信息")
41
+
42
+
43
def quantile_regression(
    y_data: List[float],
    x_data: List[List[float]],
    quantile: float = 0.5,
    feature_names: Optional[List[str]] = None,
    confidence_level: float = 0.95
) -> QuantileRegressionResult:
    """
    Quantile regression via statsmodels' QuantReg.

    Args:
        y_data: Dependent variable.
        x_data: Regressors as a 2-D list (rows = observations).
        quantile: Quantile level in (0, 1); 0.5 gives median regression.
        feature_names: Optional regressor names ("const" is prepended).
        confidence_level: Confidence level for the coefficient intervals.

    Returns:
        QuantileRegressionResult: Coefficients, inference and fit statistics.

    Raises:
        ImportError: statsmodels is not installed.
        ValueError: Invalid input data or a failed fit.
    """
    if not STATSMODELS_AVAILABLE:
        raise ImportError(
            "statsmodels库未安装。请运行: pip install statsmodels"
        )

    # Input validation.
    if not y_data or not x_data:
        raise ValueError("y_data和x_data不能为空")

    if not 0 < quantile < 1:
        raise ValueError("quantile必须在0和1之间")

    # Data preparation.
    y = np.array(y_data, dtype=np.float64)
    X = np.array(x_data, dtype=np.float64)

    # Promote a single regressor to a column matrix.
    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n = len(y)
    k = X.shape[1]

    if len(y) != X.shape[0]:
        raise ValueError(f"因变量长度({len(y)})与自变量长度({X.shape[0]})不一致")

    # Prepend an intercept column.
    X_with_const = sm.add_constant(X)

    # Feature names, with "const" first to match the coefficient order.
    if feature_names is None:
        feature_names = [f"X{i+1}" for i in range(k)]
    all_feature_names = ["const"] + feature_names

    # Build and fit the quantile regression model.
    try:
        model = QuantReg(y, X_with_const)
        results = model.fit(q=quantile)
    except Exception as e:
        raise ValueError(f"分位数回归拟合失败: {str(e)}")

    coefficients = results.params.tolist()

    # Standard errors; fall back to zeros if statsmodels cannot compute them.
    try:
        std_errors = results.bse.tolist()
    except Exception:
        std_errors = [0.0] * len(coefficients)

    # t statistics and p-values, with neutral fallbacks.
    try:
        t_values = results.tvalues.tolist()
        p_values = results.pvalues.tolist()
    except Exception:
        t_values = [0.0] * len(coefficients)
        p_values = [1.0] * len(coefficients)

    # Confidence intervals. np.asarray handles both the ndarray and DataFrame
    # forms conf_int can return — the previous `.iloc` access raised on the
    # ndarray form (plain-array exog) and silently fell into the fallback.
    try:
        alpha = 1 - confidence_level
        conf_int = np.asarray(results.conf_int(alpha=alpha))
        conf_int_lower = conf_int[:, 0].tolist()
        conf_int_upper = conf_int[:, 1].tolist()
    except Exception:
        # Normal-approximation fallback at the REQUESTED level. Previously
        # this hard-coded 1.96 (i.e. 95%) and ignored confidence_level.
        from scipy.stats import norm
        z = float(norm.ppf(1 - (1 - confidence_level) / 2))
        conf_int_lower = [c - z * se for c, se in zip(coefficients, std_errors)]
        conf_int_upper = [c + z * se for c, se in zip(coefficients, std_errors)]

    # Koenker-Machado pseudo R², with a neutral fallback.
    try:
        pseudo_r_squared = float(results.prsquared)
    except Exception:
        pseudo_r_squared = 0.0

    # Build the human-readable summary.
    summary = f"""分位数回归分析:
- 分位数τ: {quantile}
- 观测数量: {n}
- 协变量数: {k}
- 伪R²: {pseudo_r_squared:.4f}

系数估计:
"""
    for name, coef, se, t, p in zip(
        all_feature_names, coefficients, std_errors, t_values, p_values
    ):
        # Conventional significance stars.
        sig = "***" if p < 0.01 else "**" if p < 0.05 else "*" if p < 0.10 else ""
        summary += f"  {name}: {coef:.4f} (SE: {se:.4f}, t={t:.2f}, p={p:.4f}){sig}\n"

    return QuantileRegressionResult(
        quantile=quantile,
        coefficients=coefficients,
        std_errors=std_errors,
        t_values=t_values,
        p_values=p_values,
        conf_int_lower=conf_int_lower,
        conf_int_upper=conf_int_upper,
        feature_names=all_feature_names,
        pseudo_r_squared=pseudo_r_squared,
        n_observations=n,
        summary=summary
    )
172
+
173
+
174
def multi_quantile_regression(
    y_data: List[float],
    x_data: List[List[float]],
    quantiles: Optional[List[float]] = None,
    feature_names: Optional[List[str]] = None
) -> MultiQuantileResult:
    """
    Estimate the same regression at several quantile levels.

    Args:
        y_data: Dependent variable.
        x_data: Regressors.
        quantiles: Quantile levels; defaults to [0.1, 0.25, 0.5, 0.75, 0.9].
        feature_names: Optional regressor names ("const" is prepended).

    Returns:
        MultiQuantileResult: Coefficient vectors keyed by quantile level.

    Raises:
        ImportError: statsmodels is not installed.
        ValueError: Empty input data.
    """
    if not STATSMODELS_AVAILABLE:
        raise ImportError("statsmodels库未安装")

    # Input validation.
    if not y_data or not x_data:
        raise ValueError("y_data和x_data不能为空")

    # Avoid the mutable-default-argument pitfall: the previous signature used
    # a shared list literal as the default.
    if quantiles is None:
        quantiles = [0.1, 0.25, 0.5, 0.75, 0.9]

    # Data preparation.
    y = np.array(y_data, dtype=np.float64)
    X = np.array(x_data, dtype=np.float64)

    if X.ndim == 1:
        X = X.reshape(-1, 1)

    n = len(y)
    k = X.shape[1]

    # Prepend an intercept column.
    X_with_const = sm.add_constant(X)

    # Feature names, with "const" first to match the coefficient order.
    if feature_names is None:
        feature_names = [f"X{i+1}" for i in range(k)]
    all_feature_names = ["const"] + feature_names

    # Fit one QuantReg per level; a failed fit contributes NaN coefficients
    # so the remaining quantiles are still reported.
    coefficients_by_quantile = {}
    for q in quantiles:
        try:
            results = QuantReg(y, X_with_const).fit(q=q)
            coefficients_by_quantile[f"τ={q}"] = results.params.tolist()
        except Exception:
            coefficients_by_quantile[f"τ={q}"] = [np.nan] * (k + 1)

    # Build the human-readable summary.
    summary = f"""多分位数回归分析:
- 观测数量: {n}
- 协变量数: {k}
- 分位数: {quantiles}

各分位数的系数估计:
"""
    for name_idx, name in enumerate(all_feature_names):
        summary += f"\n{name}:\n"
        for q in quantiles:
            coef = coefficients_by_quantile[f"τ={q}"][name_idx]
            summary += f"  τ={q}: {coef:.4f}\n"

    return MultiQuantileResult(
        quantiles=quantiles,
        coefficients_by_quantile=coefficients_by_quantile,
        feature_names=all_feature_names,
        n_observations=n,
        summary=summary
    )