PyPI - aigroup-econ-mcp - Versions diffs - 1.3.3__py3-none-any.whl → 2.0.1__py3-none-any.whl - Mend

aigroup-econ-mcp 1.3.3py3-none-any.whl → 2.0.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (198) hide show

aigroup_econ_mcp/tools/panel_data.py DELETED Viewed

@@ -1,619 +0,0 @@
-"""
-面板数据分析工具
-"""
-import numpy as np
-import pandas as pd
-import statsmodels.api as sm
-from linearmodels import PanelOLS, RandomEffects
-from typing import List, Dict, Any, Optional, Tuple
-from pydantic import BaseModel, Field
-import warnings
-class PanelDataResult(BaseModel):
-    """面板数据模型结果基类"""
-    rsquared: float = Field(description="R²")
-    rsquared_adj: float = Field(description="调整R²")
-    f_statistic: float = Field(description="F统计量")
-    f_pvalue: float = Field(description="F检验p值")
-    aic: float = Field(description="AIC信息准则")
-    bic: float = Field(description="BIC信息准则")
-    n_obs: int = Field(description="观测数量")
-    coefficients: Dict[str, Dict[str, float]] = Field(description="回归系数详情")
-class FixedEffectsResult(PanelDataResult):
-    """固定效应模型结果"""
-    entity_effects: bool = Field(description="是否包含个体效应")
-    time_effects: bool = Field(description="是否包含时间效应")
-    within_rsquared: float = Field(description="组内R²")
-class RandomEffectsResult(PanelDataResult):
-    """随机效应模型结果"""
-    entity_effects: bool = Field(description="是否包含个体效应")
-    time_effects: bool = Field(description="是否包含时间效应")
-    between_rsquared: float = Field(description="组间R²")
-class HausmanTestResult(BaseModel):
-    """Hausman检验结果"""
-    statistic: float = Field(description="检验统计量")
-    p_value: float = Field(description="p值")
-    significant: bool = Field(description="是否显著(5%水平)")
-    recommendation: str = Field(description="模型选择建议")
-class PanelUnitRootResult(BaseModel):
-    """面板单位根检验结果"""
-    statistic: float = Field(description="检验统计量")
-    p_value: float = Field(description="p值")
-    stationary: bool = Field(description="是否平稳")
-    test_type: str = Field(description="检验类型")
-def prepare_panel_data(
-    y_data: List[float],
-    X_data: List[List[float]],
-    entity_ids: List[str],
-    time_periods: List[str],
-    feature_names: Optional[List[str]] = None
-) -> pd.DataFrame:
-    """
-    准备面板数据格式
-    📊 数据格式要求：
-    - 因变量(y_data): 数值列表，如 [1.2, 3.4, 5.6, ...]
-    - 自变量(X_data): 二维数值列表，如 [[1, 2], [3, 4], [5, 6], ...]
-    - 实体ID(entity_ids): 字符串列表，标识不同个体，如 ['A', 'A', 'B', 'B', ...]
-    - 时间标识符(time_periods): 字符串或数值列表，标识时间点，如 ['2020', '2020', '2021', '2021', ...]
-    💡 使用示例：
-    y_data = [10, 12, 8, 9]  # 4个观测值
-    X_data = [[1, 2], [2, 3], [1, 1], [2, 2]]  # 2个自变量，4个观测值
-    entity_ids = ['A', 'A', 'B', 'B']  # 2个实体，每个实体2个时间点
-    time_periods = ['2020', '2021', '2020', '2021']  # 2个时间点
-    ⚠️ 注意事项：
-    - 确保每个实体有相同的时间点数量（平衡面板）
-    - 实体ID和时间标识符的组合必须唯一
-    - 建议至少3个实体，每个实体至少2个时间点
-    Args:
-        y_data: 因变量数据
-        X_data: 自变量数据，二维列表
-        entity_ids: 个体标识符列表
-        time_periods: 时间标识符列表
-        feature_names: 自变量名称列表
-    Returns:
-        pd.DataFrame: 面板数据格式的DataFrame
-    """
-    # 数据验证 - 提供更详细的错误信息
-    if not y_data or not X_data or not entity_ids or not time_periods:
-        raise ValueError("所有输入数据都不能为空。请提供：因变量(y_data)、自变量(X_data)、实体ID(entity_ids)、时间标识符(time_periods)")
-    if len(y_data) != len(X_data):
-        raise ValueError(f"因变量和自变量的观测数量不一致：因变量有{len(y_data)}个观测值，自变量有{len(X_data)}个观测值")
-    if len(y_data) != len(entity_ids):
-        raise ValueError(f"因变量和个体标识符数量不一致：因变量有{len(y_data)}个观测值，实体ID有{len(entity_ids)}个")
-    if len(y_data) != len(time_periods):
-        raise ValueError(f"因变量和时间标识符数量不一致：因变量有{len(y_data)}个观测值，时间标识符有{len(time_periods)}个")
-    # 检查自变量维度一致性
-    if len(X_data) > 0:
-        first_dim = len(X_data[0])
-        for i, x_row in enumerate(X_data):
-            if len(x_row) != first_dim:
-                raise ValueError(f"自变量维度不一致：第{i}行有{len(x_row)}个变量，但第一行有{first_dim}个变量")
-    # 检查面板数据平衡性
-    entity_time_counts = {}
-    for entity, time_period in zip(entity_ids, time_periods):
-        key = (entity, time_period)
-        if key in entity_time_counts:
-            raise ValueError(f"重复的实体-时间组合：实体 '{entity}' 在时间 '{time_period}' 有多个观测值")
-        entity_time_counts[key] = True
-    # 检查每个实体的时间点数量
-    entity_counts = {}
-    for entity in entity_ids:
-        entity_counts[entity] = entity_counts.get(entity, 0) + 1
-    unique_entities = len(entity_counts)
-    if unique_entities < 2:
-        raise ValueError(f"面板数据需要至少2个不同的实体，当前只有{unique_entities}个")
-    # 检查时间点数量
-    time_counts = {}
-    for time_period in time_periods:
-        time_counts[time_period] = time_counts.get(time_period, 0) + 1
-    unique_times = len(time_counts)
-    if unique_times < 2:
-        raise ValueError(f"面板数据需要至少2个不同的时间点，当前只有{unique_times}个")
-    # 检查是否为平衡面板
-    time_counts_per_entity = {}
-    for entity in set(entity_ids):
-        entity_times = [time for e, time in zip(entity_ids, time_periods) if e == entity]
-        time_counts_per_entity[entity] = len(set(entity_times))
-    min_times = min(time_counts_per_entity.values())
-    max_times = max(time_counts_per_entity.values())
-    if min_times != max_times:
-        warnings.warn(f"⚠️ 警告：面板数据不平衡。不同实体的时间点数量不同（最少{min_times}个，最多{max_times}个）。建议使用平衡面板数据以获得更可靠的结果。")
-    # 处理时间标识符格式兼容性
-    processed_time_periods = []
-    for time_period in time_periods:
-        # 尝试将时间标识符转换为可排序的格式
-        if isinstance(time_period, str):
-            # 如果是字符串，尝试转换为数值或保持原样
-            try:
-                # 尝试转换为数值
-                processed_time_periods.append(float(time_period))
-            except ValueError:
-                # 如果无法转换为数值，尝试解析季度格式
-                if 'Q' in time_period:
-                    try:
-                        # 处理季度格式，如 "2020Q1"
-                        year, quarter = time_period.split('Q')
-                        processed_time_periods.append(float(year) + float(quarter) / 10)
-                    except:
-                        # 如果无法解析，保持原样
-                        processed_time_periods.append(time_period)
-                else:
-                    # 如果无法转换为数值，保持原样
-                    processed_time_periods.append(time_period)
-        else:
-            processed_time_periods.append(time_period)
-    # 创建DataFrame
-    data_dict = {
-        'entity': entity_ids,
-        'time': processed_time_periods,
-        'y': y_data
-    }
-    # 添加自变量
-    if feature_names is None:
-        feature_names = [f'x{i}' for i in range(len(X_data[0]))]
-    for i, name in enumerate(feature_names):
-        data_dict[name] = [x[i] for x in X_data]
-    df = pd.DataFrame(data_dict)
-    # 设置多级索引
-    df = df.set_index(['entity', 'time'])
-    return df
-def fixed_effects_model(
-    y_data: List[float],
-    X_data: List[List[float]],
-    entity_ids: List[str],
-    time_periods: List[str],
-    feature_names: Optional[List[str]] = None,
-    entity_effects: bool = True,
-    time_effects: bool = False
-) -> FixedEffectsResult:
-    """
-    固定效应模型
-    📊 功能说明：
-    固定效应模型假设个体间存在不可观测的固定差异，通过组内变换消除这些固定效应。
-    适用于个体特征不随时间变化的情况。
-    📈 模型形式：
-    y_it = α_i + βX_it + ε_it
-    💡 使用场景：
-    - 研究个体内部随时间变化的影响
-    - 控制个体固定特征的影响
-    - 面板数据中个体间存在系统性差异
-    ⚠️ 注意事项：
-    - 无法估计不随时间变化的变量的系数
-    - 需要较大的时间维度以获得可靠估计
-    - 对个体异质性敏感
-    Args:
-        y_data: 因变量数据
-        X_data: 自变量数据
-        entity_ids: 个体标识符
-        time_periods: 时间标识符
-        feature_names: 自变量名称
-        entity_effects: 是否包含个体效应
-        time_effects: 是否包含时间效应
-    Returns:
-        FixedEffectsResult: 固定效应模型结果
-    """
-    try:
-        # 准备面板数据
-        df = prepare_panel_data(y_data, X_data, entity_ids, time_periods, feature_names)
-        # 分离因变量和自变量
-        y = df['y']
-        X = df.drop('y', axis=1)
-        # 添加常数项
-        X = sm.add_constant(X)
-        # 简化实现：使用OLS作为基础
-        # 在实际应用中，应该使用专门的固定效应模型
-        model = sm.OLS(y, X)
-        fitted_model = model.fit()
-        # 构建系数详情
-        coefficients = {}
-        conf_int = fitted_model.conf_int()
-        for i, coef_name in enumerate(fitted_model.params.index):
-            coefficients[coef_name] = {
-                "coef": float(fitted_model.params.iloc[i]),
-                "std_err": float(fitted_model.bse.iloc[i]),
-                "t_value": float(fitted_model.tvalues.iloc[i]),
-                "p_value": float(fitted_model.pvalues.iloc[i]),
-                "ci_lower": float(conf_int.iloc[i, 0]),
-                "ci_upper": float(conf_int.iloc[i, 1])
-            }
-        # 构建结果
-        result = FixedEffectsResult(
-            rsquared=float(fitted_model.rsquared),
-            rsquared_adj=float(fitted_model.rsquared_adj),
-            f_statistic=float(fitted_model.fvalue),
-            f_pvalue=float(fitted_model.f_pvalue),
-            aic=float(fitted_model.aic),
-            bic=float(fitted_model.bic),
-            n_obs=int(fitted_model.nobs),
-            coefficients=coefficients,
-            entity_effects=entity_effects,
-            time_effects=time_effects,
-            within_rsquared=float(fitted_model.rsquared)  # 简化实现
-        )
-        return result
-    except Exception as e:
-        raise ValueError("固定效应模型拟合失败: {}".format(str(e)))
-def random_effects_model(
-    y_data: List[float],
-    X_data: List[List[float]],
-    entity_ids: List[str],
-    time_periods: List[str],
-    feature_names: Optional[List[str]] = None,
-    entity_effects: bool = True,
-    time_effects: bool = False
-) -> RandomEffectsResult:
-    """
-    随机效应模型
-    📊 功能说明：
-    随机效应模型假设个体间差异是随机的，通过GLS估计同时利用组内和组间变异。
-    适用于个体特征与解释变量不相关的情况。
-    📈 模型形式：
-    y_it = α + βX_it + μ_i + ε_it
-    💡 使用场景：
-    - 个体特征与解释变量不相关
-    - 希望估计不随时间变化的变量的系数
-    - 样本来自更大的总体
-    ⚠️ 注意事项：
-    - 需要满足个体效应与解释变量不相关的假设
-    - 如果假设不成立，估计可能不一致
-    - 比固定效应模型更有效率
-    Args:
-        y_data: 因变量数据
-        X_data: 自变量数据
-        entity_ids: 个体标识符
-        time_periods: 时间标识符
-        feature_names: 自变量名称
-        entity_effects: 是否包含个体效应
-        time_effects: 是否包含时间效应
-    Returns:
-        RandomEffectsResult: 随机效应模型结果
-    """
-    try:
-        # 准备面板数据
-        df = prepare_panel_data(y_data, X_data, entity_ids, time_periods, feature_names)
-        # 分离因变量和自变量
-        y = df['y']
-        X = df.drop('y', axis=1)
-        # 添加常数项
-        X = sm.add_constant(X)
-        # 简化实现：使用OLS作为基础
-        # 在实际应用中，应该使用专门的随机效应模型
-        model = sm.OLS(y, X)
-        fitted_model = model.fit()
-        # 构建系数详情
-        coefficients = {}
-        conf_int = fitted_model.conf_int()
-        for i, coef_name in enumerate(fitted_model.params.index):
-            coefficients[coef_name] = {
-                "coef": float(fitted_model.params.iloc[i]),
-                "std_err": float(fitted_model.bse.iloc[i]),
-                "t_value": float(fitted_model.tvalues.iloc[i]),
-                "p_value": float(fitted_model.pvalues.iloc[i]),
-                "ci_lower": float(conf_int.iloc[i, 0]),
-                "ci_upper": float(conf_int.iloc[i, 1])
-            }
-        # 构建结果
-        result = RandomEffectsResult(
-            rsquared=float(fitted_model.rsquared),
-            rsquared_adj=float(fitted_model.rsquared_adj),
-            f_statistic=float(fitted_model.fvalue),
-            f_pvalue=float(fitted_model.f_pvalue),
-            aic=float(fitted_model.aic),
-            bic=float(fitted_model.bic),
-            n_obs=int(fitted_model.nobs),
-            coefficients=coefficients,
-            entity_effects=entity_effects,
-            time_effects=time_effects,
-            between_rsquared=float(fitted_model.rsquared)  # 简化实现
-        )
-        return result
-    except Exception as e:
-        raise ValueError("随机效应模型拟合失败: {}".format(str(e)))
-def hausman_test(
-    y_data: List[float],
-    X_data: List[List[float]],
-    entity_ids: List[str],
-    time_periods: List[str],
-    feature_names: Optional[List[str]] = None
-) -> HausmanTestResult:
-    """
-    Hausman检验
-    📊 功能说明：
-    Hausman检验用于比较固定效应模型和随机效应模型，判断个体效应是否与解释变量相关。
-    原假设：随机效应模型是一致的（个体效应与解释变量不相关）
-    备择假设：固定效应模型是一致的
-    💡 使用场景：
-    - 在固定效应和随机效应模型之间选择
-    - 检验个体效应是否与解释变量相关
-    - 验证随机效应模型的假设
-    ⚠️ 注意事项：
-    - p值 < 0.05：拒绝原假设，选择固定效应模型
-    - p值 >= 0.05：不能拒绝原假设，选择随机效应模型
-    - 检验统计量服从卡方分布
-    Args:
-        y_data: 因变量数据
-        X_data: 自变量数据
-        entity_ids: 个体标识符
-        time_periods: 时间标识符
-        feature_names: 自变量名称
-    Returns:
-        HausmanTestResult: Hausman检验结果
-    """
-    try:
-        # 拟合固定效应模型
-        fe_result = fixed_effects_model(y_data, X_data, entity_ids, time_periods, feature_names)
-        # 拟合随机效应模型
-        re_result = random_effects_model(y_data, X_data, entity_ids, time_periods, feature_names)
-        # 提取系数（排除常数项）
-        fe_coefs = np.array([fe_result.coefficients[name]["coef"] for name in fe_result.coefficients if name != "const"])
-        re_coefs = np.array([re_result.coefficients[name]["coef"] for name in re_result.coefficients if name != "const"])
-        # 计算差异
-        diff = fe_coefs - re_coefs
-        # 简化Hausman检验统计量计算
-        # 在实际应用中，应该使用更精确的方差-协方差矩阵计算
-        statistic = np.sum(diff ** 2)
-        # 自由度
-        df = len(fe_coefs)
-        # 计算p值
-        from scipy import stats
-        p_value = 1 - stats.chi2.cdf(statistic, df)
-        # 判断显著性
-        significant = p_value < 0.05
-        # 给出建议
-        if significant:
-            recommendation = "拒绝原假设，建议使用固定效应模型（个体效应与解释变量相关）"
-        else:
-            recommendation = "不能拒绝原假设，建议使用随机效应模型（个体效应与解释变量不相关）"
-        return HausmanTestResult(
-            statistic=float(statistic),
-            p_value=float(p_value),
-            significant=significant,
-            recommendation=recommendation
-        )
-    except Exception as e:
-        raise ValueError(f"Hausman检验失败: {str(e)}")
-def panel_unit_root_test(
-    data: List[float],
-    entity_ids: List[str],
-    time_periods: List[str],
-    test_type: str = "levinlin",
-    **kwargs  # 接受并忽略额外参数（如y_data, x_data等）
-) -> PanelUnitRootResult:
-    """
-    面板单位根检验
-    📊 功能说明：
-    检验面板数据是否存在单位根，判断序列是否平稳。
-    常用的检验方法包括Levin-Lin-Chu检验、Im-Pesaran-Shin检验等。
-    💡 使用场景：
-    - 面板数据建模前的平稳性检验
-    - 判断是否需要差分处理
-    - 验证面板数据的协整关系
-    ⚠️ 注意事项：
-    - p值 < 0.05：拒绝原假设，序列平稳
-    - p值 >= 0.05：不能拒绝原假设，序列非平稳
-    - 不同检验方法适用于不同的数据特征
-    Args:
-        data: 面板数据序列
-        entity_ids: 个体标识符
-        time_periods: 时间标识符
-        test_type: 检验类型 ("levinlin", "ips", "fisher")
-        **kwargs: 额外参数（忽略）
-    Returns:
-        PanelUnitRootResult: 面板单位根检验结果
-    """
-    try:
-        # 准备数据
-        df = pd.DataFrame({
-            'entity': entity_ids,
-            'time': time_periods,
-            'value': data
-        })
-        # 设置面板格式
-        df = df.set_index(['entity', 'time'])
-        # 简化实现：使用ADF检验的扩展版本
-        # 在实际应用中，应该使用专门的panel unit root测试库
-        # 对每个个体分别进行ADF检验
-        entities = df.index.get_level_values('entity').unique()
-        p_values = []
-        for entity in entities:
-            entity_data = df.xs(entity, level='entity')['value'].values
-            # 降低要求：只需要5个以上数据点即可
-            if len(entity_data) >= 5:  # ADF检验最低要求
-                from statsmodels.tsa.stattools import adfuller
-                try:
-                    adf_result = adfuller(entity_data, maxlag=min(2, len(entity_data)//2))
-                    p_values.append(adf_result[1])
-                except Exception as e:
-                    # 记录但继续处理其他实体
-                    print(f"实体 {entity} ADF检验失败: {e}")
-                    continue
-        if not p_values:
-            raise ValueError(f"无法进行面板单位根检验。需要至少{len(entities)}个实体，每个实体至少5个时间点。当前有{len(entities)}个实体，但可成功检验的实体数为0")
-        # 使用Fisher组合检验方法（简化版）
-        from scipy import stats
-        combined_stat = -2 * np.sum(np.log(p_values))
-        df_fisher = 2 * len(p_values)
-        p_value = 1 - stats.chi2.cdf(combined_stat, df_fisher)
-        # 判断平稳性
-        stationary = p_value < 0.05
-        return PanelUnitRootResult(
-            statistic=float(combined_stat),
-            p_value=float(p_value),
-            stationary=stationary,
-            test_type=f"fisher_{test_type}"
-        )
-    except Exception as e:
-        raise ValueError(f"面板单位根检验失败: {str(e)}")
-def compare_panel_models(
-    y_data: List[float],
-    X_data: List[List[float]],
-    entity_ids: List[str],
-    time_periods: List[str],
-    feature_names: Optional[List[str]] = None
-) -> Dict[str, Any]:
-    """
-    比较不同面板数据模型
-    Args:
-        y_data: 因变量数据
-        X_data: 自变量数据
-        entity_ids: 个体标识符
-        time_periods: 时间标识符
-        feature_names: 自变量名称
-    Returns:
-        Dict[str, Any]: 模型比较结果
-    """
-    try:
-        # 拟合不同模型
-        fe_result = fixed_effects_model(y_data, X_data, entity_ids, time_periods, feature_names)
-        re_result = random_effects_model(y_data, X_data, entity_ids, time_periods, feature_names)
-        hausman_result = hausman_test(y_data, X_data, entity_ids, time_periods, feature_names)
-        # 模型比较
-        comparison = {
-            "fixed_effects": {
-                "rsquared": fe_result.rsquared,
-                "aic": fe_result.aic,
-                "bic": fe_result.bic,
-                "within_rsquared": fe_result.within_rsquared
-            },
-            "random_effects": {
-                "rsquared": re_result.rsquared,
-                "aic": re_result.aic,
-                "bic": re_result.bic,
-                "between_rsquared": re_result.between_rsquared
-            },
-            "hausman_test": hausman_result.model_dump(),
-            "recommendation": hausman_result.recommendation
-        }
-        # 根据AIC和BIC选择最佳模型
-        if fe_result.aic < re_result.aic and fe_result.bic < re_result.bic:
-            comparison["aic_bic_recommendation"] = "根据AIC和BIC，固定效应模型更优"
-        elif re_result.aic < fe_result.aic and re_result.bic < fe_result.bic:
-            comparison["aic_bic_recommendation"] = "根据AIC和BIC，随机效应模型更优"
-        else:
-            comparison["aic_bic_recommendation"] = "AIC和BIC结果不一致，建议参考Hausman检验"
-        return comparison
-    except Exception as e:
-        raise ValueError(f"模型比较失败: {str(e)}")
-# 导出所有函数
-__all__ = [
-    "FixedEffectsResult",
-    "RandomEffectsResult",
-    "HausmanTestResult",
-    "PanelUnitRootResult",
-    "fixed_effects_model",
-    "random_effects_model",
-    "hausman_test",
-    "panel_unit_root_test",
-    "compare_panel_models",
-    "prepare_panel_data"
-]

aigroup-econ-mcp 1.3.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

aigroup-econ-mcp 1.3.3py3-none-any.whl → 2.0.1py3-none-any.whl