aigroup-econ-mcp 1.3.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. .gitignore +253 -0
  2. PKG-INFO +732 -0
  3. README.md +687 -0
  4. __init__.py +14 -0
  5. aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
  6. aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
  7. aigroup_econ_mcp-2.0.1.dist-info/entry_points.txt +2 -0
  8. aigroup_econ_mcp-2.0.1.dist-info/licenses/LICENSE +21 -0
  9. cli.py +32 -0
  10. econometrics/README.md +18 -0
  11. econometrics/__init__.py +191 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
  13. econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
  14. econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
  15. econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
  16. econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
  17. econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
  18. econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
  19. econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
  20. econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
  21. econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
  22. econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
  23. econometrics/basic_parametric_estimation/__init__.py +31 -0
  24. econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
  25. econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
  26. econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
  27. econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
  28. econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
  29. econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
  30. econometrics/causal_inference/__init__.py +66 -0
  31. econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
  32. econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
  33. econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
  34. econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
  35. econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
  36. econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
  37. econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
  38. econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
  39. econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
  40. econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
  41. econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
  42. econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
  43. econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
  44. econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
  45. econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
  46. econometrics/distribution_analysis/__init__.py +28 -0
  47. econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
  48. econometrics/distribution_analysis/time_series_decomposition.py +152 -0
  49. econometrics/distribution_analysis/variance_decomposition.py +179 -0
  50. econometrics/missing_data/__init__.py +18 -0
  51. econometrics/missing_data/imputation_methods.py +219 -0
  52. econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
  53. econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
  54. econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
  55. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
  56. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +149 -0
  57. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
  58. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
  59. econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
  60. econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +286 -0
  61. econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
  62. econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
  63. econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
  64. econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
  65. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
  66. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
  67. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
  68. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
  69. econometrics/nonparametric/__init__.py +35 -0
  70. econometrics/nonparametric/gam_model.py +117 -0
  71. econometrics/nonparametric/kernel_regression.py +161 -0
  72. econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
  73. econometrics/nonparametric/quantile_regression.py +249 -0
  74. econometrics/nonparametric/spline_regression.py +100 -0
  75. econometrics/spatial_econometrics/__init__.py +68 -0
  76. econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
  77. econometrics/spatial_econometrics/gwr_simple.py +154 -0
  78. econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
  79. econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
  80. econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
  81. econometrics/spatial_econometrics/spatial_regression.py +315 -0
  82. econometrics/spatial_econometrics/spatial_weights.py +226 -0
  83. econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
  84. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
  85. econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
  86. econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
  87. econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
  88. econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
  89. econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
  90. econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
  91. econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
  92. econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
  93. econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
  94. econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
  95. econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
  96. econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
  97. econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
  98. econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
  99. econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
  100. econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
  101. econometrics/statistical_inference/__init__.py +21 -0
  102. econometrics/statistical_inference/bootstrap_methods.py +162 -0
  103. econometrics/statistical_inference/permutation_test.py +177 -0
  104. econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
  105. econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
  106. econometrics/survival_analysis/__init__.py +18 -0
  107. econometrics/survival_analysis/survival_models.py +259 -0
  108. econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
  109. econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
  110. econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
  111. econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
  112. econometrics/tests/causal_inference_tests/__init__.py +3 -0
  113. econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
  114. econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
  115. econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
  116. econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
  117. econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
  118. econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
  119. econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
  120. econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
  121. econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
  122. econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
  123. econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
  124. econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
  125. econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
  126. econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
  127. econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
  128. econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
  129. econometrics//321/206/320/254/320/272/321/205/342/225/235/320/220/321/205/320/237/320/241/321/205/320/264/320/267/321/207/342/226/222/342/225/227/321/204/342/225/235/320/250/321/205/320/225/320/230/321/207/342/225/221/320/267/321/205/320/230/320/226/321/206/320/256/320/240.md +544 -0
  130. prompts/__init__.py +0 -0
  131. prompts/analysis_guides.py +43 -0
  132. pyproject.toml +85 -0
  133. resources/MCP_MASTER_GUIDE.md +422 -0
  134. resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
  135. resources/__init__.py +0 -0
  136. server.py +97 -0
  137. tools/README.md +88 -0
  138. tools/__init__.py +119 -0
  139. tools/causal_inference_adapter.py +658 -0
  140. tools/data_loader.py +213 -0
  141. tools/decorators.py +38 -0
  142. tools/distribution_analysis_adapter.py +121 -0
  143. tools/econometrics_adapter.py +286 -0
  144. tools/gwr_simple_adapter.py +54 -0
  145. tools/machine_learning_adapter.py +567 -0
  146. tools/mcp_tool_groups/__init__.py +15 -0
  147. tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
  148. tools/mcp_tool_groups/causal_inference_tools.py +643 -0
  149. tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
  150. tools/mcp_tool_groups/machine_learning_tools.py +422 -0
  151. tools/mcp_tool_groups/microecon_tools.py +325 -0
  152. tools/mcp_tool_groups/missing_data_tools.py +117 -0
  153. tools/mcp_tool_groups/model_specification_tools.py +402 -0
  154. tools/mcp_tool_groups/nonparametric_tools.py +225 -0
  155. tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
  156. tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
  157. tools/mcp_tool_groups/time_series_tools.py +494 -0
  158. tools/mcp_tools_registry.py +124 -0
  159. tools/microecon_adapter.py +412 -0
  160. tools/missing_data_adapter.py +73 -0
  161. tools/model_specification_adapter.py +369 -0
  162. tools/nonparametric_adapter.py +190 -0
  163. tools/output_formatter.py +563 -0
  164. tools/spatial_econometrics_adapter.py +318 -0
  165. tools/statistical_inference_adapter.py +90 -0
  166. tools/survival_analysis_adapter.py +46 -0
  167. tools/time_series_panel_data_adapter.py +858 -0
  168. tools/time_series_panel_data_tools.py +65 -0
  169. aigroup_econ_mcp/__init__.py +0 -19
  170. aigroup_econ_mcp/cli.py +0 -82
  171. aigroup_econ_mcp/config.py +0 -561
  172. aigroup_econ_mcp/server.py +0 -452
  173. aigroup_econ_mcp/tools/__init__.py +0 -19
  174. aigroup_econ_mcp/tools/base.py +0 -470
  175. aigroup_econ_mcp/tools/cache.py +0 -533
  176. aigroup_econ_mcp/tools/data_loader.py +0 -195
  177. aigroup_econ_mcp/tools/file_parser.py +0 -1027
  178. aigroup_econ_mcp/tools/machine_learning.py +0 -60
  179. aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
  180. aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
  181. aigroup_econ_mcp/tools/ml_models.py +0 -54
  182. aigroup_econ_mcp/tools/ml_regularization.py +0 -186
  183. aigroup_econ_mcp/tools/monitoring.py +0 -555
  184. aigroup_econ_mcp/tools/optimized_example.py +0 -229
  185. aigroup_econ_mcp/tools/panel_data.py +0 -619
  186. aigroup_econ_mcp/tools/regression.py +0 -214
  187. aigroup_econ_mcp/tools/statistics.py +0 -154
  188. aigroup_econ_mcp/tools/time_series.py +0 -698
  189. aigroup_econ_mcp/tools/timeout.py +0 -283
  190. aigroup_econ_mcp/tools/tool_descriptions.py +0 -410
  191. aigroup_econ_mcp/tools/tool_handlers.py +0 -1016
  192. aigroup_econ_mcp/tools/tool_registry.py +0 -478
  193. aigroup_econ_mcp/tools/validation.py +0 -482
  194. aigroup_econ_mcp-1.3.3.dist-info/METADATA +0 -525
  195. aigroup_econ_mcp-1.3.3.dist-info/RECORD +0 -30
  196. aigroup_econ_mcp-1.3.3.dist-info/entry_points.txt +0 -2
  197. /aigroup_econ_mcp-1.3.3.dist-info/licenses/LICENSE → /LICENSE +0 -0
  198. {aigroup_econ_mcp-1.3.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,66 @@
1
+ """
2
+ 因果推断模块
3
+ """
4
+
5
+ # 从因果识别策略模块导入
6
+ from .causal_identification_strategy import (
7
+ instrumental_variables_2sls,
8
+ difference_in_differences,
9
+ regression_discontinuity,
10
+ fixed_effects_model,
11
+ random_effects_model,
12
+ control_function_approach,
13
+ first_difference_model,
14
+ triple_difference,
15
+ event_study,
16
+ synthetic_control_method,
17
+ propensity_score_matching,
18
+ mediation_analysis,
19
+ moderation_analysis,
20
+ hausman_test,
21
+ IVResult,
22
+ DIDResult,
23
+ RDDResult,
24
+ FixedEffectsResult,
25
+ RandomEffectsResult,
26
+ ControlFunctionResult,
27
+ FirstDifferenceResult,
28
+ TripeDifferenceResult,
29
+ EventStudyResult,
30
+ SyntheticControlResult,
31
+ PSMMatchResult,
32
+ MediationResult,
33
+ ModerationResult,
34
+ HausmanResult
35
+ )
36
+
37
+ __all__ = [
38
+ "instrumental_variables_2sls",
39
+ "difference_in_differences",
40
+ "regression_discontinuity",
41
+ "fixed_effects_model",
42
+ "random_effects_model",
43
+ "control_function_approach",
44
+ "first_difference_model",
45
+ "triple_difference",
46
+ "event_study",
47
+ "synthetic_control_method",
48
+ "propensity_score_matching",
49
+ "mediation_analysis",
50
+ "moderation_analysis",
51
+ "hausman_test",
52
+ "IVResult",
53
+ "DIDResult",
54
+ "RDDResult",
55
+ "FixedEffectsResult",
56
+ "RandomEffectsResult",
57
+ "ControlFunctionResult",
58
+ "FirstDifferenceResult",
59
+ "TripeDifferenceResult",
60
+ "EventStudyResult",
61
+ "SyntheticControlResult",
62
+ "PSMMatchResult",
63
+ "MediationResult",
64
+ "ModerationResult",
65
+ "HausmanResult"
66
+ ]
@@ -0,0 +1,104 @@
1
+ """
2
+ 因果识别策略模块
3
+ """
4
+
5
+ from .instrumental_variables import (
6
+ instrumental_variables_2sls,
7
+ IVResult
8
+ )
9
+
10
+ from .difference_in_differences import (
11
+ difference_in_differences,
12
+ DIDResult
13
+ )
14
+
15
+ from .regression_discontinuity import (
16
+ regression_discontinuity,
17
+ RDDResult
18
+ )
19
+
20
+ from .fixed_effects import (
21
+ fixed_effects_model,
22
+ FixedEffectsResult
23
+ )
24
+
25
+ from .random_effects import (
26
+ random_effects_model,
27
+ RandomEffectsResult
28
+ )
29
+
30
+ from .control_function import (
31
+ control_function_approach,
32
+ ControlFunctionResult
33
+ )
34
+
35
+ from .first_difference import (
36
+ first_difference_model,
37
+ FirstDifferenceResult
38
+ )
39
+
40
+ from .triple_difference import (
41
+ triple_difference,
42
+ TripeDifferenceResult
43
+ )
44
+
45
+ from .event_study import (
46
+ event_study,
47
+ EventStudyResult
48
+ )
49
+
50
+ from .synthetic_control import (
51
+ synthetic_control_method,
52
+ SyntheticControlResult
53
+ )
54
+
55
+ from .propensity_score_matching import (
56
+ propensity_score_matching,
57
+ PSMMatchResult
58
+ )
59
+
60
+ from .mediation_analysis import (
61
+ mediation_analysis,
62
+ MediationResult
63
+ )
64
+
65
+ from .moderation_analysis import (
66
+ moderation_analysis,
67
+ ModerationResult
68
+ )
69
+
70
+ from .hausman_test import (
71
+ hausman_test,
72
+ HausmanResult
73
+ )
74
+
75
+ __all__ = [
76
+ "instrumental_variables_2sls",
77
+ "difference_in_differences",
78
+ "regression_discontinuity",
79
+ "fixed_effects_model",
80
+ "random_effects_model",
81
+ "control_function_approach",
82
+ "first_difference_model",
83
+ "triple_difference",
84
+ "event_study",
85
+ "synthetic_control_method",
86
+ "propensity_score_matching",
87
+ "mediation_analysis",
88
+ "moderation_analysis",
89
+ "hausman_test",
90
+ "IVResult",
91
+ "DIDResult",
92
+ "RDDResult",
93
+ "FixedEffectsResult",
94
+ "RandomEffectsResult",
95
+ "ControlFunctionResult",
96
+ "FirstDifferenceResult",
97
+ "TripeDifferenceResult",
98
+ "EventStudyResult",
99
+ "SyntheticControlResult",
100
+ "PSMMatchResult",
101
+ "MediationResult",
102
+ "ModerationResult",
103
+ "HausmanResult"
104
+ ]
@@ -0,0 +1,112 @@
1
+ """
2
+ 控制函数法实现
3
+ """
4
+
5
+ from typing import List, Optional
6
+ import numpy as np
7
+ import pandas as pd
8
+ from pydantic import BaseModel, Field
9
+ import statsmodels.api as sm
10
+ from scipy import stats
11
+
12
+
13
+ class ControlFunctionResult(BaseModel):
14
+ """控制函数法结果"""
15
+ method: str = Field(default="Control Function Approach", description="使用的因果识别方法")
16
+ estimate: float = Field(..., description="因果效应估计值")
17
+ std_error: float = Field(..., description="标准误")
18
+ t_statistic: float = Field(..., description="t统计量")
19
+ p_value: float = Field(..., description="p值")
20
+ confidence_interval: List[float] = Field(..., description="置信区间")
21
+ n_observations: int = Field(..., description="观测数量")
22
+ endogeneity_test: Optional[dict] = Field(None, description="内生性检验结果")
23
+
24
+
25
+ def control_function_approach(
26
+ y: List[float],
27
+ x: List[float],
28
+ z: List[List[float]],
29
+ constant: bool = True
30
+ ) -> ControlFunctionResult:
31
+ """
32
+ 控制函数法
33
+
34
+ 控制函数法是一种解决内生性问题的方法,通过在第二阶段回归中加入第一阶段回归的残差来控制内生性。
35
+
36
+ Args:
37
+ y: 因变量
38
+ x: 内生自变量
39
+ z: 外生变量(包括工具变量和外生控制变量)
40
+ constant: 是否包含常数项
41
+
42
+ Returns:
43
+ ControlFunctionResult: 控制函数法结果
44
+ """
45
+ # 转换为numpy数组
46
+ y_array = np.array(y)
47
+ x_array = np.array(x)
48
+ z_array = np.array(z)
49
+
50
+ if z_array.ndim == 1:
51
+ z_array = z_array.reshape(-1, 1)
52
+
53
+ n = len(y)
54
+
55
+ # 第一阶段:将内生变量x对所有外生变量z回归
56
+ if constant:
57
+ Z = np.column_stack([np.ones(n), z_array])
58
+ else:
59
+ Z = z_array
60
+
61
+ # 第一阶段回归
62
+ first_stage_model = sm.OLS(x_array, Z)
63
+ first_stage_results = first_stage_model.fit()
64
+
65
+ # 获取第一阶段残差
66
+ x_residuals = first_stage_results.resid
67
+
68
+ # 第二阶段:将y对x和第一阶段残差回归
69
+ if constant:
70
+ X_second = np.column_stack([np.ones(n), x_array, x_residuals])
71
+ else:
72
+ X_second = np.column_stack([x_array, x_residuals])
73
+
74
+ second_stage_model = sm.OLS(y_array, X_second)
75
+ second_stage_results = second_stage_model.fit()
76
+
77
+ # 提取x的系数作为因果效应估计
78
+ # 如果有常数项,x是第2列;否则是第1列
79
+ x_coef_idx = 1 if constant else 0
80
+ coef = second_stage_results.params[x_coef_idx]
81
+ stderr = second_stage_results.bse[x_coef_idx]
82
+ tstat = second_stage_results.tvalues[x_coef_idx]
83
+ pval = second_stage_results.pvalues[x_coef_idx]
84
+
85
+ # 计算置信区间
86
+ ci_lower = coef - 1.96 * stderr
87
+ ci_upper = coef + 1.96 * stderr
88
+
89
+ # 内生性检验(检验控制函数/残差项的系数是否显著)
90
+ residual_coef_idx = 2 if constant else 1
91
+ residual_coef = second_stage_results.params[residual_coef_idx]
92
+ residual_stderr = second_stage_results.bse[residual_coef_idx]
93
+ residual_tstat = second_stage_results.tvalues[residual_coef_idx]
94
+ residual_pval = second_stage_results.pvalues[residual_coef_idx]
95
+
96
+ endogeneity_test = {
97
+ "residual_coefficient": float(residual_coef),
98
+ "residual_std_error": float(residual_stderr),
99
+ "t_statistic": float(residual_tstat),
100
+ "p_value": float(residual_pval),
101
+ "interpretation": "如果残差项系数显著,表明存在内生性问题"
102
+ }
103
+
104
+ return ControlFunctionResult(
105
+ estimate=float(coef),
106
+ std_error=float(stderr),
107
+ t_statistic=float(tstat),
108
+ p_value=float(pval),
109
+ confidence_interval=[float(ci_lower), float(ci_upper)],
110
+ n_observations=n,
111
+ endogeneity_test=endogeneity_test
112
+ )
@@ -0,0 +1,107 @@
1
+ """
2
+ 双重差分法 (DID) 实现
3
+ """
4
+
5
+ from typing import List, Optional, Dict, Any
6
+ import numpy as np
7
+ import pandas as pd
8
+ from pydantic import BaseModel, Field
9
+ from scipy import stats
10
+ import statsmodels.api as sm
11
+
12
+
13
+ class DIDResult(BaseModel):
14
+ """双重差分法结果"""
15
+ method: str = Field(default="Difference-in-Differences", description="使用的因果识别方法")
16
+ estimate: float = Field(..., description="因果效应估计值")
17
+ std_error: float = Field(..., description="标准误")
18
+ t_statistic: float = Field(..., description="t统计量")
19
+ p_value: float = Field(..., description="p值")
20
+ confidence_interval: List[float] = Field(..., description="置信区间")
21
+ n_observations: int = Field(..., description="观测数量")
22
+ parallel_trend_test: Optional[Dict[str, Any]] = Field(None, description="平行趋势检验")
23
+
24
+
25
+ def difference_in_differences(
26
+ treatment: List[int],
27
+ time_period: List[int],
28
+ outcome: List[float],
29
+ covariates: Optional[List[List[float]]] = None
30
+ ) -> DIDResult:
31
+ """
32
+ 双重差分法 (DID)
33
+
34
+ 使用statsmodels实现双重差分法,评估处理效应。
35
+
36
+ Args:
37
+ treatment: 处理组虚拟变量 (0/1)
38
+ time_period: 时间虚拟变量 (0/1)
39
+ outcome: 结果变量
40
+ covariates: 协变量
41
+
42
+ Returns:
43
+ DIDResult: 双重差分法结果
44
+ """
45
+ # 构建数据
46
+ data = {
47
+ 'treatment': treatment,
48
+ 'time': time_period,
49
+ 'outcome': outcome
50
+ }
51
+
52
+ # 添加协变量
53
+ if covariates:
54
+ covariates_array = np.array(covariates)
55
+ if covariates_array.ndim == 1:
56
+ covariates_array = covariates_array.reshape(-1, 1)
57
+
58
+ k_cov = covariates_array.shape[1]
59
+ for i in range(k_cov):
60
+ data[f"covariate_{i+1}"] = covariates_array[:, i]
61
+
62
+ df = pd.DataFrame(data)
63
+
64
+ # 构建交互项
65
+ df['treatment_time'] = df['treatment'] * df['time']
66
+
67
+ # 构建回归公式
68
+ independent_vars = ['treatment', 'time', 'treatment_time']
69
+ if covariates:
70
+ independent_vars.extend([f"covariate_{i+1}" for i in range(k_cov)])
71
+
72
+ # 添加常数项
73
+ df['const'] = 1
74
+ independent_vars = ['const'] + independent_vars
75
+
76
+ # 使用statsmodels进行OLS回归
77
+ X = df[independent_vars]
78
+ y = df['outcome']
79
+
80
+ model = sm.OLS(y, X)
81
+ results = model.fit()
82
+
83
+ # 提取DID估计结果(交互项系数)
84
+ coef = results.params['treatment_time']
85
+ stderr = results.bse['treatment_time']
86
+ tstat = results.tvalues['treatment_time']
87
+ pval = results.pvalues['treatment_time']
88
+
89
+ # 计算置信区间
90
+ ci_lower = coef - 1.96 * stderr
91
+ ci_upper = coef + 1.96 * stderr
92
+
93
+ # 平行趋势检验(简化处理)
94
+ # 这里只是一个示例,实际的平行趋势检验需要更多的前期数据
95
+ parallel_trend = {
96
+ "description": "Simplified parallel trend test - full test requires pre-treatment periods"
97
+ }
98
+
99
+ return DIDResult(
100
+ estimate=float(coef),
101
+ std_error=float(stderr),
102
+ t_statistic=float(tstat),
103
+ p_value=float(pval),
104
+ confidence_interval=[float(ci_lower), float(ci_upper)],
105
+ n_observations=len(df),
106
+ parallel_trend_test=parallel_trend
107
+ )
@@ -0,0 +1,119 @@
1
+ """
2
+ 事件研究法 (Event Study) 实现
3
+ """
4
+
5
+ from typing import List, Optional
6
+ import numpy as np
7
+ import pandas as pd
8
+ from pydantic import BaseModel, Field
9
+ import statsmodels.api as sm
10
+ from scipy import stats
11
+
12
+
13
+ class EventStudyResult(BaseModel):
14
+ """事件研究法结果"""
15
+ method: str = Field(default="Event Study", description="使用的因果识别方法")
16
+ estimates: List[float] = Field(..., description="各期效应估计值")
17
+ std_errors: List[float] = Field(..., description="各期效应标准误")
18
+ t_statistics: List[float] = Field(..., description="各期效应t统计量")
19
+ p_values: List[float] = Field(..., description="各期效应p值")
20
+ confidence_intervals: List[List[float]] = Field(..., description="各期效应置信区间")
21
+ n_observations: int = Field(..., description="观测数量")
22
+ event_time_periods: List[int] = Field(..., description="事件时间期列表")
23
+
24
+
25
+ def event_study(
26
+ outcome: List[float],
27
+ treatment: List[int],
28
+ entity_ids: List[str],
29
+ time_periods: List[str],
30
+ event_time: List[int]
31
+ ) -> EventStudyResult:
32
+ """
33
+ 事件研究法 (Event Study)
34
+
35
+ 事件研究法通过分析处理前后多个时间点的效应,验证处理效应的动态变化模式。
36
+
37
+ Args:
38
+ outcome: 结果变量
39
+ treatment: 处理状态变量
40
+ entity_ids: 个体标识符
41
+ time_periods: 时间标识符
42
+ event_time: 相对于事件发生时间的时间标识(如-2, -1, 0, 1, 2)
43
+
44
+ Returns:
45
+ EventStudyResult: 事件研究法结果
46
+ """
47
+ # 构建数据
48
+ df = pd.DataFrame({
49
+ 'outcome': outcome,
50
+ 'treatment': treatment,
51
+ 'entity': entity_ids,
52
+ 'time': time_periods,
53
+ 'event_time': event_time
54
+ })
55
+
56
+ # 创建时间虚拟变量
57
+ time_dummies = pd.get_dummies(df['event_time'], prefix='time')
58
+ df = pd.concat([df, time_dummies], axis=1)
59
+
60
+ # 与处理状态交互
61
+ for col in time_dummies.columns:
62
+ df[f'{col}_treated'] = df[col] * df['treatment']
63
+
64
+ # 构建回归设计矩阵
65
+ interaction_vars = [col for col in df.columns if col.endswith('_treated')]
66
+ X = df[interaction_vars]
67
+ X = sm.add_constant(X) # 添加常数项
68
+ y = df['outcome']
69
+
70
+ # OLS回归
71
+ model = sm.OLS(y, X)
72
+ results = model.fit()
73
+
74
+ # 提取各期效应估计结果
75
+ estimates = []
76
+ std_errors = []
77
+ t_statistics = []
78
+ p_values = []
79
+ confidence_intervals = []
80
+ event_time_periods = []
81
+
82
+ for col in interaction_vars:
83
+ # 从列名中提取时间期数
84
+ time_period = int(col.replace('time_', '').replace('_treated', ''))
85
+ event_time_periods.append(time_period)
86
+
87
+ coef = results.params[col]
88
+ stderr = results.bse[col]
89
+ tstat = results.tvalues[col]
90
+ pval = results.pvalues[col]
91
+
92
+ # 计算置信区间
93
+ ci_lower = coef - 1.96 * stderr
94
+ ci_upper = coef + 1.96 * stderr
95
+
96
+ estimates.append(float(coef))
97
+ std_errors.append(float(stderr))
98
+ t_statistics.append(float(tstat))
99
+ p_values.append(float(pval))
100
+ confidence_intervals.append([float(ci_lower), float(ci_upper)])
101
+
102
+ # 按时间期排序
103
+ sorted_indices = np.argsort(event_time_periods)
104
+ event_time_periods = [event_time_periods[i] for i in sorted_indices]
105
+ estimates = [estimates[i] for i in sorted_indices]
106
+ std_errors = [std_errors[i] for i in sorted_indices]
107
+ t_statistics = [t_statistics[i] for i in sorted_indices]
108
+ p_values = [p_values[i] for i in sorted_indices]
109
+ confidence_intervals = [confidence_intervals[i] for i in sorted_indices]
110
+
111
+ return EventStudyResult(
112
+ estimates=estimates,
113
+ std_errors=std_errors,
114
+ t_statistics=t_statistics,
115
+ p_values=p_values,
116
+ confidence_intervals=confidence_intervals,
117
+ n_observations=len(df),
118
+ event_time_periods=event_time_periods
119
+ )
@@ -0,0 +1,89 @@
1
+ """
2
+ 一阶差分模型实现
3
+ """
4
+
5
+ from typing import List, Optional
6
+ import numpy as np
7
+ import pandas as pd
8
+ from pydantic import BaseModel, Field
9
+ import statsmodels.api as sm
10
+ from scipy import stats
11
+
12
+
13
+ class FirstDifferenceResult(BaseModel):
14
+ """一阶差分模型结果"""
15
+ method: str = Field(default="First Difference Model", description="使用的因果识别方法")
16
+ estimate: float = Field(..., description="因果效应估计值")
17
+ std_error: float = Field(..., description="标准误")
18
+ t_statistic: float = Field(..., description="t统计量")
19
+ p_value: float = Field(..., description="p值")
20
+ confidence_interval: List[float] = Field(..., description="置信区间")
21
+ n_observations: int = Field(..., description="观测数量")
22
+
23
+
24
+ def first_difference_model(
25
+ y: List[float],
26
+ x: List[float],
27
+ entity_ids: List[str]
28
+ ) -> FirstDifferenceResult:
29
+ """
30
+ 一阶差分模型
31
+
32
+ 一阶差分法通过差分操作消除不随时间变化的个体固定效应,常用于面板数据分析。
33
+
34
+ Args:
35
+ y: 因变量(时间序列)
36
+ x: 自变量(时间序列)
37
+ entity_ids: 个体标识符
38
+
39
+ Returns:
40
+ FirstDifferenceResult: 一阶差分模型结果
41
+ """
42
+ # 转换为DataFrame便于处理
43
+ df = pd.DataFrame({
44
+ 'y': y,
45
+ 'x': x,
46
+ 'entity': entity_ids
47
+ })
48
+
49
+ # 按个体排序
50
+ df = df.sort_values(['entity'])
51
+
52
+ # 计算一阶差分
53
+ df['y_diff'] = df.groupby('entity')['y'].diff()
54
+ df['x_diff'] = df.groupby('entity')['x'].diff()
55
+
56
+ # 删除NaN值(每组的第一行)
57
+ df_diff = df.dropna()
58
+
59
+ # 提取差分后的数据
60
+ y_diff = df_diff['y_diff'].values
61
+ x_diff = df_diff['x_diff'].values
62
+
63
+ n = len(y_diff)
64
+
65
+ # 添加常数项
66
+ X = np.column_stack([np.ones(n), x_diff])
67
+
68
+ # OLS回归
69
+ model = sm.OLS(y_diff, X)
70
+ results = model.fit()
71
+
72
+ # 提取x_diff的系数作为因果效应估计
73
+ coef = results.params[1]
74
+ stderr = results.bse[1]
75
+ tstat = results.tvalues[1]
76
+ pval = results.pvalues[1]
77
+
78
+ # 计算置信区间
79
+ ci_lower = coef - 1.96 * stderr
80
+ ci_upper = coef + 1.96 * stderr
81
+
82
+ return FirstDifferenceResult(
83
+ estimate=float(coef),
84
+ std_error=float(stderr),
85
+ t_statistic=float(tstat),
86
+ p_value=float(pval),
87
+ confidence_interval=[float(ci_lower), float(ci_upper)],
88
+ n_observations=n
89
+ )
@@ -0,0 +1,103 @@
1
+ """
2
+ 面板数据固定效应模型实现
3
+ """
4
+
5
+ from typing import List, Optional
6
+ import numpy as np
7
+ import pandas as pd
8
+ from pydantic import BaseModel, Field
9
+ from scipy import stats
10
+ import statsmodels.api as sm
11
+ from linearmodels.panel import PanelOLS
12
+
13
+
14
+ class FixedEffectsResult(BaseModel):
15
+ """固定效应模型结果"""
16
+ method: str = Field(default="Fixed Effects Model", description="使用的因果识别方法")
17
+ estimate: float = Field(..., description="因果效应估计值")
18
+ std_error: float = Field(..., description="标准误")
19
+ t_statistic: float = Field(..., description="t统计量")
20
+ p_value: float = Field(..., description="p值")
21
+ confidence_interval: List[float] = Field(..., description="置信区间")
22
+ n_observations: int = Field(..., description="观测数量")
23
+ n_entities: int = Field(..., description="个体数量")
24
+ n_time_periods: int = Field(..., description="时间期数")
25
+
26
+
27
+ def fixed_effects_model(
28
+ y: List[float],
29
+ x: List[List[float]],
30
+ entity_ids: List[str],
31
+ time_periods: List[str],
32
+ constant: bool = True
33
+ ) -> FixedEffectsResult:
34
+ """
35
+ 固定效应模型
36
+
37
+ 使用linearmodels.panel.PanelOLS实现固定效应模型。
38
+
39
+ Args:
40
+ y: 因变量
41
+ x: 自变量
42
+ entity_ids: 个体标识符
43
+ time_periods: 时间标识符
44
+ constant: 是否包含常数项
45
+
46
+ Returns:
47
+ FixedEffectsResult: 固定效应模型结果
48
+ """
49
+ # 转换为DataFrame
50
+ x_array = np.array(x)
51
+ if x_array.ndim == 1:
52
+ x_array = x_array.reshape(-1, 1)
53
+
54
+ # 创建多重索引面板数据
55
+ df = pd.DataFrame({
56
+ 'y': y,
57
+ 'entity': entity_ids,
58
+ 'time': [int(t.split('_')[1]) if isinstance(t, str) and '_' in t else i
59
+ for i, t in enumerate(time_periods)] # 处理字符串格式的时间
60
+ })
61
+
62
+ # 添加自变量
63
+ k_x = x_array.shape[1]
64
+ for i in range(k_x):
65
+ df[f'x{i+1}'] = x_array[:, i]
66
+
67
+ # 设置多重索引
68
+ df = df.set_index(['entity', 'time'])
69
+
70
+ # 定义因变量和自变量
71
+ dependent = df['y']
72
+ explanatory_vars = [f'x{i+1}' for i in range(k_x)]
73
+ explanatory = df[explanatory_vars]
74
+
75
+ # 使用linearmodels进行固定效应估计
76
+ model = PanelOLS(dependent, explanatory, entity_effects=True)
77
+ results = model.fit()
78
+
79
+ # 提取主要变量的估计结果(假设关注最后一个变量)
80
+ target_var = f'x{k_x}'
81
+ coef = results.params[target_var]
82
+ stderr = results.std_errors[target_var]
83
+ tstat = results.tstats[target_var]
84
+ pval = results.pvalues[target_var]
85
+
86
+ # 计算置信区间
87
+ ci_lower = coef - 1.96 * stderr
88
+ ci_upper = coef + 1.96 * stderr
89
+
90
+ # 计算实体和时间期数
91
+ n_entities = len(df.index.get_level_values('entity').unique())
92
+ n_time_periods = len(df.index.get_level_values('time').unique())
93
+
94
+ return FixedEffectsResult(
95
+ estimate=float(coef),
96
+ std_error=float(stderr),
97
+ t_statistic=float(tstat),
98
+ p_value=float(pval),
99
+ confidence_interval=[float(ci_lower), float(ci_upper)],
100
+ n_observations=len(df),
101
+ n_entities=n_entities,
102
+ n_time_periods=n_time_periods
103
+ )