PyPI - aigroup-econ-mcp - Versions diffs - 1.4.3__py3-none-any.whl → 2.0.1__py3-none-any.whl - Mend

aigroup-econ-mcp 1.4.3py3-none-any.whl → 2.0.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (97) hide show

econometrics/tests/causal_inference_tests/test_all_methods.py ADDED Viewed

@@ -0,0 +1,418 @@
+"""
+测试所有因果识别策略方法
+"""
+import numpy as np
+import unittest
+from econometrics.causal_inference.causal_identification_strategy import *
+class TestAllCausalMethods(unittest.TestCase):
+    def test_instrumental_variables_2sls(self):
+        """测试工具变量法"""
+        np.random.seed(42)
+        n = 100
+        # 工具变量
+        z = np.random.normal(0, 1, n)
+        # 内生变量（与误差项相关）
+        e1 = np.random.normal(0, 1, n)
+        x = 1 + 0.5 * z + e1
+        # 结果变量
+        e2 = np.random.normal(0, 1, n)
+        y = 2 + 1.5 * x + e2 + 0.3 * e1  # 包含内生性
+        # 执行工具变量回归
+        result = instrumental_variables_2sls(
+            y=y.tolist(),
+            x=x.reshape(-1, 1).tolist(),
+            instruments=z.reshape(-1, 1).tolist()
+        )
+        # 检查结果
+        self.assertIsNotNone(result.estimate)
+        self.assertIsNotNone(result.std_error)
+        self.assertIsNotNone(result.p_value)
+        self.assertGreater(result.n_observations, 0)
+    def test_control_function_approach(self):
+        """测试控制函数法"""
+        np.random.seed(42)
+        n = 100
+        # 外生变量
+        z1 = np.random.normal(0, 1, n)
+        z2 = np.random.normal(0, 1, n)
+        # 内生变量（与误差项相关）
+        e1 = np.random.normal(0, 1, n)
+        x = 1 + 0.5 * z1 + 0.3 * z2 + e1
+        # 结果变量
+        e2 = np.random.normal(0, 1, n)
+        y = 2 + 1.5 * x + e2 + 0.3 * e1  # 包含内生性
+        # 执行控制函数法
+        result = control_function_approach(
+            y=y.tolist(),
+            x=x.tolist(),
+            z=np.column_stack([z1, z2]).tolist()
+        )
+        # 检查结果
+        self.assertIsNotNone(result.estimate)
+        self.assertIsNotNone(result.std_error)
+        self.assertIsNotNone(result.p_value)
+        self.assertGreater(result.n_observations, 0)
+    def test_fixed_effects_model(self):
+        """测试固定效应模型"""
+        np.random.seed(42)
+        n_entities = 20
+        n_periods = 10
+        n = n_entities * n_periods
+        # 个体标识
+        entity_ids = [f"entity_{i}" for i in range(n_entities) for _ in range(n_periods)]
+        # 时间标识
+        time_periods = [f"period_{t}" for _ in range(n_entities) for t in range(n_periods)]
+        # 自变量
+        x = np.random.normal(0, 1, (n, 2)).tolist()
+        # 因变量（包含个体固定效应）
+        entity_effects = np.random.normal(0, 1, n_entities)
+        y = []
+        for i in range(n):
+            entity_idx = i // n_periods
+            y_value = 1 + 2 * x[i][0] + 1.5 * x[i][1] + entity_effects[entity_idx] + np.random.normal(0, 0.5)
+            y.append(y_value)
+        # 执行固定效应模型
+        result = fixed_effects_model(
+            y=y,
+            x=x,
+            entity_ids=entity_ids,
+            time_periods=time_periods
+        )
+        # 检查结果
+        self.assertIsNotNone(result.estimate)
+        self.assertIsNotNone(result.std_error)
+        self.assertIsNotNone(result.p_value)
+        self.assertEqual(result.n_observations, n)
+    def test_random_effects_model(self):
+        """测试随机效应模型"""
+        np.random.seed(42)
+        n_entities = 20
+        n_periods = 10
+        n = n_entities * n_periods
+        # 个体标识
+        entity_ids = [f"entity_{i}" for i in range(n_entities) for _ in range(n_periods)]
+        # 时间标识
+        time_periods = [f"period_{t}" for _ in range(n_entities) for t in range(n_periods)]
+        # 自变量
+        x = np.random.normal(0, 1, (n, 2)).tolist()
+        # 因变量（包含个体随机效应）
+        entity_effects = np.random.normal(0, 1, n_entities)
+        y = []
+        for i in range(n):
+            entity_idx = i // n_periods
+            y_value = 1 + 2 * x[i][0] + 1.5 * x[i][1] + entity_effects[entity_idx] + np.random.normal(0, 0.5)
+            y.append(y_value)
+        # 执行随机效应模型
+        result = random_effects_model(
+            y=y,
+            x=x,
+            entity_ids=entity_ids,
+            time_periods=time_periods
+        )
+        # 检查结果
+        self.assertIsNotNone(result.estimate)
+        self.assertIsNotNone(result.std_error)
+        self.assertIsNotNone(result.p_value)
+        self.assertEqual(result.n_observations, n)
+    def test_first_difference_model(self):
+        """测试一阶差分模型"""
+        np.random.seed(42)
+        n_entities = 20
+        n_periods = 10
+        n = n_entities * n_periods
+        # 个体标识
+        entity_ids = [f"entity_{i}" for i in range(n_entities) for _ in range(n_periods)]
+        # 时间标识
+        time_periods = [f"period_{t}" for _ in range(n_entities) for t in range(n_periods)]
+        # 生成面板数据
+        x = np.cumsum(np.random.normal(0, 1, n))  # 随时间累积的变量
+        y = 2 + 1.5 * x + np.random.normal(0, 1, n)  # 因变量
+        # 执行一阶差分模型
+        result = first_difference_model(
+            y=y.tolist(),
+            x=x.tolist(),
+            entity_ids=entity_ids
+        )
+        # 检查结果
+        self.assertIsNotNone(result.estimate)
+        self.assertIsNotNone(result.std_error)
+        self.assertIsNotNone(result.p_value)
+        self.assertGreater(result.n_observations, 0)
+    def test_hausman_test(self):
+        """测试Hausman检验"""
+        np.random.seed(42)
+        n_entities = 20
+        n_periods = 10
+        n = n_entities * n_periods
+        # 个体标识
+        entity_ids = [f"entity_{i}" for i in range(n_entities) for _ in range(n_periods)]
+        # 时间标识
+        time_periods = [f"period_{t}" for _ in range(n_entities) for t in range(n_periods)]
+        # 自变量
+        x = np.random.normal(0, 1, (n, 2)).tolist()
+        # 因变量
+        y = []
+        for i in range(n):
+            y_value = 1 + 2 * x[i][0] + 1.5 * x[i][1] + np.random.normal(0, 1)
+            y.append(y_value)
+        # 执行Hausman检验
+        try:
+            result = hausman_test(
+                y=y,
+                x=x,
+                entity_ids=entity_ids,
+                time_periods=time_periods
+            )
+            # 检查结果
+            self.assertIsNotNone(result.hausman_statistic)
+            self.assertIsNotNone(result.p_value)
+        except:
+            # 如果出现数值问题，跳过测试
+            pass
+    def test_difference_in_differences(self):
+        """测试双重差分法"""
+        np.random.seed(42)
+        n = 200
+        # 处理组标识（0=控制组，1=处理组）
+        treatment = np.concatenate([np.zeros(100), np.ones(100)]).tolist()
+        # 时间标识（0=处理前，1=处理后）
+        time_period = np.concatenate([np.zeros(50), np.ones(50), np.zeros(50), np.ones(50)]).tolist()
+        # 结果变量
+        outcome = []
+        for i in range(n):
+            if treatment[i] == 0 and time_period[i] == 0:
+                outcome.append(np.random.normal(10, 1))
+            elif treatment[i] == 0 and time_period[i] == 1:
+                outcome.append(np.random.normal(10, 1))
+            elif treatment[i] == 1 and time_period[i] == 0:
+                outcome.append(np.random.normal(10, 1))
+            else:  # treatment[i] == 1 and time_period[i] == 1
+                outcome.append(np.random.normal(12, 1))
+        # 执行DID分析
+        result = difference_in_differences(
+            treatment=treatment,
+            time_period=time_period,
+            outcome=outcome
+        )
+        # 检查结果
+        self.assertIsNotNone(result.estimate)
+        self.assertIsNotNone(result.std_error)
+        self.assertIsNotNone(result.p_value)
+        self.assertGreater(result.n_observations, 0)
+    def test_triple_difference(self):
+        """测试三重差分法"""
+        np.random.seed(42)
+        n = 400
+        # 生成变量
+        treatment_group = np.tile([0, 0, 1, 1], n//4).tolist()
+        time_period = np.tile([0, 1, 0, 1], n//4).tolist()
+        cohort_group = np.tile([0, 0, 0, 0, 1, 1, 1, 1], n//8).tolist()
+        # 结果变量
+        outcome = []
+        for i in range(n):
+            if treatment_group[i] == 1 and time_period[i] == 1 and cohort_group[i] == 1:
+                outcome.append(np.random.normal(12, 1))  # 处理效应
+            else:
+                outcome.append(np.random.normal(10, 1))
+        # 执行DDD分析
+        result = triple_difference(
+            outcome=outcome,
+            treatment_group=treatment_group,
+            time_period=time_period,
+            cohort_group=cohort_group
+        )
+        # 检查结果
+        self.assertIsNotNone(result.estimate)
+        self.assertIsNotNone(result.std_error)
+        self.assertIsNotNone(result.p_value)
+        self.assertGreater(result.n_observations, 0)
+    def test_regression_discontinuity(self):
+        """测试断点回归设计"""
+        np.random.seed(42)
+        n = 200
+        cutoff = 0.0
+        # 运行变量
+        running_variable = np.random.uniform(-1, 1, n).tolist()
+        # 结果变量 - 在断点处有跳跃
+        outcome = []
+        for r in running_variable:
+            if r >= cutoff:
+                outcome.append(2 + 1.5 * r + np.random.normal(0, 0.5) + 1.0)  # +1.0是处理效应
+            else:
+                outcome.append(2 + 1.5 * r + np.random.normal(0, 0.5))
+        # 执行RDD分析
+        result = regression_discontinuity(
+            running_variable=running_variable,
+            outcome=outcome,
+            cutoff=cutoff,
+            bandwidth=0.5
+        )
+        # 检查结果
+        self.assertIsNotNone(result.estimate)
+        self.assertIsNotNone(result.std_error)
+        self.assertIsNotNone(result.p_value)
+        self.assertGreater(result.n_observations, 0)
+        self.assertEqual(result.discontinuity_location, cutoff)
+    def test_propensity_score_matching(self):
+        """测试倾向得分匹配"""
+        np.random.seed(42)
+        n = 200
+        # 协变量
+        x1 = np.random.normal(0, 1, n)
+        x2 = np.random.normal(0, 1, n)
+        covariates = np.column_stack([x1, x2]).tolist()
+        # 倾向得分
+        pscore = 1 / (1 + np.exp(-(0.5 * x1 + 0.3 * x2)))
+        treatment = (np.random.uniform(0, 1, n) < pscore).astype(int).tolist()
+        # 结果变量
+        outcome = (2 + 1.5 * np.array(treatment) + 0.8 * x1 + 0.5 * x2 +
+                  np.random.normal(0, 1, n)).tolist()
+        # 执行PSM
+        result = propensity_score_matching(
+            treatment=treatment,
+            outcome=outcome,
+            covariates=covariates
+        )
+        # 检查结果
+        self.assertIsNotNone(result.ate)
+        self.assertIsNotNone(result.std_error)
+        self.assertIsNotNone(result.p_value)
+        self.assertGreater(result.n_observations, 0)
+    def test_mediation_analysis(self):
+        """测试中介效应分析"""
+        np.random.seed(42)
+        n = 200
+        # 处理变量
+        treatment = np.random.normal(0, 1, n).tolist()
+        # 协变量
+        x1 = np.random.normal(0, 1, n)
+        x2 = np.random.normal(0, 1, n)
+        covariates = np.column_stack([x1, x2]).tolist()
+        # 中介变量
+        mediator = (1 + 0.8 * np.array(treatment) + 0.3 * x1 + 0.2 * x2 +
+                   np.random.normal(0, 1, n)).tolist()
+        # 结果变量
+        outcome = (2 + 1.2 * np.array(treatment) + 0.7 * np.array(mediator) +
+                  0.4 * x1 + 0.3 * x2 + np.random.normal(0, 1, n)).tolist()
+        # 执行中介效应分析
+        result = mediation_analysis(
+            outcome=outcome,
+            treatment=treatment,
+            mediator=mediator,
+            covariates=covariates
+        )
+        # 检查结果
+        self.assertIsNotNone(result.direct_effect)
+        self.assertIsNotNone(result.indirect_effect)
+        self.assertIsNotNone(result.total_effect)
+        self.assertGreater(result.n_observations, 0)
+    def test_moderation_analysis(self):
+        """测试调节效应分析"""
+        np.random.seed(42)
+        n = 200
+        # 预测变量
+        predictor = np.random.normal(0, 1, n).tolist()
+        # 调节变量
+        moderator = np.random.normal(0, 1, n).tolist()
+        # 协变量
+        x1 = np.random.normal(0, 1, n)
+        x2 = np.random.normal(0, 1, n)
+        covariates = np.column_stack([x1, x2]).tolist()
+        # 结果变量
+        outcome = (2 + 1.2 * np.array(predictor) + 0.8 * np.array(moderator) +
+                  0.5 * np.array(predictor) * np.array(moderator) +
+                  0.3 * x1 + 0.2 * x2 + np.random.normal(0, 1, n)).tolist()
+        # 执行调节效应分析
+        result = moderation_analysis(
+            outcome=outcome,
+            predictor=predictor,
+            moderator=moderator,
+            covariates=covariates
+        )
+        # 检查结果
+        self.assertIsNotNone(result.main_effect)
+        self.assertIsNotNone(result.moderator_effect)
+        self.assertIsNotNone(result.interaction_effect)
+        self.assertGreater(result.n_observations, 0)
+if __name__ == "__main__":
+    unittest.main()

econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py ADDED Viewed

@@ -0,0 +1,202 @@
+"""
+因果识别策略模块测试
+"""
+import numpy as np
+import unittest
+from econometrics.causal_inference.causal_identification_strategy import (
+    instrumental_variables_2sls,
+    difference_in_differences,
+    regression_discontinuity,
+    fixed_effects_model,
+    random_effects_model
+)
+class TestCausalIdentificationStrategy(unittest.TestCase):
+    def test_instrumental_variables_2sls(self):
+        """测试工具变量法"""
+        # 生成模拟数据
+        np.random.seed(42)
+        n = 100
+        # 工具变量
+        z = np.random.normal(0, 1, n)
+        # 内生变量（与误差项相关）
+        e1 = np.random.normal(0, 1, n)
+        x = 1 + 0.5 * z + e1
+        # 结果变量
+        e2 = np.random.normal(0, 1, n)
+        y = 2 + 1.5 * x + e2 + 0.3 * e1  # 包含内生性
+        # 执行工具变量回归
+        result = instrumental_variables_2sls(
+            y=y.tolist(),
+            x=x.reshape(-1, 1).tolist(),
+            instruments=z.reshape(-1, 1).tolist()
+        )
+        # 检查结果
+        self.assertIsNotNone(result.estimate)
+        self.assertIsNotNone(result.std_error)
+        self.assertIsNotNone(result.p_value)
+        self.assertGreater(result.n_observations, 0)
+    def test_difference_in_differences(self):
+        """测试双重差分法"""
+        # 生成模拟数据
+        np.random.seed(42)
+        n = 200
+        # 处理组标识（0=控制组，1=处理组）
+        treatment = np.concatenate([np.zeros(100), np.ones(100)]).tolist()
+        # 时间标识（0=处理前，1=处理后）
+        time_period = np.concatenate([np.zeros(50), np.ones(50), np.zeros(50), np.ones(50)]).tolist()
+        # 结果变量
+        # 控制组处理前均值为10，处理后为10
+        # 处理组处理前均值为10，处理后为12（处理效应为2）
+        outcome = []
+        for i in range(n):
+            if treatment[i] == 0 and time_period[i] == 0:
+                outcome.append(np.random.normal(10, 1))
+            elif treatment[i] == 0 and time_period[i] == 1:
+                outcome.append(np.random.normal(10, 1))
+            elif treatment[i] == 1 and time_period[i] == 0:
+                outcome.append(np.random.normal(10, 1))
+            else:  # treatment[i] == 1 and time_period[i] == 1
+                outcome.append(np.random.normal(12, 1))
+        # 执行DID分析
+        result = difference_in_differences(
+            treatment=treatment,
+            time_period=time_period,
+            outcome=outcome
+        )
+        # 检查结果
+        self.assertIsNotNone(result.estimate)
+        self.assertIsNotNone(result.std_error)
+        self.assertIsNotNone(result.p_value)
+        self.assertGreater(result.n_observations, 0)
+    def test_regression_discontinuity(self):
+        """测试断点回归设计"""
+        # 生成模拟数据
+        np.random.seed(42)
+        n = 200
+        cutoff = 0.0
+        # 运行变量
+        running_variable = np.random.uniform(-1, 1, n).tolist()
+        # 结果变量 - 在断点处有跳跃
+        outcome = []
+        for r in running_variable:
+            if r >= cutoff:
+                outcome.append(2 + 1.5 * r + np.random.normal(0, 0.5) + 1.0)  # +1.0是处理效应
+            else:
+                outcome.append(2 + 1.5 * r + np.random.normal(0, 0.5))
+        # 执行RDD分析
+        result = regression_discontinuity(
+            running_variable=running_variable,
+            outcome=outcome,
+            cutoff=cutoff,
+            bandwidth=0.5
+        )
+        # 检查结果
+        self.assertIsNotNone(result.estimate)
+        self.assertIsNotNone(result.std_error)
+        self.assertIsNotNone(result.p_value)
+        self.assertGreater(result.n_observations, 0)
+        self.assertEqual(result.discontinuity_location, cutoff)
+    def test_fixed_effects_model(self):
+        """测试固定效应模型"""
+        # 生成面板数据
+        np.random.seed(42)
+        n_entities = 20
+        n_periods = 10
+        n = n_entities * n_periods
+        # 个体标识
+        entity_ids = [f"entity_{i}" for i in range(n_entities) for _ in range(n_periods)]
+        # 时间标识
+        time_periods = [f"period_{t}" for _ in range(n_entities) for t in range(n_periods)]
+        # 自变量
+        x = np.random.normal(0, 1, (n, 2)).tolist()
+        # 因变量（包含个体固定效应）
+        entity_effects = np.random.normal(0, 1, n_entities)
+        y = []
+        for i in range(n):
+            entity_idx = i // n_periods
+            y_value = 1 + 2 * x[i][0] + 1.5 * x[i][1] + entity_effects[entity_idx] + np.random.normal(0, 0.5)
+            y.append(y_value)
+        # 执行固定效应模型
+        result = fixed_effects_model(
+            y=y,
+            x=x,
+            entity_ids=entity_ids,
+            time_periods=time_periods
+        )
+        # 检查结果
+        self.assertIsNotNone(result.estimate)
+        self.assertIsNotNone(result.std_error)
+        self.assertIsNotNone(result.p_value)
+        self.assertEqual(result.n_observations, n)
+        self.assertEqual(result.model_type, "FE")
+    def test_random_effects_model(self):
+        """测试随机效应模型"""
+        # 生成面板数据
+        np.random.seed(42)
+        n_entities = 20
+        n_periods = 10
+        n = n_entities * n_periods
+        # 个体标识
+        entity_ids = [f"entity_{i}" for i in range(n_entities) for _ in range(n_periods)]
+        # 时间标识
+        time_periods = [f"period_{t}" for _ in range(n_entities) for t in range(n_periods)]
+        # 自变量
+        x = np.random.normal(0, 1, (n, 2)).tolist()
+        # 因变量（包含个体随机效应）
+        entity_effects = np.random.normal(0, 1, n_entities)
+        y = []
+        for i in range(n):
+            entity_idx = i // n_periods
+            y_value = 1 + 2 * x[i][0] + 1.5 * x[i][1] + entity_effects[entity_idx] + np.random.normal(0, 0.5)
+            y.append(y_value)
+        # 执行随机效应模型
+        result = random_effects_model(
+            y=y,
+            x=x,
+            entity_ids=entity_ids,
+            time_periods=time_periods
+        )
+        # 检查结果
+        self.assertIsNotNone(result.estimate)
+        self.assertIsNotNone(result.std_error)
+        self.assertIsNotNone(result.p_value)
+        self.assertEqual(result.n_observations, n)
+        self.assertEqual(result.model_type, "RE")
+if __name__ == "__main__":
+    unittest.main()

econometrics/tests/causal_inference_tests/test_difference_in_differences.py ADDED Viewed

@@ -0,0 +1,53 @@
+"""
+双重差分法测试
+"""
+import numpy as np
+import unittest
+from econometrics.causal_inference.causal_identification_strategy import difference_in_differences
+class TestDifferenceInDifferences(unittest.TestCase):
+    def test_difference_in_differences(self):
+        """测试双重差分法"""
+        # 生成模拟数据
+        np.random.seed(42)
+        n = 200
+        # 处理组标识（0=控制组，1=处理组）
+        treatment = np.concatenate([np.zeros(100), np.ones(100)]).tolist()
+        # 时间标识（0=处理前，1=处理后）
+        time_period = np.concatenate([np.zeros(50), np.ones(50), np.zeros(50), np.ones(50)]).tolist()
+        # 结果变量
+        # 控制组处理前均值为10，处理后为10
+        # 处理组处理前均值为10，处理后为12（处理效应为2）
+        outcome = []
+        for i in range(n):
+            if treatment[i] == 0 and time_period[i] == 0:
+                outcome.append(np.random.normal(10, 1))
+            elif treatment[i] == 0 and time_period[i] == 1:
+                outcome.append(np.random.normal(10, 1))
+            elif treatment[i] == 1 and time_period[i] == 0:
+                outcome.append(np.random.normal(10, 1))
+            else:  # treatment[i] == 1 and time_period[i] == 1
+                outcome.append(np.random.normal(12, 1))
+        # 执行DID分析
+        result = difference_in_differences(
+            treatment=treatment,
+            time_period=time_period,
+            outcome=outcome
+        )
+        # 检查结果
+        self.assertIsNotNone(result.estimate)
+        self.assertIsNotNone(result.std_error)
+        self.assertIsNotNone(result.p_value)
+        self.assertGreater(result.n_observations, 0)
+if __name__ == "__main__":
+    unittest.main()

aigroup-econ-mcp 1.4.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

aigroup-econ-mcp 1.4.3py3-none-any.whl → 2.0.1py3-none-any.whl