aigroup-econ-mcp 1.3.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (198)
  1. .gitignore +253 -0
  2. PKG-INFO +732 -0
  3. README.md +687 -0
  4. __init__.py +14 -0
  5. aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
  6. aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
  7. aigroup_econ_mcp-2.0.1.dist-info/entry_points.txt +2 -0
  8. aigroup_econ_mcp-2.0.1.dist-info/licenses/LICENSE +21 -0
  9. cli.py +32 -0
  10. econometrics/README.md +18 -0
  11. econometrics/__init__.py +191 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
  13. econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
  14. econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
  15. econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
  16. econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
  17. econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
  18. econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
  19. econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
  20. econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
  21. econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
  22. econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
  23. econometrics/basic_parametric_estimation/__init__.py +31 -0
  24. econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
  25. econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
  26. econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
  27. econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
  28. econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
  29. econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
  30. econometrics/causal_inference/__init__.py +66 -0
  31. econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
  32. econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
  33. econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
  34. econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
  35. econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
  36. econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
  37. econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
  38. econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
  39. econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
  40. econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
  41. econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
  42. econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
  43. econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
  44. econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
  45. econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
  46. econometrics/distribution_analysis/__init__.py +28 -0
  47. econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
  48. econometrics/distribution_analysis/time_series_decomposition.py +152 -0
  49. econometrics/distribution_analysis/variance_decomposition.py +179 -0
  50. econometrics/missing_data/__init__.py +18 -0
  51. econometrics/missing_data/imputation_methods.py +219 -0
  52. econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
  53. econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
  54. econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
  55. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
  56. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +149 -0
  57. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
  58. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
  59. econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
  60. econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +286 -0
  61. econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
  62. econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
  63. econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
  64. econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
  65. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
  66. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
  67. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
  68. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
  69. econometrics/nonparametric/__init__.py +35 -0
  70. econometrics/nonparametric/gam_model.py +117 -0
  71. econometrics/nonparametric/kernel_regression.py +161 -0
  72. econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
  73. econometrics/nonparametric/quantile_regression.py +249 -0
  74. econometrics/nonparametric/spline_regression.py +100 -0
  75. econometrics/spatial_econometrics/__init__.py +68 -0
  76. econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
  77. econometrics/spatial_econometrics/gwr_simple.py +154 -0
  78. econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
  79. econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
  80. econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
  81. econometrics/spatial_econometrics/spatial_regression.py +315 -0
  82. econometrics/spatial_econometrics/spatial_weights.py +226 -0
  83. econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
  84. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
  85. econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
  86. econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
  87. econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
  88. econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
  89. econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
  90. econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
  91. econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
  92. econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
  93. econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
  94. econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
  95. econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
  96. econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
  97. econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
  98. econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
  99. econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
  100. econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
  101. econometrics/statistical_inference/__init__.py +21 -0
  102. econometrics/statistical_inference/bootstrap_methods.py +162 -0
  103. econometrics/statistical_inference/permutation_test.py +177 -0
  104. econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
  105. econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
  106. econometrics/survival_analysis/__init__.py +18 -0
  107. econometrics/survival_analysis/survival_models.py +259 -0
  108. econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
  109. econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
  110. econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
  111. econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
  112. econometrics/tests/causal_inference_tests/__init__.py +3 -0
  113. econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
  114. econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
  115. econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
  116. econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
  117. econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
  118. econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
  119. econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
  120. econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
  121. econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
  122. econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
  123. econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
  124. econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
  125. econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
  126. econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
  127. econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
  128. econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
  129. econometrics//321/206/320/254/320/272/321/205/342/225/235/320/220/321/205/320/237/320/241/321/205/320/264/320/267/321/207/342/226/222/342/225/227/321/204/342/225/235/320/250/321/205/320/225/320/230/321/207/342/225/221/320/267/321/205/320/230/320/226/321/206/320/256/320/240.md +544 -0
  130. prompts/__init__.py +0 -0
  131. prompts/analysis_guides.py +43 -0
  132. pyproject.toml +85 -0
  133. resources/MCP_MASTER_GUIDE.md +422 -0
  134. resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
  135. resources/__init__.py +0 -0
  136. server.py +97 -0
  137. tools/README.md +88 -0
  138. tools/__init__.py +119 -0
  139. tools/causal_inference_adapter.py +658 -0
  140. tools/data_loader.py +213 -0
  141. tools/decorators.py +38 -0
  142. tools/distribution_analysis_adapter.py +121 -0
  143. tools/econometrics_adapter.py +286 -0
  144. tools/gwr_simple_adapter.py +54 -0
  145. tools/machine_learning_adapter.py +567 -0
  146. tools/mcp_tool_groups/__init__.py +15 -0
  147. tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
  148. tools/mcp_tool_groups/causal_inference_tools.py +643 -0
  149. tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
  150. tools/mcp_tool_groups/machine_learning_tools.py +422 -0
  151. tools/mcp_tool_groups/microecon_tools.py +325 -0
  152. tools/mcp_tool_groups/missing_data_tools.py +117 -0
  153. tools/mcp_tool_groups/model_specification_tools.py +402 -0
  154. tools/mcp_tool_groups/nonparametric_tools.py +225 -0
  155. tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
  156. tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
  157. tools/mcp_tool_groups/time_series_tools.py +494 -0
  158. tools/mcp_tools_registry.py +124 -0
  159. tools/microecon_adapter.py +412 -0
  160. tools/missing_data_adapter.py +73 -0
  161. tools/model_specification_adapter.py +369 -0
  162. tools/nonparametric_adapter.py +190 -0
  163. tools/output_formatter.py +563 -0
  164. tools/spatial_econometrics_adapter.py +318 -0
  165. tools/statistical_inference_adapter.py +90 -0
  166. tools/survival_analysis_adapter.py +46 -0
  167. tools/time_series_panel_data_adapter.py +858 -0
  168. tools/time_series_panel_data_tools.py +65 -0
  169. aigroup_econ_mcp/__init__.py +0 -19
  170. aigroup_econ_mcp/cli.py +0 -82
  171. aigroup_econ_mcp/config.py +0 -561
  172. aigroup_econ_mcp/server.py +0 -452
  173. aigroup_econ_mcp/tools/__init__.py +0 -19
  174. aigroup_econ_mcp/tools/base.py +0 -470
  175. aigroup_econ_mcp/tools/cache.py +0 -533
  176. aigroup_econ_mcp/tools/data_loader.py +0 -195
  177. aigroup_econ_mcp/tools/file_parser.py +0 -1027
  178. aigroup_econ_mcp/tools/machine_learning.py +0 -60
  179. aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
  180. aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
  181. aigroup_econ_mcp/tools/ml_models.py +0 -54
  182. aigroup_econ_mcp/tools/ml_regularization.py +0 -186
  183. aigroup_econ_mcp/tools/monitoring.py +0 -555
  184. aigroup_econ_mcp/tools/optimized_example.py +0 -229
  185. aigroup_econ_mcp/tools/panel_data.py +0 -619
  186. aigroup_econ_mcp/tools/regression.py +0 -214
  187. aigroup_econ_mcp/tools/statistics.py +0 -154
  188. aigroup_econ_mcp/tools/time_series.py +0 -698
  189. aigroup_econ_mcp/tools/timeout.py +0 -283
  190. aigroup_econ_mcp/tools/tool_descriptions.py +0 -410
  191. aigroup_econ_mcp/tools/tool_handlers.py +0 -1016
  192. aigroup_econ_mcp/tools/tool_registry.py +0 -478
  193. aigroup_econ_mcp/tools/validation.py +0 -482
  194. aigroup_econ_mcp-1.3.3.dist-info/METADATA +0 -525
  195. aigroup_econ_mcp-1.3.3.dist-info/RECORD +0 -30
  196. aigroup_econ_mcp-1.3.3.dist-info/entry_points.txt +0 -2
  197. /aigroup_econ_mcp-1.3.3.dist-info/licenses/LICENSE → /LICENSE +0 -0
  198. {aigroup_econ_mcp-1.3.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py
@@ -0,0 +1,253 @@
+ """
+ Causal Forest implementation for heterogeneous treatment effect estimation
+ """
+ import numpy as np
+ import pandas as pd
+ from sklearn.ensemble import RandomForestRegressor
+ from typing import Union, Optional, Dict, Any
+
+
+ class CausalForest:
+     """
+     Causal Forest for estimating heterogeneous treatment effects
+     """
+
+     def __init__(self, n_estimators: int = 100, min_samples_leaf: int = 5,
+                  max_depth: Optional[int] = None, random_state: int = 42,
+                  honest: bool = True, n_jobs: int = -1):
+         """
+         Initialize the Causal Forest model
+
+         Parameters
+         ----------
+         n_estimators : int
+             Number of trees in the forest
+         min_samples_leaf : int
+             Minimum number of samples required at a leaf node
+         max_depth : int, optional
+             Maximum depth of each tree
+         random_state : int
+             Random state for reproducibility
+         honest : bool
+             Whether to use honest splitting (separate samples for splitting and estimation)
+         n_jobs : int
+             Number of jobs to run in parallel
+         """
+         self.n_estimators = n_estimators
+         self.min_samples_leaf = min_samples_leaf
+         self.max_depth = max_depth
+         self.random_state = random_state
+         self.honest = honest
+         self.n_jobs = n_jobs
+
+         # Simplified implementation built from two random forests:
+         # one for the outcome regression E[Y|X] and one for the
+         # treatment (propensity) regression E[W|X]
+         self.mu_model = RandomForestRegressor(
+             n_estimators=n_estimators,
+             min_samples_leaf=min_samples_leaf,
+             max_depth=max_depth,
+             random_state=random_state,
+             n_jobs=n_jobs
+         )
+
+         self.pi_model = RandomForestRegressor(
+             n_estimators=n_estimators,
+             min_samples_leaf=min_samples_leaf,
+             max_depth=max_depth,
+             random_state=random_state,
+             n_jobs=n_jobs
+         )
+
+         self.fitted = False
+
+     def fit(self, X: Union[np.ndarray, pd.DataFrame],
+             y: Union[np.ndarray, pd.Series],
+             w: Union[np.ndarray, pd.Series]) -> 'CausalForest':
+         """
+         Fit the Causal Forest model
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Covariates
+         y : array-like of shape (n_samples,)
+             Outcome variable
+         w : array-like of shape (n_samples,)
+             Treatment assignment (binary)
+
+         Returns
+         -------
+         self : CausalForest
+         """
+         X = np.asarray(X)
+         y = np.asarray(y)
+         w = np.asarray(w)
+
+         # Fit outcome regression E[Y|X]
+         self.mu_model.fit(X, y)
+
+         # Fit treatment regression E[W|X]
+         self.pi_model.fit(X, w)
+
+         self.fitted = True
+         return self
+
+     def predict(self, X: Union[np.ndarray, pd.DataFrame]) -> Dict[str, np.ndarray]:
+         """
+         Predict treatment effects for new samples
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Samples
+
+         Returns
+         -------
+         results : dict
+             Dictionary with treatment effect estimates and related statistics
+         """
+         if not self.fitted:
+             raise ValueError("Model must be fitted before making predictions")
+
+         X = np.asarray(X)
+
+         # Base predictions from the two forests
+         mu_pred = self.mu_model.predict(X)
+         pi_pred = self.pi_model.predict(X)
+
+         # A full causal forest would compute heterogeneous effects via
+         # honest splitting, R-learner-style estimates in the leaves, and
+         # aggregation across trees; this simplified version returns only
+         # the nuisance predictions
+         return {
+             'outcome_prediction': mu_pred,
+             'treatment_propensity': pi_pred,
+             'treatment_effect': mu_pred  # placeholder, not a real effect estimate
+         }
+
+     def estimate_treatment_effect(self, X: Union[np.ndarray, pd.DataFrame],
+                                   y: Union[np.ndarray, pd.Series],
+                                   w: Union[np.ndarray, pd.Series]) -> Dict[str, Any]:
+         """
+         Estimate treatment effects using the fitted model
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Covariates
+         y : array-like of shape (n_samples,)
+             Outcome variable
+         w : array-like of shape (n_samples,)
+             Treatment assignment (binary)
+
+         Returns
+         -------
+         results : dict
+             Dictionary with treatment effect estimates
+         """
+         if not self.fitted:
+             raise ValueError("Model must be fitted first")
+
+         X = np.asarray(X)
+         y = np.asarray(y)
+         w = np.asarray(w)
+
+         mu_pred = self.mu_model.predict(X)
+         pi_pred = self.pi_model.predict(X)
+
+         # Clip propensity scores away from 0 and 1 to avoid extreme weights
+         pi_pred = np.clip(pi_pred, 1e-5, 1 - 1e-5)
+
+         # AIPW (Augmented Inverse Probability Weighting)-style imputation of
+         # the two potential outcomes, using the pooled outcome model E[Y|X]
+         # in place of arm-specific models (a simplification of the usual
+         # doubly robust score)
+         w1 = w / pi_pred
+         w0 = (1 - w) / (1 - pi_pred)
+
+         y1_est = w1 * y + (1 - w1) * mu_pred
+         y0_est = w0 * y + (1 - w0) * mu_pred
+
+         # Individual treatment effects (CATE - Conditional Average Treatment Effect)
+         cate = y1_est - y0_est
+
+         # Average treatment effect
+         ate = np.mean(cate)
+
+         # Naive standard error of the mean
+         cate_se = np.std(cate) / np.sqrt(len(cate))
+
+         return {
+             'cate': cate,        # Conditional Average Treatment Effects
+             'ate': ate,          # Average Treatment Effect
+             'cate_se': cate_se,
+             'outcome_prediction': mu_pred,
+             'treatment_propensity': pi_pred
+         }
+
+
+ def causal_forest_analysis(X: Union[np.ndarray, pd.DataFrame],
+                            y: Union[np.ndarray, pd.Series],
+                            w: Union[np.ndarray, pd.Series],
+                            n_estimators: int = 100,
+                            min_samples_leaf: int = 5,
+                            max_depth: Optional[int] = None,
+                            random_state: int = 42,
+                            honest: bool = True) -> dict:
+     """
+     Perform a complete Causal Forest analysis
+
+     Parameters
+     ----------
+     X : array-like of shape (n_samples, n_features)
+         Covariates
+     y : array-like of shape (n_samples,)
+         Outcome variable
+     w : array-like of shape (n_samples,)
+         Treatment assignment (binary)
+     n_estimators : int
+         Number of trees in the forest
+     min_samples_leaf : int
+         Minimum number of samples required at a leaf node
+     max_depth : int, optional
+         Maximum depth of each tree
+     random_state : int
+         Random state for reproducibility
+     honest : bool
+         Whether to use honest splitting
+
+     Returns
+     -------
+     results : dict
+         Dictionary with the model and estimation results
+     """
+     # Initialize and fit the model
+     cf_model = CausalForest(
+         n_estimators=n_estimators,
+         min_samples_leaf=min_samples_leaf,
+         max_depth=max_depth,
+         random_state=random_state,
+         honest=honest
+     )
+     cf_model.fit(X, y, w)
+
+     # Estimate treatment effects on the training data
+     te_results = cf_model.estimate_treatment_effect(X, y, w)
+
+     return {
+         'model': cf_model,
+         'treatment_effects': te_results,
+         'X': X,
+         'y': y,
+         'w': w
+     }
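
The module above exposes causal_forest_analysis as its entry point. A minimal usage sketch follows; the synthetic data, the true effect tau, and the import path (inferred from the file layout in this diff) are illustrative assumptions, not part of the package:

    import numpy as np
    from econometrics.advanced_methods.modern_computing_machine_learning.causal_forest import causal_forest_analysis

    # Synthetic randomized experiment whose effect varies with the first covariate
    rng = np.random.default_rng(0)
    n = 500
    X = rng.normal(size=(n, 3))
    w = rng.binomial(1, 0.5, size=n)           # binary treatment (illustrative)
    tau = 1.0 + 0.5 * X[:, 0]                  # true heterogeneous effect
    y = X @ np.array([0.3, -0.2, 0.1]) + tau * w + rng.normal(scale=0.5, size=n)

    results = causal_forest_analysis(X, y, w, n_estimators=200)
    te = results['treatment_effects']
    print(f"ATE: {te['ate']:.3f} (naive SE {te['cate_se']:.3f})")

Because this is the simplified AIPW-based estimator described in the code comments, the per-unit 'cate' values behave like noisy scores rather than smoothed forest estimates.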
econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py
@@ -0,0 +1,268 @@
+ """
+ Double Machine Learning implementation for causal inference
+ """
+ import numpy as np
+ import pandas as pd
+ from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+ from typing import Union, Any, Tuple
+ from scipy import stats
+
+
+ class DoubleML:
+     """
+     Double Machine Learning for causal inference with treatment effects
+     """
+
+     def __init__(self, learner_g: Any = None, learner_m: Any = None,
+                  treatment_type: str = 'continuous', n_folds: int = 5,
+                  random_state: int = 42):
+         """
+         Initialize the Double Machine Learning model
+
+         Parameters
+         ----------
+         learner_g : sklearn estimator, optional
+             Estimator for the outcome regression g(X) = E[Y|X].
+             Default: RandomForestRegressor; when treatment_type='binary' a
+             RandomForestClassifier is used, which assumes the outcome is
+             also binary in that mode
+         learner_m : sklearn estimator, optional
+             Estimator for the treatment regression m(X) = E[D|X].
+             Default: RandomForestRegressor for continuous treatment,
+             RandomForestClassifier for binary treatment
+         treatment_type : str, 'continuous' or 'binary'
+             Type of treatment variable
+         n_folds : int
+             Number of cross-fitting folds
+         random_state : int
+             Random state for reproducibility
+         """
+         self.learner_g = learner_g
+         self.learner_m = learner_m
+         self.treatment_type = treatment_type
+         self.n_folds = n_folds
+         self.random_state = random_state
+
+         # Set default learners if not provided
+         if self.learner_g is None:
+             if treatment_type == 'continuous':
+                 self.learner_g = RandomForestRegressor(n_estimators=100, random_state=random_state)
+             else:
+                 self.learner_g = RandomForestClassifier(n_estimators=100, random_state=random_state)
+
+         if self.learner_m is None:
+             if treatment_type == 'continuous':
+                 self.learner_m = RandomForestRegressor(n_estimators=100, random_state=random_state)
+             else:
+                 self.learner_m = RandomForestClassifier(n_estimators=100, random_state=random_state)
+
+         # Estimation results
+         self.effect = None
+         self.se = None
+         self.ci = None
+         self.pval = None
+
+     def fit(self, X: Union[np.ndarray, pd.DataFrame],
+             y: Union[np.ndarray, pd.Series],
+             d: Union[np.ndarray, pd.Series]) -> 'DoubleML':
+         """
+         Fit the Double Machine Learning model
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Covariates
+         y : array-like of shape (n_samples,)
+             Outcome variable
+         d : array-like of shape (n_samples,)
+             Treatment variable
+
+         Returns
+         -------
+         self : DoubleML
+         """
+         X = np.asarray(X)
+         y = np.asarray(y)
+         d = np.asarray(d)
+
+         n_samples = X.shape[0]
+
+         # Arrays for the cross-fitted residuals
+         y_res = np.zeros(n_samples)
+         d_res = np.zeros(n_samples)
+
+         # Create folds for cross-fitting
+         np.random.seed(self.random_state)
+         indices = np.random.permutation(n_samples)
+         fold_size = n_samples // self.n_folds
+         folds = [indices[i * fold_size:(i + 1) * fold_size] for i in range(self.n_folds)]
+         # Assign any remaining samples to the last fold
+         if n_samples % self.n_folds != 0:
+             folds[-1] = np.concatenate([folds[-1], indices[self.n_folds * fold_size:]])
+
+         # Cross-fitting: each nuisance model is trained on the other folds
+         # and evaluated on the held-out fold
+         for fold_idx, test_idx in enumerate(folds):
+             train_idx = np.concatenate([folds[i] for i in range(self.n_folds) if i != fold_idx])
+
+             X_train, X_test = X[train_idx], X[test_idx]
+             y_train, y_test = y[train_idx], y[test_idx]
+             d_train, d_test = d[train_idx], d[test_idx]
+
+             # Outcome regression residuals: y - g_hat(X)
+             self.learner_g.fit(X_train, y_train)
+             if self.treatment_type == 'continuous':
+                 y_pred = self.learner_g.predict(X_test)
+             else:
+                 y_pred = self.learner_g.predict_proba(X_test)[:, 1]
+             y_res[test_idx] = y_test - y_pred
+
+             # Treatment regression residuals: d - m_hat(X)
+             self.learner_m.fit(X_train, d_train)
+             if self.treatment_type == 'continuous':
+                 d_pred = self.learner_m.predict(X_test)
+             else:
+                 d_pred = self.learner_m.predict_proba(X_test)[:, 1]
+             d_res[test_idx] = d_test - d_pred
+
+         # Treatment effect in the partially linear model:
+         # theta = E[d_res * y_res] / E[d_res^2]
+         numerator = np.mean(d_res * y_res)
+         denominator = np.mean(d_res ** 2)
+         self.effect = numerator / denominator
+
+         # Standard error from the influence function of the DML estimator:
+         # Var(theta) ~= E[(d_res * (y_res - theta * d_res))^2] / E[d_res^2]^2 / n
+         residuals = y_res - self.effect * d_res
+         variance = np.mean((d_res * residuals) ** 2) / np.mean(d_res ** 2) ** 2 / n_samples
+         self.se = np.sqrt(variance)
+
+         # 95% confidence interval (normal approximation)
+         crit_val = 1.96
+         self.ci = (self.effect - crit_val * self.se,
+                    self.effect + crit_val * self.se)
+
+         # Two-sided p-value
+         z_score = self.effect / self.se
+         self.pval = 2 * (1 - stats.norm.cdf(np.abs(z_score)))
+
+         return self
+
+     def predict(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
+         """
+         Predict treatment effects (constant in the partially linear model)
+
+         Parameters
+         ----------
+         X : array-like of shape (n_samples, n_features)
+             Samples (only the number of rows is used; the effect is constant)
+
+         Returns
+         -------
+         effects : ndarray of shape (n_samples,)
+             Estimated treatment effects
+         """
+         X = np.asarray(X)
+         return np.full(X.shape[0], self.effect)
+
+     def get_effect(self) -> float:
+         """Return the estimated treatment effect."""
+         return self.effect
+
+     def get_se(self) -> float:
+         """Return the standard error of the treatment effect."""
+         return self.se
+
+     def get_ci(self) -> Tuple[float, float]:
+         """Return the 95% confidence interval (lower, upper) for the effect."""
+         return self.ci
+
+     def get_pval(self) -> float:
+         """Return the two-sided p-value for the treatment effect."""
+         return self.pval
+
+
+ def double_ml_analysis(X: Union[np.ndarray, pd.DataFrame],
+                        y: Union[np.ndarray, pd.Series],
+                        d: Union[np.ndarray, pd.Series],
+                        treatment_type: str = 'continuous',
+                        n_folds: int = 5,
+                        random_state: int = 42) -> dict:
+     """
+     Perform a complete Double Machine Learning analysis
+
+     Parameters
+     ----------
+     X : array-like of shape (n_samples, n_features)
+         Covariates
+     y : array-like of shape (n_samples,)
+         Outcome variable
+     d : array-like of shape (n_samples,)
+         Treatment variable
+     treatment_type : str, 'continuous' or 'binary'
+         Type of treatment variable
+     n_folds : int
+         Number of cross-fitting folds
+     random_state : int
+         Random state for reproducibility
+
+     Returns
+     -------
+     results : dict
+         Dictionary with the model and estimation results
+     """
+     # Initialize and fit the model
+     dml_model = DoubleML(
+         treatment_type=treatment_type,
+         n_folds=n_folds,
+         random_state=random_state
+     )
+     dml_model.fit(X, y, d)
+
+     return {
+         'model': dml_model,
+         'effect': dml_model.get_effect(),
+         'se': dml_model.get_se(),
+         'ci': dml_model.get_ci(),
+         'pval': dml_model.get_pval(),
+         'X': X,
+         'y': y,
+         'd': d
+     }
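
As with the causal forest module, double_ml.py ships a convenience wrapper, double_ml_analysis. A minimal sketch under assumed synthetic data follows; the data-generating process and the import path (inferred from the file list above) are illustrative:

    import numpy as np
    from econometrics.advanced_methods.modern_computing_machine_learning.double_ml import double_ml_analysis

    # Partially linear DGP: y = theta*d + g(X) + noise, with confounding through X
    rng = np.random.default_rng(1)
    n = 1000
    X = rng.normal(size=(n, 5))
    d = X[:, 0] + 0.5 * rng.normal(size=n)               # continuous treatment depends on X
    y = 0.8 * d + np.sin(X[:, 1]) + rng.normal(size=n)   # true effect theta = 0.8

    res = double_ml_analysis(X, y, d, treatment_type='continuous', n_folds=5)
    print(f"theta_hat = {res['effect']:.3f}, SE = {res['se']:.3f}")
    print(f"95% CI = ({res['ci'][0]:.3f}, {res['ci'][1]:.3f}), p = {res['pval']:.4f}")

Cross-fitting means each nuisance forest is always evaluated on data it was not trained on, which is what makes the plug-in normal inference above defensible.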