aigroup-econ-mcp 1.4.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. PKG-INFO +344 -322
  2. README.md +335 -320
  3. __init__.py +1 -1
  4. aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
  5. aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
  6. cli.py +4 -0
  7. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
  8. econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
  9. econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
  10. econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
  11. econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
  13. econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
  14. econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
  15. econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
  16. econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
  17. econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
  18. econometrics/causal_inference/__init__.py +66 -0
  19. econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
  20. econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
  21. econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
  22. econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
  23. econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
  24. econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
  25. econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
  26. econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
  27. econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
  28. econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
  29. econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
  30. econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
  31. econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
  32. econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
  33. econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
  34. econometrics/distribution_analysis/__init__.py +28 -0
  35. econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
  36. econometrics/distribution_analysis/time_series_decomposition.py +152 -0
  37. econometrics/distribution_analysis/variance_decomposition.py +179 -0
  38. econometrics/missing_data/__init__.py +18 -0
  39. econometrics/missing_data/imputation_methods.py +219 -0
  40. econometrics/nonparametric/__init__.py +35 -0
  41. econometrics/nonparametric/gam_model.py +117 -0
  42. econometrics/nonparametric/kernel_regression.py +161 -0
  43. econometrics/nonparametric/quantile_regression.py +249 -0
  44. econometrics/nonparametric/spline_regression.py +100 -0
  45. econometrics/spatial_econometrics/__init__.py +68 -0
  46. econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
  47. econometrics/spatial_econometrics/gwr_simple.py +154 -0
  48. econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
  49. econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
  50. econometrics/spatial_econometrics/spatial_regression.py +315 -0
  51. econometrics/spatial_econometrics/spatial_weights.py +226 -0
  52. econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
  53. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
  54. econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
  55. econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
  56. econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
  57. econometrics/statistical_inference/__init__.py +21 -0
  58. econometrics/statistical_inference/bootstrap_methods.py +162 -0
  59. econometrics/statistical_inference/permutation_test.py +177 -0
  60. econometrics/survival_analysis/__init__.py +18 -0
  61. econometrics/survival_analysis/survival_models.py +259 -0
  62. econometrics/tests/causal_inference_tests/__init__.py +3 -0
  63. econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
  64. econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
  65. econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
  66. econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
  67. econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
  68. econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
  69. econometrics//321/206/320/254/320/272/321/205/342/225/235/320/220/321/205/320/237/320/241/321/205/320/264/320/267/321/207/342/226/222/342/225/227/321/204/342/225/235/320/250/321/205/320/225/320/230/321/207/342/225/221/320/267/321/205/320/230/320/226/321/206/320/256/320/240.md +544 -0
  70. pyproject.toml +9 -2
  71. server.py +15 -1
  72. tools/__init__.py +75 -1
  73. tools/causal_inference_adapter.py +658 -0
  74. tools/distribution_analysis_adapter.py +121 -0
  75. tools/gwr_simple_adapter.py +54 -0
  76. tools/machine_learning_adapter.py +567 -0
  77. tools/mcp_tool_groups/__init__.py +15 -1
  78. tools/mcp_tool_groups/causal_inference_tools.py +643 -0
  79. tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
  80. tools/mcp_tool_groups/machine_learning_tools.py +422 -0
  81. tools/mcp_tool_groups/microecon_tools.py +325 -0
  82. tools/mcp_tool_groups/missing_data_tools.py +117 -0
  83. tools/mcp_tool_groups/nonparametric_tools.py +225 -0
  84. tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
  85. tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
  86. tools/mcp_tools_registry.py +13 -3
  87. tools/microecon_adapter.py +412 -0
  88. tools/missing_data_adapter.py +73 -0
  89. tools/nonparametric_adapter.py +190 -0
  90. tools/spatial_econometrics_adapter.py +318 -0
  91. tools/statistical_inference_adapter.py +90 -0
  92. tools/survival_analysis_adapter.py +46 -0
  93. aigroup_econ_mcp-1.4.3.dist-info/METADATA +0 -710
  94. aigroup_econ_mcp-1.4.3.dist-info/RECORD +0 -92
  95. {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
  96. {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/entry_points.txt +0 -0
  97. {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/licenses/LICENSE +0 -0
econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py
@@ -0,0 +1,268 @@
+ """
+ Double Machine Learning implementation for causal inference
+ """
+ import numpy as np
+ import pandas as pd
+ from sklearn.model_selection import train_test_split
+ from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+ from sklearn.linear_model import LinearRegression, LogisticRegression
+ from sklearn.metrics import mean_squared_error
+ from typing import Union, Optional, Dict, Any, Tuple
+ from scipy import stats
+
+
+ class DoubleML:
+     """
+     Double Machine Learning for causal inference with treatment effects
+     """
+
+     def __init__(self, learner_g: Any = None, learner_m: Any = None,
+                  treatment_type: str = 'continuous', n_folds: int = 5,
+                  random_state: int = 42):
+         """
+         Initialize Double Machine Learning model
+
+         Parameters:
+         -----------
+         learner_g : sklearn estimator, optional
+             Estimator for the outcome regression (g)
+             Default: RandomForestRegressor for continuous, RandomForestClassifier for binary
+         learner_m : sklearn estimator, optional
+             Estimator for the treatment regression (m)
+             Default: RandomForestRegressor for continuous, RandomForestClassifier for binary
+         treatment_type : str, 'continuous' or 'binary'
+             Type of treatment variable
+         n_folds : int
+             Number of cross-fitting folds
+         random_state : int
+             Random state for reproducibility
+         """
+         self.learner_g = learner_g
+         self.learner_m = learner_m
+         self.treatment_type = treatment_type
+         self.n_folds = n_folds
+         self.random_state = random_state
+
+         # Set default learners if not provided
+         if self.learner_g is None:
+             if treatment_type == 'continuous':
+                 self.learner_g = RandomForestRegressor(n_estimators=100, random_state=random_state)
+             else:
+                 self.learner_g = RandomForestClassifier(n_estimators=100, random_state=random_state)
+
+         if self.learner_m is None:
+             if treatment_type == 'continuous':
+                 self.learner_m = RandomForestRegressor(n_estimators=100, random_state=random_state)
+             else:
+                 self.learner_m = RandomForestClassifier(n_estimators=100, random_state=random_state)
+
+         # Store results
+         self.effect = None
+         self.se = None
+         self.ci = None
+         self.pval = None
+
+     def fit(self, X: Union[np.ndarray, pd.DataFrame],
+             y: Union[np.ndarray, pd.Series],
+             d: Union[np.ndarray, pd.Series]) -> 'DoubleML':
+         """
+         Fit the Double Machine Learning model
+
+         Parameters:
+         -----------
+         X : array-like of shape (n_samples, n_features)
+             Covariates
+         y : array-like of shape (n_samples,)
+             Outcome variable
+         d : array-like of shape (n_samples,)
+             Treatment variable
+
+         Returns:
+         --------
+         self : DoubleML
+         """
+         # Convert to numpy arrays if needed
+         X = np.asarray(X)
+         y = np.asarray(y)
+         d = np.asarray(d)
+
+         n_samples = X.shape[0]
+
+         # Initialize arrays to store residuals
+         y_res = np.zeros(n_samples)
+         d_res = np.zeros(n_samples)
+
+         # Create folds for cross-fitting
+         np.random.seed(self.random_state)
+         indices = np.random.permutation(n_samples)
+         fold_size = n_samples // self.n_folds
+         folds = [indices[i*fold_size:(i+1)*fold_size] for i in range(self.n_folds)]
+         # Add remaining samples to the last fold
+         if n_samples % self.n_folds != 0:
+             folds[-1] = np.concatenate([folds[-1], indices[self.n_folds*fold_size:]])
+
+         # Cross-fitting
+         for fold_idx, test_idx in enumerate(folds):
+             # Training indices (all except test fold)
+             train_idx = np.concatenate([folds[i] for i in range(self.n_folds) if i != fold_idx])
+
+             # Split data
+             X_train, X_test = X[train_idx], X[test_idx]
+             y_train, y_test = y[train_idx], y[test_idx]
+             d_train, d_test = d[train_idx], d[test_idx]
+
+             # Fit outcome regression and get residuals
+             self.learner_g.fit(X_train, y_train)
+             if self.treatment_type == 'continuous':
+                 y_pred = self.learner_g.predict(X_test)
+             else:
+                 y_pred = self.learner_g.predict_proba(X_test)[:, 1]
+             y_res[test_idx] = y_test - y_pred
+
+             # Fit treatment regression and get residuals
+             self.learner_m.fit(X_train, d_train)
+             if self.treatment_type == 'continuous':
+                 d_pred = self.learner_m.predict(X_test)
+             else:
+                 d_pred = self.learner_m.predict_proba(X_test)[:, 1]
+             d_res[test_idx] = d_test - d_pred
+
+         # Estimate treatment effect using partially linear regression
+         # theta = E[d_res * y_res] / E[d_res^2]
+         numerator = np.mean(d_res * y_res)
+         denominator = np.mean(d_res**2)
+
+         self.effect = numerator / denominator
+
+         # Calculate standard error
+         # Using the formula for the variance of the DML estimator
+         residuals = y_res - self.effect * d_res
+         variance = np.mean(residuals**2) / np.mean(d_res**2)**2 / n_samples
+         self.se = np.sqrt(variance)
+
+         # Calculate 95% confidence interval
+         crit_val = 1.96  # 95% CI
+         self.ci = (self.effect - crit_val * self.se,
+                    self.effect + crit_val * self.se)
+
+         # Calculate p-value (two-sided test)
+         z_score = self.effect / self.se
+         # Use scipy.stats.norm for calculating p-value
+         self.pval = 2 * (1 - stats.norm.cdf(np.abs(z_score)))
+
+         return self
+
+     def predict(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
+         """
+         Predict treatment effects (constant for this implementation)
+
+         Parameters:
+         -----------
+         X : array-like of shape (n_samples, n_features)
+             Samples (not used, treatment effect is constant)
+
+         Returns:
+         --------
+         effects : ndarray of shape (n_samples,)
+             Estimated treatment effects
+         """
+         return np.full(X.shape[0], self.effect) if hasattr(X, 'shape') else np.full(len(X), self.effect)
+
+     def get_effect(self) -> float:
+         """
+         Get the estimated treatment effect
+
+         Returns:
+         --------
+         effect : float
+             Estimated treatment effect
+         """
+         return self.effect
+
+     def get_se(self) -> float:
+         """
+         Get the standard error of the treatment effect
+
+         Returns:
+         --------
+         se : float
+             Standard error of the treatment effect
+         """
+         return self.se
+
+     def get_ci(self) -> Tuple[float, float]:
+         """
+         Get the 95% confidence interval for the treatment effect
+
+         Returns:
+         --------
+         ci : tuple
+             95% confidence interval (lower, upper)
+         """
+         return self.ci
+
+     def get_pval(self) -> float:
+         """
+         Get the p-value for the treatment effect
+
+         Returns:
+         --------
+         pval : float
+             P-value for the treatment effect
+         """
+         return self.pval
+
+
+ def double_ml_analysis(X: Union[np.ndarray, pd.DataFrame],
+                        y: Union[np.ndarray, pd.Series],
+                        d: Union[np.ndarray, pd.Series],
+                        treatment_type: str = 'continuous',
+                        n_folds: int = 5,
+                        random_state: int = 42) -> dict:
+     """
+     Perform complete Double Machine Learning analysis
+
+     Parameters:
+     -----------
+     X : array-like of shape (n_samples, n_features)
+         Covariates
+     y : array-like of shape (n_samples,)
+         Outcome variable
+     d : array-like of shape (n_samples,)
+         Treatment variable
+     treatment_type : str, 'continuous' or 'binary'
+         Type of treatment variable
+     n_folds : int
+         Number of cross-fitting folds
+     random_state : int
+         Random state for reproducibility
+
+     Returns:
+     --------
+     results : dict
+         Dictionary with model and estimation results
+     """
+     # Initialize and fit model
+     dml_model = DoubleML(
+         treatment_type=treatment_type,
+         n_folds=n_folds,
+         random_state=random_state
+     )
+     dml_model.fit(X, y, d)
+
+     # Get results
+     effect = dml_model.get_effect()
+     se = dml_model.get_se()
+     ci = dml_model.get_ci()
+     pval = dml_model.get_pval()
+
+     return {
+         'model': dml_model,
+         'effect': effect,
+         'se': se,
+         'ci': ci,
+         'pval': pval,
+         'X': X,
+         'y': y,
+         'd': d
+     }
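For orientation (not part of the diff): the new double_ml_analysis helper estimates the partially linear model via cross-fitted residual-on-residual regression, theta_hat = mean(d_res * y_res) / mean(d_res**2), as in the code above. A minimal usage sketch, assuming the wheel exposes the module at the path shown in the file list and using synthetic data with a known effect of 2.0:

# Illustrative only; import path assumed from the package layout above.
import numpy as np
from econometrics.advanced_methods.modern_computing_machine_learning.double_ml import double_ml_analysis

rng = np.random.default_rng(0)
n = 2000
X = rng.normal(size=(n, 5))
d = X[:, 0] + 0.5 * X[:, 1] + rng.normal(size=n)        # treatment depends on covariates
y = 2.0 * d + X[:, 0] - X[:, 2] + rng.normal(size=n)    # true treatment effect = 2.0

res = double_ml_analysis(X, y, d, treatment_type='continuous', n_folds=5, random_state=42)
print(res['effect'], res['se'], res['ci'], res['pval'])  # effect should be close to 2.0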
econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py
@@ -0,0 +1,249 @@
+ """
+ Gradient Boosting Machine (GBM/XGBoost) implementation for econometric analysis
+ """
+ import numpy as np
+ import pandas as pd
+ from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import mean_squared_error, accuracy_score
+ from typing import Union, Optional, Dict, Any
+
+ try:
+     import xgboost as xgb
+     XGBOOST_AVAILABLE = True
+ except ImportError:
+     XGBOOST_AVAILABLE = False
+
+
+ class EconGradientBoosting:
+     """
+     Gradient Boosting for econometric analysis with both scikit-learn and XGBoost implementations
+     """
+
+     def __init__(self, algorithm: str = 'sklearn', problem_type: str = 'regression',
+                  n_estimators: int = 100, learning_rate: float = 0.1,
+                  max_depth: int = 3, random_state: int = 42):
+         """
+         Initialize Gradient Boosting model
+
+         Parameters:
+         -----------
+         algorithm : str, 'sklearn' or 'xgboost'
+             Which implementation to use
+         problem_type : str, 'regression' or 'classification'
+             Type of problem to solve
+         n_estimators : int
+             Number of boosting stages
+         learning_rate : float
+             Learning rate shrinks the contribution of each tree
+         max_depth : int
+             Maximum depth of the individual regression estimators
+         random_state : int
+             Random state for reproducibility
+         """
+         self.algorithm = algorithm
+         self.problem_type = problem_type
+         self.n_estimators = n_estimators
+         self.learning_rate = learning_rate
+         self.max_depth = max_depth
+         self.random_state = random_state
+
+         if problem_type not in ('regression', 'classification'):
+             raise ValueError("problem_type must be either 'regression' or 'classification'")
+
+         if algorithm == 'sklearn':
+             if problem_type == 'regression':
+                 self.model = GradientBoostingRegressor(
+                     n_estimators=n_estimators,
+                     learning_rate=learning_rate,
+                     max_depth=max_depth,
+                     random_state=random_state
+                 )
+             else:
+                 self.model = GradientBoostingClassifier(
+                     n_estimators=n_estimators,
+                     learning_rate=learning_rate,
+                     max_depth=max_depth,
+                     random_state=random_state
+                 )
+         elif algorithm == 'xgboost':
+             if not XGBOOST_AVAILABLE:
+                 raise ImportError("XGBoost is not installed. Please install it with 'pip install xgboost'")
+
+             if problem_type == 'regression':
+                 self.model = xgb.XGBRegressor(
+                     n_estimators=n_estimators,
+                     learning_rate=learning_rate,
+                     max_depth=max_depth,
+                     random_state=random_state
+                 )
+             else:
+                 self.model = xgb.XGBClassifier(
+                     n_estimators=n_estimators,
+                     learning_rate=learning_rate,
+                     max_depth=max_depth,
+                     random_state=random_state
+                 )
+         else:
+             raise ValueError("algorithm must be either 'sklearn' or 'xgboost'")
+
+     def fit(self, X: Union[np.ndarray, pd.DataFrame], y: Union[np.ndarray, pd.Series]) -> 'EconGradientBoosting':
+         """
+         Fit the Gradient Boosting model
+
+         Parameters:
+         -----------
+         X : array-like of shape (n_samples, n_features)
+             Training data
+         y : array-like of shape (n_samples,)
+             Target values
+
+         Returns:
+         --------
+         self : EconGradientBoosting
+         """
+         self.model.fit(X, y)
+         return self
+
+     def predict(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
+         """
+         Predict using the Gradient Boosting model
+
+         Parameters:
+         -----------
+         X : array-like of shape (n_samples, n_features)
+             Samples
+
+         Returns:
+         --------
+         y_pred : ndarray of shape (n_samples,)
+             Predicted values
+         """
+         return self.model.predict(X)
+
+     def feature_importance(self) -> Dict[str, Any]:
+         """
+         Get feature importances
+
+         Returns:
+         --------
+         importances : dict
+             Dictionary with feature importances (depends on algorithm)
+         """
+         if self.algorithm == 'sklearn':
+             return {
+                 'importances': self.model.feature_importances_
+             }
+         elif self.algorithm == 'xgboost':
+             # XGBoost exposes multiple importance types via the underlying booster
+             importance_types = ['weight', 'gain', 'cover', 'total_gain', 'total_cover']
+             importances = {}
+             booster = self.model.get_booster()
+             for imp_type in importance_types:
+                 try:
+                     importances[imp_type] = booster.get_score(importance_type=imp_type)
+                 except Exception:
+                     # Skip importance types that are unavailable for this booster
+                     pass
+             return importances
+
+     def evaluate(self, X: Union[np.ndarray, pd.DataFrame],
+                  y: Union[np.ndarray, pd.Series]) -> dict:
+         """
+         Evaluate model performance
+
+         Parameters:
+         -----------
+         X : array-like of shape (n_samples, n_features)
+             Test data
+         y : array-like of shape (n_samples,)
+             True values
+
+         Returns:
+         --------
+         metrics : dict
+             Dictionary with evaluation metrics
+         """
+         y_pred = self.predict(X)
+
+         if self.problem_type == 'regression':
+             mse = mean_squared_error(y, y_pred)
+             rmse = np.sqrt(mse)
+             return {
+                 'mse': mse,
+                 'rmse': rmse,
+                 'predictions': y_pred
+             }
+         else:
+             accuracy = accuracy_score(y, y_pred)
+             return {
+                 'accuracy': accuracy,
+                 'predictions': y_pred
+             }
+
+
+ def gradient_boosting_analysis(X: Union[np.ndarray, pd.DataFrame],
+                                y: Union[np.ndarray, pd.Series],
+                                algorithm: str = 'sklearn',
+                                problem_type: str = 'regression',
+                                test_size: float = 0.2,
+                                n_estimators: int = 100,
+                                learning_rate: float = 0.1,
+                                max_depth: int = 3,
+                                random_state: int = 42) -> dict:
+     """
+     Perform complete Gradient Boosting analysis
+
+     Parameters:
+     -----------
+     X : array-like of shape (n_samples, n_features)
+         Features
+     y : array-like of shape (n_samples,)
+         Target variable
+     algorithm : str, 'sklearn' or 'xgboost'
+         Which implementation to use
+     problem_type : str, 'regression' or 'classification'
+         Type of problem to solve
+     test_size : float
+         Proportion of dataset to include in test split
+     n_estimators : int
+         Number of boosting stages
+     learning_rate : float
+         Learning rate shrinks the contribution of each tree
+     max_depth : int
+         Maximum depth of the individual regression estimators
+     random_state : int
+         Random state for reproducibility
+
+     Returns:
+     --------
+     results : dict
+         Dictionary with model, predictions, and feature importances
+     """
+     # Split data
+     X_train, X_test, y_train, y_test = train_test_split(
+         X, y, test_size=test_size, random_state=random_state
+     )
+
+     # Initialize and fit model
+     gb_model = EconGradientBoosting(
+         algorithm=algorithm,
+         problem_type=problem_type,
+         n_estimators=n_estimators,
+         learning_rate=learning_rate,
+         max_depth=max_depth,
+         random_state=random_state
+     )
+     gb_model.fit(X_train, y_train)
+
+     # Evaluate model
+     train_results = gb_model.evaluate(X_train, y_train)
+     test_results = gb_model.evaluate(X_test, y_test)
+
+     # Get feature importances
+     importances = gb_model.feature_importance()
+
+     return {
+         'model': gb_model,
+         'train_results': train_results,
+         'test_results': test_results,
+         'feature_importances': importances,
+         'X_train': X_train,
+         'X_test': X_test,
+         'y_train': y_train,
+         'y_test': y_test
+     }
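For orientation (not part of the diff): a minimal sketch of calling the new gradient_boosting_analysis helper on synthetic regression data, assuming the module import path matches the file list; passing algorithm='xgboost' would additionally require the optional xgboost dependency guarded by XGBOOST_AVAILABLE.

# Illustrative only; import path assumed from the package layout above.
import numpy as np
from econometrics.advanced_methods.modern_computing_machine_learning.gradient_boosting import gradient_boosting_analysis

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 4))
y = 3.0 * X[:, 0] - 2.0 * X[:, 1] ** 2 + rng.normal(scale=0.1, size=500)

out = gradient_boosting_analysis(X, y, algorithm='sklearn', problem_type='regression',
                                 test_size=0.2, n_estimators=200, learning_rate=0.05)
print(out['test_results']['rmse'])        # held-out root mean squared error
print(out['feature_importances'])         # per-feature importances from the fitted model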