aigroup-econ-mcp 1.4.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries, and is provided for informational purposes only.
Files changed (97)
  1. PKG-INFO +344 -322
  2. README.md +335 -320
  3. __init__.py +1 -1
  4. aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
  5. aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
  6. cli.py +4 -0
  7. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
  8. econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
  9. econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
  10. econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
  11. econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
  13. econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
  14. econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
  15. econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
  16. econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
  17. econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
  18. econometrics/causal_inference/__init__.py +66 -0
  19. econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
  20. econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
  21. econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
  22. econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
  23. econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
  24. econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
  25. econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
  26. econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
  27. econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
  28. econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
  29. econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
  30. econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
  31. econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
  32. econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
  33. econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
  34. econometrics/distribution_analysis/__init__.py +28 -0
  35. econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
  36. econometrics/distribution_analysis/time_series_decomposition.py +152 -0
  37. econometrics/distribution_analysis/variance_decomposition.py +179 -0
  38. econometrics/missing_data/__init__.py +18 -0
  39. econometrics/missing_data/imputation_methods.py +219 -0
  40. econometrics/nonparametric/__init__.py +35 -0
  41. econometrics/nonparametric/gam_model.py +117 -0
  42. econometrics/nonparametric/kernel_regression.py +161 -0
  43. econometrics/nonparametric/quantile_regression.py +249 -0
  44. econometrics/nonparametric/spline_regression.py +100 -0
  45. econometrics/spatial_econometrics/__init__.py +68 -0
  46. econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
  47. econometrics/spatial_econometrics/gwr_simple.py +154 -0
  48. econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
  49. econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
  50. econometrics/spatial_econometrics/spatial_regression.py +315 -0
  51. econometrics/spatial_econometrics/spatial_weights.py +226 -0
  52. econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
  53. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
  54. econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
  55. econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
  56. econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
  57. econometrics/statistical_inference/__init__.py +21 -0
  58. econometrics/statistical_inference/bootstrap_methods.py +162 -0
  59. econometrics/statistical_inference/permutation_test.py +177 -0
  60. econometrics/survival_analysis/__init__.py +18 -0
  61. econometrics/survival_analysis/survival_models.py +259 -0
  62. econometrics/tests/causal_inference_tests/__init__.py +3 -0
  63. econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
  64. econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
  65. econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
  66. econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
  67. econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
  68. econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
  69. econometrics//321/206/320/254/320/272/321/205/342/225/235/320/220/321/205/320/237/320/241/321/205/320/264/320/267/321/207/342/226/222/342/225/227/321/204/342/225/235/320/250/321/205/320/225/320/230/321/207/342/225/221/320/267/321/205/320/230/320/226/321/206/320/256/320/240.md +544 -0
  70. pyproject.toml +9 -2
  71. server.py +15 -1
  72. tools/__init__.py +75 -1
  73. tools/causal_inference_adapter.py +658 -0
  74. tools/distribution_analysis_adapter.py +121 -0
  75. tools/gwr_simple_adapter.py +54 -0
  76. tools/machine_learning_adapter.py +567 -0
  77. tools/mcp_tool_groups/__init__.py +15 -1
  78. tools/mcp_tool_groups/causal_inference_tools.py +643 -0
  79. tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
  80. tools/mcp_tool_groups/machine_learning_tools.py +422 -0
  81. tools/mcp_tool_groups/microecon_tools.py +325 -0
  82. tools/mcp_tool_groups/missing_data_tools.py +117 -0
  83. tools/mcp_tool_groups/nonparametric_tools.py +225 -0
  84. tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
  85. tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
  86. tools/mcp_tools_registry.py +13 -3
  87. tools/microecon_adapter.py +412 -0
  88. tools/missing_data_adapter.py +73 -0
  89. tools/nonparametric_adapter.py +190 -0
  90. tools/spatial_econometrics_adapter.py +318 -0
  91. tools/statistical_inference_adapter.py +90 -0
  92. tools/survival_analysis_adapter.py +46 -0
  93. aigroup_econ_mcp-1.4.3.dist-info/METADATA +0 -710
  94. aigroup_econ_mcp-1.4.3.dist-info/RECORD +0 -92
  95. {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
  96. {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/entry_points.txt +0 -0
  97. {aigroup_econ_mcp-1.4.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/licenses/LICENSE +0 -0
econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py
@@ -0,0 +1,264 @@
+ """
+ Neural Network implementation for econometric analysis
+ """
+ import numpy as np
+ import pandas as pd
+ from sklearn.neural_network import MLPRegressor, MLPClassifier
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import mean_squared_error, accuracy_score
+ from sklearn.preprocessing import StandardScaler
+ from typing import Union, Optional, List, Tuple
+
+
+ class EconNeuralNetwork:
+     """
+     Neural Network for econometric analysis with both regression and classification capabilities
+     """
+
+     def __init__(self, problem_type: str = 'regression', hidden_layer_sizes: tuple = (100,),
+                  activation: str = 'relu', solver: str = 'adam', alpha: float = 0.0001,
+                  learning_rate: str = 'constant', learning_rate_init: float = 0.001,
+                  max_iter: int = 200, random_state: int = 42):
+         """
+         Initialize Neural Network model
+
+         Parameters:
+         -----------
+         problem_type : str, 'regression' or 'classification'
+             Type of problem to solve
+         hidden_layer_sizes : tuple
+             The ith element represents the number of neurons in the ith hidden layer
+         activation : str, 'identity', 'logistic', 'tanh', 'relu'
+             Activation function for the hidden layer
+         solver : str, 'lbfgs', 'sgd', 'adam'
+             The solver for weight optimization
+         alpha : float
+             L2 penalty (regularization term) parameter
+         learning_rate : str, 'constant', 'invscaling', 'adaptive'
+             Learning rate schedule for weight updates
+         learning_rate_init : float
+             The initial learning rate used
+         max_iter : int
+             Maximum number of iterations
+         random_state : int
+             Random state for reproducibility
+         """
+         self.problem_type = problem_type
+         self.hidden_layer_sizes = hidden_layer_sizes
+         self.activation = activation
+         self.solver = solver
+         self.alpha = alpha
+         self.learning_rate = learning_rate
+         self.learning_rate_init = learning_rate_init
+         self.max_iter = max_iter
+         self.random_state = random_state
+         self.scaler = StandardScaler()
+
+         if problem_type == 'regression':
+             self.model = MLPRegressor(
+                 hidden_layer_sizes=hidden_layer_sizes,
+                 activation=activation,
+                 solver=solver,
+                 alpha=alpha,
+                 learning_rate=learning_rate,
+                 learning_rate_init=learning_rate_init,
+                 max_iter=max_iter,
+                 random_state=random_state
+             )
+         elif problem_type == 'classification':
+             self.model = MLPClassifier(
+                 hidden_layer_sizes=hidden_layer_sizes,
+                 activation=activation,
+                 solver=solver,
+                 alpha=alpha,
+                 learning_rate=learning_rate,
+                 learning_rate_init=learning_rate_init,
+                 max_iter=max_iter,
+                 random_state=random_state
+             )
+         else:
+             raise ValueError("problem_type must be either 'regression' or 'classification'")
+
+     def fit(self, X: Union[np.ndarray, pd.DataFrame], y: Union[np.ndarray, pd.Series]) -> 'EconNeuralNetwork':
+         """
+         Fit the Neural Network model
+
+         Parameters:
+         -----------
+         X : array-like of shape (n_samples, n_features)
+             Training data
+         y : array-like of shape (n_samples,)
+             Target values
+
+         Returns:
+         --------
+         self : EconNeuralNetwork
+         """
+         # Scale features
+         X_scaled = self.scaler.fit_transform(X)
+         self.model.fit(X_scaled, y)
+         return self
+
+     def predict(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
+         """
+         Predict using the Neural Network model
+
+         Parameters:
+         -----------
+         X : array-like of shape (n_samples, n_features)
+             Samples
+
+         Returns:
+         --------
+         y_pred : ndarray of shape (n_samples,)
+             Predicted values
+         """
+         # Scale features using the same scaler
+         X_scaled = self.scaler.transform(X)
+         return self.model.predict(X_scaled)
+
+     def predict_proba(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
+         """
+         Predict class probabilities using the Neural Network model (classification only)
+
+         Parameters:
+         -----------
+         X : array-like of shape (n_samples, n_features)
+             Samples
+
+         Returns:
+         --------
+         y_proba : ndarray of shape (n_samples, n_classes)
+             Predicted class probabilities
+         """
+         if self.problem_type != 'classification':
+             raise ValueError("predict_proba is only available for classification problems")
+
+         # Scale features using the same scaler
+         X_scaled = self.scaler.transform(X)
+         return self.model.predict_proba(X_scaled)
+
+     def evaluate(self, X: Union[np.ndarray, pd.DataFrame],
+                  y: Union[np.ndarray, pd.Series]) -> dict:
+         """
+         Evaluate model performance
+
+         Parameters:
+         -----------
+         X : array-like of shape (n_samples, n_features)
+             Test data
+         y : array-like of shape (n_samples,)
+             True values
+
+         Returns:
+         --------
+         metrics : dict
+             Dictionary with evaluation metrics
+         """
+         y_pred = self.predict(X)
+
+         if self.problem_type == 'regression':
+             mse = mean_squared_error(y, y_pred)
+             rmse = np.sqrt(mse)
+             return {
+                 'mse': mse,
+                 'rmse': rmse,
+                 'predictions': y_pred
+             }
+         else:
+             accuracy = accuracy_score(y, y_pred)
+             return {
+                 'accuracy': accuracy,
+                 'predictions': y_pred
+             }
+
+
+ def neural_network_analysis(X: Union[np.ndarray, pd.DataFrame],
+                             y: Union[np.ndarray, pd.Series],
+                             problem_type: str = 'regression',
+                             hidden_layer_sizes: tuple = (100,),
+                             activation: str = 'relu',
+                             solver: str = 'adam',
+                             test_size: float = 0.2,
+                             alpha: float = 0.0001,
+                             learning_rate: str = 'constant',
+                             learning_rate_init: float = 0.001,
+                             max_iter: int = 200,
+                             random_state: int = 42) -> dict:
+     """
+     Perform complete Neural Network analysis
+
+     Parameters:
+     -----------
+     X : array-like of shape (n_samples, n_features)
+         Features
+     y : array-like of shape (n_samples,)
+         Target variable
+     problem_type : str, 'regression' or 'classification'
+         Type of problem to solve
+     hidden_layer_sizes : tuple
+         The ith element represents the number of neurons in the ith hidden layer
+     activation : str, 'identity', 'logistic', 'tanh', 'relu'
+         Activation function for the hidden layer
+     solver : str, 'lbfgs', 'sgd', 'adam'
+         The solver for weight optimization
+     test_size : float
+         Proportion of dataset to include in test split
+     alpha : float
+         L2 penalty (regularization term) parameter
+     learning_rate : str, 'constant', 'invscaling', 'adaptive'
+         Learning rate schedule for weight updates
+     learning_rate_init : float
+         The initial learning rate used
+     max_iter : int
+         Maximum number of iterations
+     random_state : int
+         Random state for reproducibility
+
+     Returns:
+     --------
+     results : dict
+         Dictionary with model, predictions, and evaluation metrics
+     """
+     # Split data
+     X_train, X_test, y_train, y_test = train_test_split(
+         X, y, test_size=test_size, random_state=random_state
+     )
+
+     # Initialize and fit model
+     nn_model = EconNeuralNetwork(
+         problem_type=problem_type,
+         hidden_layer_sizes=hidden_layer_sizes,
+         activation=activation,
+         solver=solver,
+         alpha=alpha,
+         learning_rate=learning_rate,
+         learning_rate_init=learning_rate_init,
+         max_iter=max_iter,
+         random_state=random_state
+     )
+     nn_model.fit(X_train, y_train)
+
+     # Evaluate model
+     train_results = nn_model.evaluate(X_train, y_train)
+     test_results = nn_model.evaluate(X_test, y_test)
+
+     # For classification, also get probabilities
+     if problem_type == 'classification':
+         train_proba = nn_model.predict_proba(X_train)
+         test_proba = nn_model.predict_proba(X_test)
+     else:
+         train_proba = None
+         test_proba = None
+
+     return {
+         'model': nn_model,
+         'train_results': train_results,
+         'test_results': test_results,
+         'train_proba': train_proba,
+         'test_proba': test_proba,
+         'X_train': X_train,
+         'X_test': X_test,
+         'y_train': y_train,
+         'y_test': y_test
+     }
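
The module exposes both the `EconNeuralNetwork` class and the `neural_network_analysis` convenience wrapper. A minimal usage sketch follows (not part of the diff); it assumes the wheel's top-level `econometrics` package is on the import path, and the synthetic data and hyperparameter choices are purely illustrative:

```python
# Hypothetical usage of neural_network_analysis on synthetic regression data.
import numpy as np
from econometrics.advanced_methods.modern_computing_machine_learning.neural_network import (
    neural_network_analysis,
)

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 3))                                  # three regressors
y = X @ np.array([1.5, -2.0, 0.5]) + rng.normal(0, 0.1, 500)   # linear DGP plus noise

results = neural_network_analysis(
    X, y,
    problem_type='regression',
    hidden_layer_sizes=(50, 25),   # two hidden layers instead of the default (100,)
    max_iter=1000,                 # raised from the default 200 to help convergence
)
print(results['test_results']['rmse'])   # held-out RMSE from the 20% test split
```

Note that `fit` standardizes features with `StandardScaler` before training, so callers pass raw features; the same fitted scaler is reused in `predict` and `predict_proba`.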
econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py
@@ -0,0 +1,195 @@
+ """
+ Random Forest implementation for econometric analysis
+ """
+ import numpy as np
+ import pandas as pd
+ from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import mean_squared_error, accuracy_score
+ from typing import Union, Optional, Tuple
+
+
+ class EconRandomForest:
+     """
+     Random Forest for econometric analysis with both regression and classification capabilities
+     """
+
+     def __init__(self, problem_type: str = 'regression', n_estimators: int = 100,
+                  max_depth: Optional[int] = None, random_state: int = 42):
+         """
+         Initialize Random Forest model
+
+         Parameters:
+         -----------
+         problem_type : str, 'regression' or 'classification'
+             Type of problem to solve
+         n_estimators : int
+             Number of trees in the forest
+         max_depth : int, optional
+             Maximum depth of the tree
+         random_state : int
+             Random state for reproducibility
+         """
+         self.problem_type = problem_type
+         self.n_estimators = n_estimators
+         self.max_depth = max_depth
+         self.random_state = random_state
+
+         if problem_type == 'regression':
+             self.model = RandomForestRegressor(
+                 n_estimators=n_estimators,
+                 max_depth=max_depth,
+                 random_state=random_state
+             )
+         elif problem_type == 'classification':
+             self.model = RandomForestClassifier(
+                 n_estimators=n_estimators,
+                 max_depth=max_depth,
+                 random_state=random_state
+             )
+         else:
+             raise ValueError("problem_type must be either 'regression' or 'classification'")
+
+     def fit(self, X: Union[np.ndarray, pd.DataFrame], y: Union[np.ndarray, pd.Series]) -> 'EconRandomForest':
+         """
+         Fit the Random Forest model
+
+         Parameters:
+         -----------
+         X : array-like of shape (n_samples, n_features)
+             Training data
+         y : array-like of shape (n_samples,)
+             Target values
+
+         Returns:
+         --------
+         self : EconRandomForest
+         """
+         self.model.fit(X, y)
+         return self
+
+     def predict(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
+         """
+         Predict using the Random Forest model
+
+         Parameters:
+         -----------
+         X : array-like of shape (n_samples, n_features)
+             Samples
+
+         Returns:
+         --------
+         y_pred : ndarray of shape (n_samples,)
+             Predicted values
+         """
+         return self.model.predict(X)
+
+     def feature_importance(self) -> np.ndarray:
+         """
+         Get feature importances
+
+         Returns:
+         --------
+         importances : ndarray of shape (n_features,)
+             Feature importances
+         """
+         return self.model.feature_importances_
+
+     def evaluate(self, X: Union[np.ndarray, pd.DataFrame],
+                  y: Union[np.ndarray, pd.Series]) -> dict:
+         """
+         Evaluate model performance
+
+         Parameters:
+         -----------
+         X : array-like of shape (n_samples, n_features)
+             Test data
+         y : array-like of shape (n_samples,)
+             True values
+
+         Returns:
+         --------
+         metrics : dict
+             Dictionary with evaluation metrics
+         """
+         y_pred = self.predict(X)
+
+         if self.problem_type == 'regression':
+             mse = mean_squared_error(y, y_pred)
+             rmse = np.sqrt(mse)
+             return {
+                 'mse': mse,
+                 'rmse': rmse,
+                 'predictions': y_pred
+             }
+         else:
+             accuracy = accuracy_score(y, y_pred)
+             return {
+                 'accuracy': accuracy,
+                 'predictions': y_pred
+             }
+
+
+ def random_forest_analysis(X: Union[np.ndarray, pd.DataFrame],
+                            y: Union[np.ndarray, pd.Series],
+                            problem_type: str = 'regression',
+                            test_size: float = 0.2,
+                            n_estimators: int = 100,
+                            max_depth: Optional[int] = None,
+                            random_state: int = 42) -> dict:
+     """
+     Perform complete Random Forest analysis
+
+     Parameters:
+     -----------
+     X : array-like of shape (n_samples, n_features)
+         Features
+     y : array-like of shape (n_samples,)
+         Target variable
+     problem_type : str, 'regression' or 'classification'
+         Type of problem to solve
+     test_size : float
+         Proportion of dataset to include in test split
+     n_estimators : int
+         Number of trees in the forest
+     max_depth : int, optional
+         Maximum depth of the tree
+     random_state : int
+         Random state for reproducibility
+
+     Returns:
+     --------
+     results : dict
+         Dictionary with model, predictions, and feature importances
+     """
+     # Split data
+     X_train, X_test, y_train, y_test = train_test_split(
+         X, y, test_size=test_size, random_state=random_state
+     )
+
+     # Initialize and fit model
+     rf_model = EconRandomForest(
+         problem_type=problem_type,
+         n_estimators=n_estimators,
+         max_depth=max_depth,
+         random_state=random_state
+     )
+     rf_model.fit(X_train, y_train)
+
+     # Evaluate model
+     train_results = rf_model.evaluate(X_train, y_train)
+     test_results = rf_model.evaluate(X_test, y_test)
+
+     # Get feature importances
+     importances = rf_model.feature_importance()
+
+     return {
+         'model': rf_model,
+         'train_results': train_results,
+         'test_results': test_results,
+         'feature_importances': importances,
+         'X_train': X_train,
+         'X_test': X_test,
+         'y_train': y_train,
+         'y_test': y_test
+     }
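
As with the neural network module, a thin wrapper drives the class. A minimal sketch (again assuming the package import path, with illustrative synthetic data):

```python
# Hypothetical usage of random_forest_analysis on a synthetic classification task.
import numpy as np
from econometrics.advanced_methods.modern_computing_machine_learning.random_forest import (
    random_forest_analysis,
)

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 4))
y = (X[:, 0] + 0.5 * X[:, 1] ** 2 > 0).astype(int)   # nonlinear decision rule

results = random_forest_analysis(
    X, y,
    problem_type='classification',
    n_estimators=200,   # more trees than the default 100
    max_depth=5,        # cap tree depth to limit overfitting
)
print(results['test_results']['accuracy'])
print(results['feature_importances'])   # one impurity-based score per column of X
```

Unlike the neural network and SVM wrappers, this module does not scale features; tree ensembles are invariant to monotone transformations of individual features, so no `StandardScaler` is needed.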
econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py
@@ -0,0 +1,226 @@
+ """
+ Support Vector Machine (SVM) implementation for econometric analysis
+ """
+ import numpy as np
+ import pandas as pd
+ from sklearn.svm import SVR, SVC
+ from sklearn.model_selection import train_test_split
+ from sklearn.metrics import mean_squared_error, accuracy_score
+ from sklearn.preprocessing import StandardScaler
+ from typing import Union, Optional, Dict, Any
+
+
+ class EconSVM:
+     """
+     Support Vector Machine for econometric analysis with both regression and classification capabilities
+     """
+
+     def __init__(self, problem_type: str = 'regression', kernel: str = 'rbf',
+                  C: float = 1.0, gamma: str = 'scale', random_state: int = 42):
+         """
+         Initialize SVM model
+
+         Parameters:
+         -----------
+         problem_type : str, 'regression' or 'classification'
+             Type of problem to solve
+         kernel : str, 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'
+             Specifies the kernel type to be used in the algorithm
+         C : float
+             Regularization parameter
+         gamma : str or float, 'scale' or 'auto' or float
+             Kernel coefficient for 'rbf', 'poly' and 'sigmoid'
+         random_state : int
+             Random state for reproducibility (used in probability estimation)
+         """
+         self.problem_type = problem_type
+         self.kernel = kernel
+         self.C = C
+         self.gamma = gamma
+         self.random_state = random_state
+         self.scaler = StandardScaler()
+
+         if problem_type == 'regression':
+             self.model = SVR(
+                 kernel=kernel,
+                 C=C,
+                 gamma=gamma
+             )
+         elif problem_type == 'classification':
+             self.model = SVC(
+                 kernel=kernel,
+                 C=C,
+                 gamma=gamma,
+                 random_state=random_state,
+                 probability=True
+             )
+         else:
+             raise ValueError("problem_type must be either 'regression' or 'classification'")
+
+     def fit(self, X: Union[np.ndarray, pd.DataFrame], y: Union[np.ndarray, pd.Series]) -> 'EconSVM':
+         """
+         Fit the SVM model
+
+         Parameters:
+         -----------
+         X : array-like of shape (n_samples, n_features)
+             Training data
+         y : array-like of shape (n_samples,)
+             Target values
+
+         Returns:
+         --------
+         self : EconSVM
+         """
+         # Scale features
+         X_scaled = self.scaler.fit_transform(X)
+         self.model.fit(X_scaled, y)
+         return self
+
+     def predict(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
+         """
+         Predict using the SVM model
+
+         Parameters:
+         -----------
+         X : array-like of shape (n_samples, n_features)
+             Samples
+
+         Returns:
+         --------
+         y_pred : ndarray of shape (n_samples,)
+             Predicted values
+         """
+         # Scale features using the same scaler
+         X_scaled = self.scaler.transform(X)
+         return self.model.predict(X_scaled)
+
+     def predict_proba(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
+         """
+         Predict class probabilities using the SVM model (classification only)
+
+         Parameters:
+         -----------
+         X : array-like of shape (n_samples, n_features)
+             Samples
+
+         Returns:
+         --------
+         y_proba : ndarray of shape (n_samples, n_classes)
+             Predicted class probabilities
+         """
+         if self.problem_type != 'classification':
+             raise ValueError("predict_proba is only available for classification problems")
+
+         # Scale features using the same scaler
+         X_scaled = self.scaler.transform(X)
+         return self.model.predict_proba(X_scaled)
+
+     def evaluate(self, X: Union[np.ndarray, pd.DataFrame],
+                  y: Union[np.ndarray, pd.Series]) -> dict:
+         """
+         Evaluate model performance
+
+         Parameters:
+         -----------
+         X : array-like of shape (n_samples, n_features)
+             Test data
+         y : array-like of shape (n_samples,)
+             True values
+
+         Returns:
+         --------
+         metrics : dict
+             Dictionary with evaluation metrics
+         """
+         y_pred = self.predict(X)
+
+         if self.problem_type == 'regression':
+             mse = mean_squared_error(y, y_pred)
+             rmse = np.sqrt(mse)
+             return {
+                 'mse': mse,
+                 'rmse': rmse,
+                 'predictions': y_pred
+             }
+         else:
+             accuracy = accuracy_score(y, y_pred)
+             return {
+                 'accuracy': accuracy,
+                 'predictions': y_pred
+             }
+
+
+ def svm_analysis(X: Union[np.ndarray, pd.DataFrame],
+                  y: Union[np.ndarray, pd.Series],
+                  problem_type: str = 'regression',
+                  kernel: str = 'rbf',
+                  test_size: float = 0.2,
+                  C: float = 1.0,
+                  gamma: str = 'scale',
+                  random_state: int = 42) -> dict:
+     """
+     Perform complete SVM analysis
+
+     Parameters:
+     -----------
+     X : array-like of shape (n_samples, n_features)
+         Features
+     y : array-like of shape (n_samples,)
+         Target variable
+     problem_type : str, 'regression' or 'classification'
+         Type of problem to solve
+     kernel : str, 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'
+         Specifies the kernel type to be used in the algorithm
+     test_size : float
+         Proportion of dataset to include in test split
+     C : float
+         Regularization parameter
+     gamma : str or float, 'scale' or 'auto' or float
+         Kernel coefficient for 'rbf', 'poly' and 'sigmoid'
+     random_state : int
+         Random state for reproducibility
+
+     Returns:
+     --------
+     results : dict
+         Dictionary with model, predictions, and evaluation metrics
+     """
+     # Split data
+     X_train, X_test, y_train, y_test = train_test_split(
+         X, y, test_size=test_size, random_state=random_state
+     )
+
+     # Initialize and fit model
+     svm_model = EconSVM(
+         problem_type=problem_type,
+         kernel=kernel,
+         C=C,
+         gamma=gamma,
+         random_state=random_state
+     )
+     svm_model.fit(X_train, y_train)
+
+     # Evaluate model
+     train_results = svm_model.evaluate(X_train, y_train)
+     test_results = svm_model.evaluate(X_test, y_test)
+
+     # For classification, also get probabilities
+     if problem_type == 'classification':
+         train_proba = svm_model.predict_proba(X_train)
+         test_proba = svm_model.predict_proba(X_test)
+     else:
+         train_proba = None
+         test_proba = None
+
+     return {
+         'model': svm_model,
+         'train_results': train_results,
+         'test_results': test_results,
+         'train_proba': train_proba,
+         'test_proba': test_proba,
+         'X_train': X_train,
+         'X_test': X_test,
+         'y_train': y_train,
+         'y_test': y_test
+     }
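
A matching sketch for the SVM wrapper (same assumptions as above; data and hyperparameters are illustrative):

```python
# Hypothetical usage of svm_analysis on a synthetic circular-boundary task.
import numpy as np
from econometrics.advanced_methods.modern_computing_machine_learning.support_vector_machine import (
    svm_analysis,
)

rng = np.random.default_rng(0)
X = rng.normal(size=(300, 2))
y = (X[:, 0] ** 2 + X[:, 1] ** 2 < 1).astype(int)   # circular boundary suits the RBF kernel

results = svm_analysis(X, y, problem_type='classification', kernel='rbf', C=2.0)
print(results['test_results']['accuracy'])
print(results['test_proba'][:5])   # available because EconSVM builds SVC with probability=True
```

Because `SVC(probability=True)` fits an internal cross-validated calibration step, probability estimates add noticeable training cost and can disagree slightly with the hard labels from `predict`.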