aigroup-econ-mcp 1.3.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. .gitignore +253 -0
  2. PKG-INFO +732 -0
  3. README.md +687 -0
  4. __init__.py +14 -0
  5. aigroup_econ_mcp-2.0.1.dist-info/METADATA +732 -0
  6. aigroup_econ_mcp-2.0.1.dist-info/RECORD +170 -0
  7. aigroup_econ_mcp-2.0.1.dist-info/entry_points.txt +2 -0
  8. aigroup_econ_mcp-2.0.1.dist-info/licenses/LICENSE +21 -0
  9. cli.py +32 -0
  10. econometrics/README.md +18 -0
  11. econometrics/__init__.py +191 -0
  12. econometrics/advanced_methods/modern_computing_machine_learning/__init__.py +30 -0
  13. econometrics/advanced_methods/modern_computing_machine_learning/causal_forest.py +253 -0
  14. econometrics/advanced_methods/modern_computing_machine_learning/double_ml.py +268 -0
  15. econometrics/advanced_methods/modern_computing_machine_learning/gradient_boosting.py +249 -0
  16. econometrics/advanced_methods/modern_computing_machine_learning/hierarchical_clustering.py +243 -0
  17. econometrics/advanced_methods/modern_computing_machine_learning/kmeans_clustering.py +293 -0
  18. econometrics/advanced_methods/modern_computing_machine_learning/neural_network.py +264 -0
  19. econometrics/advanced_methods/modern_computing_machine_learning/random_forest.py +195 -0
  20. econometrics/advanced_methods/modern_computing_machine_learning/support_vector_machine.py +226 -0
  21. econometrics/advanced_methods/modern_computing_machine_learning/test_all_modules.py +329 -0
  22. econometrics/advanced_methods/modern_computing_machine_learning/test_report.md +107 -0
  23. econometrics/basic_parametric_estimation/__init__.py +31 -0
  24. econometrics/basic_parametric_estimation/gmm/__init__.py +13 -0
  25. econometrics/basic_parametric_estimation/gmm/gmm_model.py +256 -0
  26. econometrics/basic_parametric_estimation/mle/__init__.py +13 -0
  27. econometrics/basic_parametric_estimation/mle/mle_model.py +241 -0
  28. econometrics/basic_parametric_estimation/ols/__init__.py +13 -0
  29. econometrics/basic_parametric_estimation/ols/ols_model.py +141 -0
  30. econometrics/causal_inference/__init__.py +66 -0
  31. econometrics/causal_inference/causal_identification_strategy/__init__.py +104 -0
  32. econometrics/causal_inference/causal_identification_strategy/control_function.py +112 -0
  33. econometrics/causal_inference/causal_identification_strategy/difference_in_differences.py +107 -0
  34. econometrics/causal_inference/causal_identification_strategy/event_study.py +119 -0
  35. econometrics/causal_inference/causal_identification_strategy/first_difference.py +89 -0
  36. econometrics/causal_inference/causal_identification_strategy/fixed_effects.py +103 -0
  37. econometrics/causal_inference/causal_identification_strategy/hausman_test.py +69 -0
  38. econometrics/causal_inference/causal_identification_strategy/instrumental_variables.py +145 -0
  39. econometrics/causal_inference/causal_identification_strategy/mediation_analysis.py +121 -0
  40. econometrics/causal_inference/causal_identification_strategy/moderation_analysis.py +109 -0
  41. econometrics/causal_inference/causal_identification_strategy/propensity_score_matching.py +140 -0
  42. econometrics/causal_inference/causal_identification_strategy/random_effects.py +100 -0
  43. econometrics/causal_inference/causal_identification_strategy/regression_discontinuity.py +98 -0
  44. econometrics/causal_inference/causal_identification_strategy/synthetic_control.py +111 -0
  45. econometrics/causal_inference/causal_identification_strategy/triple_difference.py +86 -0
  46. econometrics/distribution_analysis/__init__.py +28 -0
  47. econometrics/distribution_analysis/oaxaca_blinder.py +184 -0
  48. econometrics/distribution_analysis/time_series_decomposition.py +152 -0
  49. econometrics/distribution_analysis/variance_decomposition.py +179 -0
  50. econometrics/missing_data/__init__.py +18 -0
  51. econometrics/missing_data/imputation_methods.py +219 -0
  52. econometrics/missing_data/missing_data_measurement_error/__init__.py +0 -0
  53. econometrics/model_specification_diagnostics_robust_inference/README.md +173 -0
  54. econometrics/model_specification_diagnostics_robust_inference/__init__.py +78 -0
  55. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/__init__.py +20 -0
  56. econometrics/model_specification_diagnostics_robust_inference/diagnostic_tests/diagnostic_tests_model.py +149 -0
  57. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/__init__.py +15 -0
  58. econometrics/model_specification_diagnostics_robust_inference/generalized_least_squares/gls_model.py +130 -0
  59. econometrics/model_specification_diagnostics_robust_inference/model_selection/__init__.py +18 -0
  60. econometrics/model_specification_diagnostics_robust_inference/model_selection/model_selection_model.py +286 -0
  61. econometrics/model_specification_diagnostics_robust_inference/regularization/__init__.py +15 -0
  62. econometrics/model_specification_diagnostics_robust_inference/regularization/regularization_model.py +177 -0
  63. econometrics/model_specification_diagnostics_robust_inference/robust_errors/__init__.py +15 -0
  64. econometrics/model_specification_diagnostics_robust_inference/robust_errors/robust_errors_model.py +122 -0
  65. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/__init__.py +15 -0
  66. econometrics/model_specification_diagnostics_robust_inference/simultaneous_equations/simultaneous_equations_model.py +246 -0
  67. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/__init__.py +15 -0
  68. econometrics/model_specification_diagnostics_robust_inference/weighted_least_squares/wls_model.py +127 -0
  69. econometrics/nonparametric/__init__.py +35 -0
  70. econometrics/nonparametric/gam_model.py +117 -0
  71. econometrics/nonparametric/kernel_regression.py +161 -0
  72. econometrics/nonparametric/nonparametric_semiparametric_methods/__init__.py +0 -0
  73. econometrics/nonparametric/quantile_regression.py +249 -0
  74. econometrics/nonparametric/spline_regression.py +100 -0
  75. econometrics/spatial_econometrics/__init__.py +68 -0
  76. econometrics/spatial_econometrics/geographically_weighted_regression.py +211 -0
  77. econometrics/spatial_econometrics/gwr_simple.py +154 -0
  78. econometrics/spatial_econometrics/spatial_autocorrelation.py +356 -0
  79. econometrics/spatial_econometrics/spatial_durbin_model.py +177 -0
  80. econometrics/spatial_econometrics/spatial_econometrics_new/__init__.py +0 -0
  81. econometrics/spatial_econometrics/spatial_regression.py +315 -0
  82. econometrics/spatial_econometrics/spatial_weights.py +226 -0
  83. econometrics/specific_data_modeling/micro_discrete_limited_data/README.md +164 -0
  84. econometrics/specific_data_modeling/micro_discrete_limited_data/__init__.py +40 -0
  85. econometrics/specific_data_modeling/micro_discrete_limited_data/count_data_models.py +311 -0
  86. econometrics/specific_data_modeling/micro_discrete_limited_data/discrete_choice_models.py +294 -0
  87. econometrics/specific_data_modeling/micro_discrete_limited_data/limited_dependent_variable_models.py +282 -0
  88. econometrics/specific_data_modeling/survival_duration_data/__init__.py +0 -0
  89. econometrics/specific_data_modeling/time_series_panel_data/__init__.py +143 -0
  90. econometrics/specific_data_modeling/time_series_panel_data/arima_model.py +104 -0
  91. econometrics/specific_data_modeling/time_series_panel_data/cointegration_vecm.py +334 -0
  92. econometrics/specific_data_modeling/time_series_panel_data/dynamic_panel_models.py +653 -0
  93. econometrics/specific_data_modeling/time_series_panel_data/exponential_smoothing.py +176 -0
  94. econometrics/specific_data_modeling/time_series_panel_data/garch_model.py +198 -0
  95. econometrics/specific_data_modeling/time_series_panel_data/panel_diagnostics.py +125 -0
  96. econometrics/specific_data_modeling/time_series_panel_data/panel_var.py +60 -0
  97. econometrics/specific_data_modeling/time_series_panel_data/structural_break_tests.py +87 -0
  98. econometrics/specific_data_modeling/time_series_panel_data/time_varying_parameter_models.py +106 -0
  99. econometrics/specific_data_modeling/time_series_panel_data/unit_root_tests.py +204 -0
  100. econometrics/specific_data_modeling/time_series_panel_data/var_svar_model.py +372 -0
  101. econometrics/statistical_inference/__init__.py +21 -0
  102. econometrics/statistical_inference/bootstrap_methods.py +162 -0
  103. econometrics/statistical_inference/permutation_test.py +177 -0
  104. econometrics/statistical_inference/statistical_inference_techniques/__init__.py +0 -0
  105. econometrics/statistics/distribution_decomposition_methods/__init__.py +0 -0
  106. econometrics/survival_analysis/__init__.py +18 -0
  107. econometrics/survival_analysis/survival_models.py +259 -0
  108. econometrics/tests/basic_parametric_estimation_tests/__init__.py +3 -0
  109. econometrics/tests/basic_parametric_estimation_tests/test_gmm.py +128 -0
  110. econometrics/tests/basic_parametric_estimation_tests/test_mle.py +127 -0
  111. econometrics/tests/basic_parametric_estimation_tests/test_ols.py +100 -0
  112. econometrics/tests/causal_inference_tests/__init__.py +3 -0
  113. econometrics/tests/causal_inference_tests/detailed_test.py +441 -0
  114. econometrics/tests/causal_inference_tests/test_all_methods.py +418 -0
  115. econometrics/tests/causal_inference_tests/test_causal_identification_strategy.py +202 -0
  116. econometrics/tests/causal_inference_tests/test_difference_in_differences.py +53 -0
  117. econometrics/tests/causal_inference_tests/test_instrumental_variables.py +44 -0
  118. econometrics/tests/model_specification_diagnostics_tests/__init__.py +3 -0
  119. econometrics/tests/model_specification_diagnostics_tests/test_diagnostic_tests.py +86 -0
  120. econometrics/tests/model_specification_diagnostics_tests/test_robust_errors.py +89 -0
  121. econometrics/tests/specific_data_modeling_tests/__init__.py +3 -0
  122. econometrics/tests/specific_data_modeling_tests/test_arima.py +98 -0
  123. econometrics/tests/specific_data_modeling_tests/test_dynamic_panel.py +198 -0
  124. econometrics/tests/specific_data_modeling_tests/test_exponential_smoothing.py +105 -0
  125. econometrics/tests/specific_data_modeling_tests/test_garch.py +118 -0
  126. econometrics/tests/specific_data_modeling_tests/test_micro_discrete_limited_data.py +189 -0
  127. econometrics/tests/specific_data_modeling_tests/test_unit_root.py +156 -0
  128. econometrics/tests/specific_data_modeling_tests/test_var.py +124 -0
  129. econometrics//321/206/320/254/320/272/321/205/342/225/235/320/220/321/205/320/237/320/241/321/205/320/264/320/267/321/207/342/226/222/342/225/227/321/204/342/225/235/320/250/321/205/320/225/320/230/321/207/342/225/221/320/267/321/205/320/230/320/226/321/206/320/256/320/240.md +544 -0
  130. prompts/__init__.py +0 -0
  131. prompts/analysis_guides.py +43 -0
  132. pyproject.toml +85 -0
  133. resources/MCP_MASTER_GUIDE.md +422 -0
  134. resources/MCP_TOOLS_DATA_FORMAT_GUIDE.md +185 -0
  135. resources/__init__.py +0 -0
  136. server.py +97 -0
  137. tools/README.md +88 -0
  138. tools/__init__.py +119 -0
  139. tools/causal_inference_adapter.py +658 -0
  140. tools/data_loader.py +213 -0
  141. tools/decorators.py +38 -0
  142. tools/distribution_analysis_adapter.py +121 -0
  143. tools/econometrics_adapter.py +286 -0
  144. tools/gwr_simple_adapter.py +54 -0
  145. tools/machine_learning_adapter.py +567 -0
  146. tools/mcp_tool_groups/__init__.py +15 -0
  147. tools/mcp_tool_groups/basic_parametric_tools.py +173 -0
  148. tools/mcp_tool_groups/causal_inference_tools.py +643 -0
  149. tools/mcp_tool_groups/distribution_analysis_tools.py +169 -0
  150. tools/mcp_tool_groups/machine_learning_tools.py +422 -0
  151. tools/mcp_tool_groups/microecon_tools.py +325 -0
  152. tools/mcp_tool_groups/missing_data_tools.py +117 -0
  153. tools/mcp_tool_groups/model_specification_tools.py +402 -0
  154. tools/mcp_tool_groups/nonparametric_tools.py +225 -0
  155. tools/mcp_tool_groups/spatial_econometrics_tools.py +323 -0
  156. tools/mcp_tool_groups/statistical_inference_tools.py +131 -0
  157. tools/mcp_tool_groups/time_series_tools.py +494 -0
  158. tools/mcp_tools_registry.py +124 -0
  159. tools/microecon_adapter.py +412 -0
  160. tools/missing_data_adapter.py +73 -0
  161. tools/model_specification_adapter.py +369 -0
  162. tools/nonparametric_adapter.py +190 -0
  163. tools/output_formatter.py +563 -0
  164. tools/spatial_econometrics_adapter.py +318 -0
  165. tools/statistical_inference_adapter.py +90 -0
  166. tools/survival_analysis_adapter.py +46 -0
  167. tools/time_series_panel_data_adapter.py +858 -0
  168. tools/time_series_panel_data_tools.py +65 -0
  169. aigroup_econ_mcp/__init__.py +0 -19
  170. aigroup_econ_mcp/cli.py +0 -82
  171. aigroup_econ_mcp/config.py +0 -561
  172. aigroup_econ_mcp/server.py +0 -452
  173. aigroup_econ_mcp/tools/__init__.py +0 -19
  174. aigroup_econ_mcp/tools/base.py +0 -470
  175. aigroup_econ_mcp/tools/cache.py +0 -533
  176. aigroup_econ_mcp/tools/data_loader.py +0 -195
  177. aigroup_econ_mcp/tools/file_parser.py +0 -1027
  178. aigroup_econ_mcp/tools/machine_learning.py +0 -60
  179. aigroup_econ_mcp/tools/ml_ensemble.py +0 -210
  180. aigroup_econ_mcp/tools/ml_evaluation.py +0 -272
  181. aigroup_econ_mcp/tools/ml_models.py +0 -54
  182. aigroup_econ_mcp/tools/ml_regularization.py +0 -186
  183. aigroup_econ_mcp/tools/monitoring.py +0 -555
  184. aigroup_econ_mcp/tools/optimized_example.py +0 -229
  185. aigroup_econ_mcp/tools/panel_data.py +0 -619
  186. aigroup_econ_mcp/tools/regression.py +0 -214
  187. aigroup_econ_mcp/tools/statistics.py +0 -154
  188. aigroup_econ_mcp/tools/time_series.py +0 -698
  189. aigroup_econ_mcp/tools/timeout.py +0 -283
  190. aigroup_econ_mcp/tools/tool_descriptions.py +0 -410
  191. aigroup_econ_mcp/tools/tool_handlers.py +0 -1016
  192. aigroup_econ_mcp/tools/tool_registry.py +0 -478
  193. aigroup_econ_mcp/tools/validation.py +0 -482
  194. aigroup_econ_mcp-1.3.3.dist-info/METADATA +0 -525
  195. aigroup_econ_mcp-1.3.3.dist-info/RECORD +0 -30
  196. aigroup_econ_mcp-1.3.3.dist-info/entry_points.txt +0 -2
  197. /aigroup_econ_mcp-1.3.3.dist-info/licenses/LICENSE → /LICENSE +0 -0
  198. {aigroup_econ_mcp-1.3.3.dist-info → aigroup_econ_mcp-2.0.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,293 @@
1
+ """
2
+ K-Means Clustering implementation for econometric analysis
3
+ """
4
+ import numpy as np
5
+ import pandas as pd
6
+ from sklearn.cluster import KMeans, MiniBatchKMeans
7
+ from sklearn.metrics import silhouette_score, calinski_harabasz_score
8
+ from sklearn.preprocessing import StandardScaler
9
+ from sklearn.decomposition import PCA
10
+ from typing import Union, Optional, Dict, Any
11
+
12
+ # 可选导入matplotlib
13
+ try:
14
+ import matplotlib.pyplot as plt
15
+ MATPLOTLIB_AVAILABLE = True
16
+ except ImportError:
17
+ MATPLOTLIB_AVAILABLE = False
18
+ except UnicodeDecodeError:
19
+ # 处理编码问题
20
+ MATPLOTLIB_AVAILABLE = False
21
+
22
+
23
class EconKMeans:
    """
    K-Means clustering for econometric analysis.

    Features are standardized with a ``StandardScaler`` before clustering so
    that variables measured on different scales contribute comparably to the
    Euclidean distances K-Means minimizes. ``MiniBatchKMeans`` can be used
    for large datasets via ``use_minibatch=True``.
    """

    def __init__(self, n_clusters: int = 8, init: str = 'k-means++', n_init: int = 10,
                 max_iter: int = 300, random_state: int = 42, algorithm: str = 'lloyd',
                 use_minibatch: bool = False, batch_size: int = 1000):
        """
        Initialize K-Means clustering model.

        Parameters
        ----------
        n_clusters : int
            Number of clusters to form.
        init : str, 'k-means++' or 'random'
            Method for centroid initialization.
        n_init : int
            Number of times the k-means algorithm will be run with different
            centroid seeds; the best run (lowest inertia) is kept.
        max_iter : int
            Maximum number of iterations of the k-means algorithm for a single run.
        random_state : int
            Random state for reproducibility.
        algorithm : str, 'lloyd' or 'elkan'
            K-means algorithm to use (only applies when use_minibatch=False;
            MiniBatchKMeans does not take this argument).
        use_minibatch : bool
            Whether to use MiniBatchKMeans for large datasets.
        batch_size : int
            Size of the mini batches (only used when use_minibatch=True).
        """
        self.n_clusters = n_clusters
        self.init = init
        self.n_init = n_init
        self.max_iter = max_iter
        self.random_state = random_state
        self.algorithm = algorithm
        self.use_minibatch = use_minibatch
        self.batch_size = batch_size
        self.scaler = StandardScaler()

        if use_minibatch:
            # Bug fix: n_init was previously not forwarded to MiniBatchKMeans,
            # so the caller's setting was silently ignored in mini-batch mode
            # and sklearn's own (different) default was used instead.
            self.model = MiniBatchKMeans(
                n_clusters=n_clusters,
                init=init,
                n_init=n_init,
                max_iter=max_iter,
                random_state=random_state,
                batch_size=batch_size
            )
        else:
            self.model = KMeans(
                n_clusters=n_clusters,
                init=init,
                n_init=n_init,
                max_iter=max_iter,
                random_state=random_state,
                algorithm=algorithm
            )

    def fit(self, X: Union[np.ndarray, pd.DataFrame]) -> 'EconKMeans':
        """
        Fit the K-Means clustering model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        Returns
        -------
        self : EconKMeans
        """
        # Fit the scaler on the training data, then cluster in scaled space.
        X_scaled = self.scaler.fit_transform(X)
        self.model.fit(X_scaled)
        return self

    def predict(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
        """
        Predict the closest cluster each sample in X belongs to.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            New data to predict.

        Returns
        -------
        labels : ndarray of shape (n_samples,)
            Index of the cluster each sample belongs to.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before fit() / fit_predict().
        """
        # Reuse the scaler fitted during training so new data is projected
        # into the same standardized space as the cluster centers.
        X_scaled = self.scaler.transform(X)
        return self.model.predict(X_scaled)

    def fit_predict(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
        """
        Compute cluster centers and predict cluster index for each sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        Returns
        -------
        labels : ndarray of shape (n_samples,)
            Index of the cluster each sample belongs to.
        """
        X_scaled = self.scaler.fit_transform(X)
        return self.model.fit_predict(X_scaled)

    def cluster_centers(self) -> np.ndarray:
        """
        Get the cluster centers mapped back to the original feature space.

        Returns
        -------
        centers : ndarray of shape (n_clusters, n_features)
            Coordinates of cluster centers in unscaled units.
        """
        # Centers are learned in standardized space; undo the scaling so
        # they are interpretable in the original units of X.
        return self.scaler.inverse_transform(self.model.cluster_centers_)

    def evaluate(self, X: Union[np.ndarray, pd.DataFrame]) -> Dict[str, float]:
        """
        Evaluate clustering performance on X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data to evaluate.

        Returns
        -------
        metrics : dict
            'silhouette_score', 'calinski_harabasz_score', 'inertia'
            (final within-cluster sum of squares) and 'n_iter'.

        Raises
        ------
        ValueError
            Propagated from silhouette_score / calinski_harabasz_score when
            fewer than 2 distinct cluster labels are present (e.g. n_clusters=1).
        """
        X_scaled = self.scaler.transform(X)
        labels = self.model.predict(X_scaled)

        # Metrics are computed in the standardized space, consistent with
        # the space in which the clusters were fitted.
        silhouette = silhouette_score(X_scaled, labels)
        calinski_harabasz = calinski_harabasz_score(X_scaled, labels)

        return {
            'silhouette_score': silhouette,
            'calinski_harabasz_score': calinski_harabasz,
            'inertia': self.model.inertia_,
            'n_iter': self.model.n_iter_
        }

    def visualize_clusters(self, X: Union[np.ndarray, pd.DataFrame],
                           max_features: int = 10, figsize: tuple = (12, 8)) -> Optional[Any]:
        """
        Visualize clusters, using PCA to project to 2-D when needed.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data to visualize.
        max_features : int
            Kept for backward compatibility; currently unused by the
            plotting logic.
        figsize : tuple
            Figure size.

        Returns
        -------
        fig : matplotlib Figure or None
            The figure object, or None if matplotlib is not available.
        """
        if not MATPLOTLIB_AVAILABLE:
            print("Matplotlib is not available. Skipping visualization.")
            return None

        X_scaled = self.scaler.transform(X)
        labels = self.model.predict(X_scaled)

        # Project to 2 components for plotting when X has >2 features.
        if X_scaled.shape[1] > 2:
            pca = PCA(n_components=min(2, X_scaled.shape[1]))
            X_pca = pca.fit_transform(X_scaled)
        else:
            X_pca = X_scaled

        fig, ax = plt.subplots(figsize=figsize)

        # Points colored by cluster assignment.
        scatter = ax.scatter(X_pca[:, 0], X_pca[:, 1], c=labels, cmap='viridis', alpha=0.7)

        # Overlay cluster centers (projected with the same PCA when used).
        if hasattr(self.model, 'cluster_centers_'):
            centers_pca = pca.transform(self.model.cluster_centers_) if X_scaled.shape[1] > 2 else self.model.cluster_centers_
            ax.scatter(centers_pca[:, 0], centers_pca[:, 1], c='red', marker='x', s=200, linewidths=3)

        ax.set_xlabel('Principal Component 1' if X_scaled.shape[1] > 2 else 'Feature 1')
        ax.set_ylabel('Principal Component 2' if X_scaled.shape[1] > 2 else 'Feature 2')
        ax.set_title('K-Means Clustering Results')

        plt.colorbar(scatter, ax=ax)

        return fig
228
+
229
+
230
def kmeans_analysis(X: Union[np.ndarray, pd.DataFrame],
                    n_clusters: int = 8,
                    init: str = 'k-means++',
                    n_init: int = 10,
                    max_iter: int = 300,
                    random_state: int = 42,
                    algorithm: str = 'lloyd',
                    use_minibatch: bool = False,
                    batch_size: int = 1000) -> dict:
    """
    Run a complete K-Means clustering analysis on X.

    Convenience wrapper around EconKMeans: fits the model, then bundles the
    cluster labels, the cluster centers (in the original feature space) and
    the internal evaluation metrics into one result dictionary.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Features to cluster.
    n_clusters : int
        Number of clusters to form.
    init : str, 'k-means++' or 'random'
        Centroid initialization method.
    n_init : int
        Number of runs with different centroid seeds.
    max_iter : int
        Maximum iterations per run.
    random_state : int
        Random state for reproducibility.
    algorithm : str, 'lloyd' or 'elkan'
        K-means variant to use.
    use_minibatch : bool
        Whether to use MiniBatchKMeans for large datasets.
    batch_size : int
        Mini-batch size (only used when use_minibatch=True).

    Returns
    -------
    results : dict
        Keys: 'model', 'labels', 'cluster_centers', 'metrics', 'X'.
    """
    clusterer = EconKMeans(
        n_clusters=n_clusters,
        init=init,
        n_init=n_init,
        max_iter=max_iter,
        random_state=random_state,
        algorithm=algorithm,
        use_minibatch=use_minibatch,
        batch_size=batch_size,
    )

    assigned_labels = clusterer.fit_predict(X)

    # Dict values are evaluated in order, preserving the original sequence:
    # labels -> centers -> metrics.
    return {
        'model': clusterer,
        'labels': assigned_labels,
        'cluster_centers': clusterer.cluster_centers(),
        'metrics': clusterer.evaluate(X),
        'X': X,
    }
@@ -0,0 +1,264 @@
1
+ """
2
+ Neural Network implementation for econometric analysis
3
+ """
4
+ import numpy as np
5
+ import pandas as pd
6
+ from sklearn.neural_network import MLPRegressor, MLPClassifier
7
+ from sklearn.model_selection import train_test_split
8
+ from sklearn.metrics import mean_squared_error, accuracy_score
9
+ from sklearn.preprocessing import StandardScaler
10
+ from typing import Union, Optional, List, Tuple
11
+
12
+
13
class EconNeuralNetwork:
    """
    Multilayer-perceptron wrapper for econometric analysis.

    Wraps sklearn's MLPRegressor / MLPClassifier behind a single interface,
    standardizing features with a StandardScaler before fitting and
    predicting. The problem type ('regression' or 'classification') selects
    which underlying estimator is built.
    """

    def __init__(self, problem_type: str = 'regression', hidden_layer_sizes: tuple = (100,),
                 activation: str = 'relu', solver: str = 'adam', alpha: float = 0.0001,
                 learning_rate: str = 'constant', learning_rate_init: float = 0.001,
                 max_iter: int = 200, random_state: int = 42):
        """
        Initialize the neural network model.

        Parameters
        ----------
        problem_type : str, 'regression' or 'classification'
            Type of problem to solve.
        hidden_layer_sizes : tuple
            Number of neurons in each hidden layer.
        activation : str, 'identity', 'logistic', 'tanh' or 'relu'
            Activation function for the hidden layers.
        solver : str, 'lbfgs', 'sgd' or 'adam'
            Weight-optimization solver.
        alpha : float
            L2 regularization strength.
        learning_rate : str, 'constant', 'invscaling' or 'adaptive'
            Learning-rate schedule for weight updates.
        learning_rate_init : float
            Initial learning rate.
        max_iter : int
            Maximum number of iterations.
        random_state : int
            Random state for reproducibility.

        Raises
        ------
        ValueError
            If problem_type is neither 'regression' nor 'classification'.
        """
        self.problem_type = problem_type
        self.hidden_layer_sizes = hidden_layer_sizes
        self.activation = activation
        self.solver = solver
        self.alpha = alpha
        self.learning_rate = learning_rate
        self.learning_rate_init = learning_rate_init
        self.max_iter = max_iter
        self.random_state = random_state
        self.scaler = StandardScaler()

        # Both MLP estimators take the same hyperparameters; build them once.
        mlp_kwargs = dict(
            hidden_layer_sizes=hidden_layer_sizes,
            activation=activation,
            solver=solver,
            alpha=alpha,
            learning_rate=learning_rate,
            learning_rate_init=learning_rate_init,
            max_iter=max_iter,
            random_state=random_state,
        )

        if problem_type == 'regression':
            self.model = MLPRegressor(**mlp_kwargs)
        elif problem_type == 'classification':
            self.model = MLPClassifier(**mlp_kwargs)
        else:
            raise ValueError("problem_type must be either 'regression' or 'classification'")

    def fit(self, X: Union[np.ndarray, pd.DataFrame], y: Union[np.ndarray, pd.Series]) -> 'EconNeuralNetwork':
        """
        Fit the neural network on training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.
        y : array-like of shape (n_samples,)
            Target values.

        Returns
        -------
        self : EconNeuralNetwork
        """
        # Standardize the features, then train the underlying MLP.
        scaled = self.scaler.fit_transform(X)
        self.model.fit(scaled, y)
        return self

    def predict(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
        """
        Predict targets for X using the fitted model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted values.
        """
        # Apply the scaler fitted in fit() so inputs match training space.
        scaled = self.scaler.transform(X)
        return self.model.predict(scaled)

    def predict_proba(self, X: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
        """
        Predict class probabilities (classification only).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples.

        Returns
        -------
        y_proba : ndarray of shape (n_samples, n_classes)
            Predicted class probabilities.

        Raises
        ------
        ValueError
            If the model was built for regression.
        """
        if self.problem_type != 'classification':
            raise ValueError("predict_proba is only available for classification problems")

        scaled = self.scaler.transform(X)
        return self.model.predict_proba(scaled)

    def evaluate(self, X: Union[np.ndarray, pd.DataFrame],
                 y: Union[np.ndarray, pd.Series]) -> dict:
        """
        Evaluate model performance on (X, y).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test data.
        y : array-like of shape (n_samples,)
            True values.

        Returns
        -------
        metrics : dict
            For regression: 'mse', 'rmse', 'predictions'.
            For classification: 'accuracy', 'predictions'.
        """
        predictions = self.predict(X)

        if self.problem_type == 'regression':
            mse = mean_squared_error(y, predictions)
            return {
                'mse': mse,
                'rmse': np.sqrt(mse),
                'predictions': predictions,
            }

        return {
            'accuracy': accuracy_score(y, predictions),
            'predictions': predictions,
        }
174
+
175
+
176
def neural_network_analysis(X: Union[np.ndarray, pd.DataFrame],
                            y: Union[np.ndarray, pd.Series],
                            problem_type: str = 'regression',
                            hidden_layer_sizes: tuple = (100,),
                            activation: str = 'relu',
                            solver: str = 'adam',
                            test_size: float = 0.2,
                            alpha: float = 0.0001,
                            learning_rate: str = 'constant',
                            learning_rate_init: float = 0.001,
                            max_iter: int = 200,
                            random_state: int = 42) -> dict:
    """
    Run a complete neural-network analysis on (X, y).

    Splits the data into train/test sets, fits an EconNeuralNetwork on the
    training portion, evaluates it on both portions, and (for classification)
    also collects predicted class probabilities.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Features.
    y : array-like of shape (n_samples,)
        Target variable.
    problem_type : str, 'regression' or 'classification'
        Type of problem to solve.
    hidden_layer_sizes : tuple
        Number of neurons in each hidden layer.
    activation : str, 'identity', 'logistic', 'tanh' or 'relu'
        Hidden-layer activation function.
    solver : str, 'lbfgs', 'sgd' or 'adam'
        Weight-optimization solver.
    test_size : float
        Proportion of the dataset held out as the test split.
    alpha : float
        L2 regularization strength.
    learning_rate : str, 'constant', 'invscaling' or 'adaptive'
        Learning-rate schedule for weight updates.
    learning_rate_init : float
        Initial learning rate.
    max_iter : int
        Maximum number of iterations.
    random_state : int
        Random state for reproducibility (used for both the split and
        the network initialization).

    Returns
    -------
    results : dict
        Keys: 'model', 'train_results', 'test_results', 'train_proba',
        'test_proba', 'X_train', 'X_test', 'y_train', 'y_test'.
        The *_proba entries are None for regression problems.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    network = EconNeuralNetwork(
        problem_type=problem_type,
        hidden_layer_sizes=hidden_layer_sizes,
        activation=activation,
        solver=solver,
        alpha=alpha,
        learning_rate=learning_rate,
        learning_rate_init=learning_rate_init,
        max_iter=max_iter,
        random_state=random_state,
    )
    network.fit(X_train, y_train)

    # Evaluate on both splits (train first, matching the original order).
    train_results = network.evaluate(X_train, y_train)
    test_results = network.evaluate(X_test, y_test)

    # Probabilities are only defined for classifiers.
    is_classifier = problem_type == 'classification'
    train_proba = network.predict_proba(X_train) if is_classifier else None
    test_proba = network.predict_proba(X_test) if is_classifier else None

    return {
        'model': network,
        'train_results': train_results,
        'test_results': test_results,
        'train_proba': train_proba,
        'test_proba': test_proba,
        'X_train': X_train,
        'X_test': X_test,
        'y_train': y_train,
        'y_test': y_test,
    }