metacountregressor 0.1.308__py3-none-any.whl → 0.1.309__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacountregressor/helperprocess.py +6 -2
- metacountregressor/main.py +1 -1
- metacountregressor/metaheuristics.py +1 -0
- metacountregressor/solution.py +73 -12
- {metacountregressor-0.1.308.dist-info → metacountregressor-0.1.309.dist-info}/METADATA +1 -1
- {metacountregressor-0.1.308.dist-info → metacountregressor-0.1.309.dist-info}/RECORD +9 -9
- {metacountregressor-0.1.308.dist-info → metacountregressor-0.1.309.dist-info}/WHEEL +1 -1
- {metacountregressor-0.1.308.dist-info → metacountregressor-0.1.309.dist-info}/licenses/LICENSE.txt +0 -0
- {metacountregressor-0.1.308.dist-info → metacountregressor-0.1.309.dist-info}/top_level.txt +0 -0
@@ -4,7 +4,7 @@ import pandas as pd
|
|
4
4
|
import csv
|
5
5
|
import matplotlib.pyplot as plt
|
6
6
|
from scipy import stats as st
|
7
|
-
from sklearn.preprocessing import StandardScaler
|
7
|
+
from sklearn.preprocessing import StandardScaler, MinMaxScaler
|
8
8
|
import os
|
9
9
|
import shutil
|
10
10
|
plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
|
@@ -413,6 +413,10 @@ def transform_dataframe(df, config):
|
|
413
413
|
# Apply custom function
|
414
414
|
data = data.apply(settings['apply_func'])
|
415
415
|
output_df[column] = data
|
416
|
+
elif settings['type'] == 'normalized':
|
417
|
+
# Normalize the column
|
418
|
+
scaler = MinMaxScaler
|
419
|
+
output_df[column] = scaler.fit_transform(df[[column]]).flatten()
|
416
420
|
|
417
421
|
elif settings['type'] == 'none':
|
418
422
|
# Leave the column unchanged
|
@@ -447,7 +451,7 @@ def guess_column_type(column_name, series):
|
|
447
451
|
# Otherwise, fallback to continuous standardization
|
448
452
|
return {
|
449
453
|
'type': 'continuous',
|
450
|
-
'apply_func': (lambda x: (x - series.mean()) / series.std())
|
454
|
+
'apply_func': (lambda x: ((x - series.mean()) / series.std()) + abs(((series - series.mean()) / series.std()).min()) + 0.001)
|
451
455
|
}
|
452
456
|
else:
|
453
457
|
# Default fallback (leave the column unchanged)
|
metacountregressor/main.py
CHANGED
@@ -187,7 +187,7 @@ def main(args, **kwargs):
|
|
187
187
|
a_des, df = helperprocess.set_up_analyst_constraints(df, model_terms)
|
188
188
|
# some example arguments; these are the defaults, so the following line is just for clarity
|
189
189
|
args = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number': 1,
|
190
|
-
'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME":
|
190
|
+
'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 600, 'desicions':a_des, 'is_multi': 1}
|
191
191
|
# Fit the model with metacountregressor
|
192
192
|
# Step 5: Transform the dataset based on the configuration
|
193
193
|
#data_new = helperprocess.transform_dataframe(dataset, config)
|
@@ -422,6 +422,7 @@ class DifferentialEvolution(object):
|
|
422
422
|
self.iter = kwargs.get('_max_iter', 10000)
|
423
423
|
self.cr = kwargs.get('_crossover_perc') or kwargs.get('_cr', 0.2)
|
424
424
|
self.instance_number = str(kwargs.get('instance_number', 1))
|
425
|
+
self.instance_number = objective_function.instance_number
|
425
426
|
self.get_directory()
|
426
427
|
|
427
428
|
self._population = list()
|
metacountregressor/solution.py
CHANGED
@@ -30,6 +30,7 @@ from scipy.special import gammaln
|
|
30
30
|
from sklearn.metrics import mean_absolute_error as MAE
|
31
31
|
from sklearn.metrics import mean_squared_error as MSPE
|
32
32
|
from statsmodels.tools.numdiff import approx_fprime, approx_hess
|
33
|
+
from autograd import hessian as autograd_hessian
|
33
34
|
from sklearn.preprocessing import StandardScaler, MinMaxScaler
|
34
35
|
from texttable import Texttable
|
35
36
|
import time
|
@@ -123,6 +124,7 @@ class ObjectiveFunction(object):
|
|
123
124
|
|
124
125
|
def __init__(self, x_data, y_data, **kwargs):
|
125
126
|
self.gbl_best = 1000000.0
|
127
|
+
self.run_numerical_hessian = kwargs.get('r_nu_hess', False)
|
126
128
|
self.run_bootstrap = kwargs.get('run_bootstrap', False)
|
127
129
|
self.linear_regression = kwargs.get('linear_model', False)
|
128
130
|
self.reg_penalty = kwargs.get('reg_penalty',1)
|
@@ -186,7 +188,7 @@ class ObjectiveFunction(object):
|
|
186
188
|
self.MP = 0
|
187
189
|
# Nelder-Mead-BFGS
|
188
190
|
|
189
|
-
self._max_characteristics = kwargs.get('_max_vars',
|
191
|
+
self._max_characteristics = kwargs.get('_max_vars', 90)
|
190
192
|
|
191
193
|
self.beta_dict = dict
|
192
194
|
if 'model_terms' in kwargs:
|
@@ -611,7 +613,7 @@ class ObjectiveFunction(object):
|
|
611
613
|
Function for processing testing, and finding a suitable initial coefficient (linear intercept)
|
612
614
|
"""
|
613
615
|
if hard_code:
|
614
|
-
# Grouped
|
616
|
+
# Grouped Terrs TODO
|
615
617
|
manual_fit_spec = {
|
616
618
|
'fixed_terms': ['Constant', 'US', 'RSMS', 'MCV'],
|
617
619
|
'rdm_terms': ['RSHS:normal', 'AADT:normal', 'Curve50:normal'],
|
@@ -5602,13 +5604,42 @@ class ObjectiveFunction(object):
|
|
5602
5604
|
return covariance
|
5603
5605
|
|
5604
5606
|
|
5607
|
+
# Numerical Hessian (finite differences)
|
5608
|
+
def numerical_hessian_post(self, f, theta, epsilon=1e-5):
    """Approximate the Hessian of ``f`` at ``theta`` by central finite differences.

    Every entry is computed with the four-point stencil::

        H[i, j] ~ (f(t + e_i + e_j) - f(t + e_i - e_j)
                   - f(t - e_i + e_j) + f(t - e_i - e_j)) / (4 * epsilon ** 2)

    where ``e_k`` is ``epsilon`` times the k-th unit vector.  For ``i == j``
    the two middle points collapse onto ``theta`` and the stencil reduces to
    the ordinary second difference with step ``2 * epsilon``.

    Args:
        f: Callable that accepts a 1-D parameter vector and returns a scalar.
        theta: 1-D sequence or array of parameter values.  It is copied and
            never modified in place.
        epsilon: Step added to / subtracted from each perturbed coordinate.

    Returns:
        numpy.ndarray of shape ``(n, n)`` with ``n = len(theta)`` holding the
        (symmetric) Hessian estimate.
    """
    base = np.asarray(theta, dtype=float)
    n = len(base)
    hessian = np.zeros((n, n))
    denom = 4.0 * epsilon ** 2

    def shifted(i, sign_i, j, sign_j):
        # Evaluate f at theta moved by +/-epsilon along coordinates i and j.
        point = base.copy()
        point[i] += sign_i * epsilon
        point[j] += sign_j * epsilon
        return f(point)

    for i in range(n):
        # The Hessian is symmetric, so only the upper triangle is evaluated
        # and each value is mirrored into the lower triangle.
        for j in range(i, n):
            value = (
                shifted(i, 1, j, 1)
                - shifted(i, 1, j, -1)
                - shifted(i, -1, j, 1)
                + shifted(i, -1, j, -1)
            ) / denom
            hessian[i, j] = value
            hessian[j, i] = value
    return hessian
|
5626
|
+
|
5627
|
+
|
5605
5628
|
def _post_fit_ll_aic_bic(self, optim_res, verbose=1, robust=False, simple_fit=True, is_dispersion=0):
|
5606
5629
|
# sample_size = len(self._x_data) - len(optim_res['x']) -1
|
5607
5630
|
sample_size = len(self._x_data)
|
5608
5631
|
convergence = optim_res['success']
|
5609
5632
|
coeff_ = optim_res['x']
|
5610
5633
|
penalty = 0
|
5634
|
+
stderr_opg = None
|
5635
|
+
if self.run_numerical_hessian:
|
5636
|
+
|
5637
|
+
stderr_opg = self.stderr
|
5638
|
+
|
5611
5639
|
|
5640
|
+
|
5641
|
+
|
5642
|
+
|
5612
5643
|
if 'hess_inv' in optim_res:
|
5613
5644
|
covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
|
5614
5645
|
if robust else optim_res['hess_inv']
|
@@ -5617,9 +5648,11 @@ class ObjectiveFunction(object):
|
|
5617
5648
|
covariance = self.handle_covariance(covariance)
|
5618
5649
|
covariance = np.clip(covariance, 0, None)
|
5619
5650
|
stderr = np.sqrt(np.diag(covariance))
|
5620
|
-
|
5621
|
-
|
5622
|
-
|
5651
|
+
if stderr_opg:
|
5652
|
+
stderr = np.minimum(stderr, stderr_opg)
|
5653
|
+
|
5654
|
+
|
5655
|
+
|
5623
5656
|
if is_dispersion:
|
5624
5657
|
stderr[-1] = random.uniform(0.001, 0.005)
|
5625
5658
|
|
@@ -5912,6 +5945,9 @@ class ObjectiveFunction(object):
|
|
5912
5945
|
else:
|
5913
5946
|
self.draws = 0
|
5914
5947
|
|
5948
|
+
def hessian_loglik_function(self, params, *args):
    """Forward ``params`` and any extra positional arguments to ``self._loglik_gradient``.

    Returns whatever ``self._loglik_gradient(params, *args)`` returns.
    """
    delegate = self._loglik_gradient
    result = delegate(params, *args)
    return result
|
5950
|
+
|
5915
5951
|
def _run_optimization(self, XX, y, dispersion, initial_params, bounds, tol, mod):
|
5916
5952
|
"""
|
5917
5953
|
Run the optimization process with draws logic and update the Solution object.
|
@@ -5941,7 +5977,7 @@ class ObjectiveFunction(object):
|
|
5941
5977
|
|
5942
5978
|
|
5943
5979
|
#method = 'Nelder-Mead-BFGS'
|
5944
|
-
options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter':
|
5980
|
+
options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 20000}
|
5945
5981
|
args=(
|
5946
5982
|
X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
|
5947
5983
|
self.rdm_cor_fit, None, None, draws_grouped, XG, mod
|
@@ -5956,9 +5992,24 @@ class ObjectiveFunction(object):
|
|
5956
5992
|
),
|
5957
5993
|
method=method,
|
5958
5994
|
bounds=bounds,
|
5959
|
-
tol=tol.get('ftol', 1e-
|
5995
|
+
tol=tol.get('ftol', 1e-6), # Use 'ftol' as the default tolerance
|
5960
5996
|
options=options
|
5961
5997
|
)
|
5998
|
+
if self.run_numerical_hessian:
|
5999
|
+
std_errors = self.bootstrap_std_dev(
|
6000
|
+
initial_params=optimization_result.x,
|
6001
|
+
XX=XX,
|
6002
|
+
y=y,
|
6003
|
+
dispersion=dispersion,
|
6004
|
+
bounds=bounds,
|
6005
|
+
tol=tol,
|
6006
|
+
mod=mod,
|
6007
|
+
n_bootstraps=5
|
6008
|
+
)
|
6009
|
+
self.stderr = std_errors
|
6010
|
+
|
6011
|
+
|
6012
|
+
|
5962
6013
|
|
5963
6014
|
|
5964
6015
|
|
@@ -6032,8 +6083,8 @@ class ObjectiveFunction(object):
|
|
6032
6083
|
),
|
6033
6084
|
method=self.method_ll,
|
6034
6085
|
bounds=bounds,
|
6035
|
-
tol=tol.get('ftol', 1e-
|
6036
|
-
options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter':
|
6086
|
+
tol=tol.get('ftol', 1e-6), # Use 'ftol' as the default tolerance
|
6087
|
+
options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 200}
|
6037
6088
|
)
|
6038
6089
|
|
6039
6090
|
# Store the parameter estimates from this bootstrap iteration
|
@@ -6122,6 +6173,7 @@ class ObjectiveFunction(object):
|
|
6122
6173
|
# Validation metrics if test data is available (in-sample and out-of-sample MAE)
|
6123
6174
|
in_sample_mae = None
|
6124
6175
|
out_sample_mae = None
|
6176
|
+
out_sample_validation = None
|
6125
6177
|
if self.is_multi and XX_test is not None:
|
6126
6178
|
in_sample_mae = self.validation(
|
6127
6179
|
optimization_result['x'], y, XX, dispersion=dispersion, model_nature=mod, testing=0
|
@@ -6129,8 +6181,12 @@ class ObjectiveFunction(object):
|
|
6129
6181
|
out_sample_mae = self.validation(
|
6130
6182
|
optimization_result['x'], y_test, XX_test, dispersion=dispersion, model_nature=mod
|
6131
6183
|
)
|
6184
|
+
if self.val_percentage > 0:
|
6185
|
+
out_sample_validation = self.validation(
|
6186
|
+
optimization_result['x'], y_test, XX_test, dispersion=dispersion, model_nature=mod, testing=1
|
6187
|
+
)
|
6132
6188
|
|
6133
|
-
return log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae
|
6189
|
+
return log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae, out_sample_validation
|
6134
6190
|
|
6135
6191
|
else:
|
6136
6192
|
# Optimization failed, return None for all metrics
|
@@ -6225,7 +6281,8 @@ class ObjectiveFunction(object):
|
|
6225
6281
|
|
6226
6282
|
# Dispersion adds one additional parameter if enabled
|
6227
6283
|
dispersion_param = 1 if dispersion > 0 else 0
|
6228
|
-
|
6284
|
+
total = sum(self.get_num_params()) + dispersion_param
|
6285
|
+
return total
|
6229
6286
|
|
6230
6287
|
def _build_initial_params(self, num_coefficients, dispersion):
|
6231
6288
|
"""
|
@@ -6294,7 +6351,7 @@ class ObjectiveFunction(object):
|
|
6294
6351
|
)
|
6295
6352
|
|
6296
6353
|
# Post-process results
|
6297
|
-
log_lik, aic, bic, stderr, zvalues, pvalues, in_sample_mae, out_sample_mae = self._postprocess_results(
|
6354
|
+
log_lik, aic, bic, stderr, zvalues, pvalues, in_sample_mae, out_sample_mae, out_sample_val = self._postprocess_results(
|
6298
6355
|
optimization_result, XX, XX_test, y, mod.get('y_test'), dispersion, mod
|
6299
6356
|
)
|
6300
6357
|
|
@@ -6326,10 +6383,14 @@ class ObjectiveFunction(object):
|
|
6326
6383
|
|
6327
6384
|
# Add metrics to solution object
|
6328
6385
|
sol = Solution() # Assuming Solution is the appropriate class to store results
|
6386
|
+
|
6329
6387
|
sol.add_objective(
|
6330
6388
|
bic=bic,
|
6331
6389
|
aic=aic,
|
6332
6390
|
loglik=log_ll,
|
6391
|
+
TRAIN=in_sample_mae,
|
6392
|
+
TEST=out_sample_mae,
|
6393
|
+
VAL=out_sample_val,
|
6333
6394
|
num_parm=paramNum,
|
6334
6395
|
GOF=other_measures
|
6335
6396
|
)
|
@@ -3,18 +3,18 @@ metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0
|
|
3
3
|
metacountregressor/app_main.py,sha256=vY3GczTbGbBRalbzMkl_9jVW7RMgEOc6z2Dr1IZJv9c,10014
|
4
4
|
metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
|
5
5
|
metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
|
6
|
-
metacountregressor/helperprocess.py,sha256=
|
7
|
-
metacountregressor/main.py,sha256=
|
6
|
+
metacountregressor/helperprocess.py,sha256=8PFxX3KTsWH0MlfhniDzKQOJQ63LmJ0eg6cYhQP_fRA,26162
|
7
|
+
metacountregressor/main.py,sha256=tGOm8DdbdyDf316qIxDAre6l6GzfJIWYNYIBaSeIemI,23685
|
8
8
|
metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
|
9
|
-
metacountregressor/metaheuristics.py,sha256=
|
9
|
+
metacountregressor/metaheuristics.py,sha256=P0Xjlvhp1cEwZFACrqeeets6x8BK7F2iDyu1OfS4bog,107010
|
10
10
|
metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiLur0k,23096
|
11
11
|
metacountregressor/pareto_logger__plot.py,sha256=mEU2QN4wmsM7t39GJ_XhJ_jjsdl09JOmG0U2jICrAkI,30037
|
12
12
|
metacountregressor/setup.py,sha256=5UcQCCLR8Fm5odA3MX78WwahavxFq4mVD6oq0IuQvAY,936
|
13
13
|
metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
|
14
|
-
metacountregressor/solution.py,sha256=
|
14
|
+
metacountregressor/solution.py,sha256=YRskJOR7MU50z22mdt5J9KLMmzHXZNXGnHRMLQPc3R0,319113
|
15
15
|
metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
|
16
|
-
metacountregressor-0.1.
|
17
|
-
metacountregressor-0.1.
|
18
|
-
metacountregressor-0.1.
|
19
|
-
metacountregressor-0.1.
|
20
|
-
metacountregressor-0.1.
|
16
|
+
metacountregressor-0.1.309.dist-info/licenses/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
17
|
+
metacountregressor-0.1.309.dist-info/METADATA,sha256=vLvLKlMnboMQGkDkupIo-Uwr9gx-rdM5HuEvrt08uMs,23581
|
18
|
+
metacountregressor-0.1.309.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
|
19
|
+
metacountregressor-0.1.309.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
|
20
|
+
metacountregressor-0.1.309.dist-info/RECORD,,
|
{metacountregressor-0.1.308.dist-info → metacountregressor-0.1.309.dist-info}/licenses/LICENSE.txt
RENAMED
File without changes
|
File without changes
|