metacountregressor 0.1.306__tar.gz → 0.1.308__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {metacountregressor-0.1.306/metacountregressor.egg-info → metacountregressor-0.1.308}/PKG-INFO +1 -1
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/helperprocess.py +6 -2
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/main.py +1 -1
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/metaheuristics.py +1 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/solution.py +92 -16
- {metacountregressor-0.1.306 → metacountregressor-0.1.308/metacountregressor.egg-info}/PKG-INFO +1 -1
- metacountregressor-0.1.308/version.txt +1 -0
- metacountregressor-0.1.306/version.txt +0 -1
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/LICENSE.txt +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/MANIFEST.in +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/README.md +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/README.rst +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/__init__.py +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/_device_cust.py +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/app_main.py +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/data_split_helper.py +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/halton.py +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/main_old.py +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/pareto_file.py +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/pareto_logger__plot.py +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/setup.py +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/single_objective_finder.py +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/test_generated_paper2.py +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor.egg-info/SOURCES.txt +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor.egg-info/dependency_links.txt +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor.egg-info/not-zip-safe +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor.egg-info/requires.txt +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor.egg-info/top_level.txt +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/setup.cfg +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/setup.py +0 -0
- {metacountregressor-0.1.306 → metacountregressor-0.1.308}/tests/test.py +0 -0
{metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/helperprocess.py
RENAMED
@@ -4,7 +4,7 @@ import pandas as pd
 import csv
 import matplotlib.pyplot as plt
 from scipy import stats as st
-from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
 import os
 import shutil
 plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
@@ -413,6 +413,10 @@ def transform_dataframe(df, config):
             # Apply custom function
             data = data.apply(settings['apply_func'])
             output_df[column] = data
+        elif settings['type'] == 'normalized':
+            # Normalize the column
+            scaler = MinMaxScaler
+            output_df[column] = scaler.fit_transform(df[[column]]).flatten()
 
         elif settings['type'] == 'none':
             # Leave the column unchanged
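A note on the new 'normalized' branch: as released, scaler = MinMaxScaler binds the class itself rather than an instance, so scaler.fit_transform(df[[column]]) would raise a TypeError (fit_transform is called without self). A minimal sketch of what the branch presumably intends, using a hypothetical aadt column:

    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler

    df = pd.DataFrame({'aadt': [1200, 5400, 980, 15000]})      # hypothetical data
    scaler = MinMaxScaler()                                    # note the parentheses: instantiate
    normalized = scaler.fit_transform(df[['aadt']]).flatten()  # values rescaled into [0, 1]
    print(normalized)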
@@ -447,7 +451,7 @@ def guess_column_type(column_name, series):
         # Otherwise, fallback to continuous standardization
         return {
             'type': 'continuous',
-            'apply_func': (lambda x: (x - series.mean()) / series.std())
+            'apply_func': (lambda x: ((x - series.mean()) / series.std()) + abs(((series - series.mean()) / series.std()).min()) + 0.001)
         }
     else:
         # Default fallback (leave the column unchanged)
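The revised apply_func shifts the z-scores so the smallest value lands at exactly 0.001, keeping the standardized column strictly positive, presumably so it remains a valid input for the package's 'log' and 'sqrt' transformations. A quick numeric check with hypothetical values:

    import pandas as pd

    series = pd.Series([2.0, 4.0, 6.0, 8.0])       # hypothetical column
    z = (series - series.mean()) / series.std()    # ordinary z-scores; min is negative
    shifted = z + abs(z.min()) + 0.001             # the shift added in 0.1.308
    print(shifted.min())                           # -> 0.001, strictly positive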
{metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/main.py
RENAMED
@@ -187,7 +187,7 @@ def main(args, **kwargs):
     a_des, df = helperprocess.set_up_analyst_constraints(df, model_terms)
     # some example argument, these are defualt so the following line is just for claritity
     args = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number': 1,
-            'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME":
+            'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 600, 'desicions':a_des, 'is_multi': 1}
     # Fit the model with metacountregressor
     # Step 5: Transform the dataset based on the configuration
     #data_new = helperprocess.transform_dataframe(dataset, config)
{metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/metaheuristics.py
RENAMED
@@ -422,6 +422,7 @@ class DifferentialEvolution(object):
         self.iter = kwargs.get('_max_iter', 10000)
         self.cr = kwargs.get('_crossover_perc') or kwargs.get('_cr', 0.2)
         self.instance_number = str(kwargs.get('instance_number', 1))
+        self.instance_number = objective_function.instance_number
         self.get_directory()
 
         self._population = list()
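The added line makes the kwargs-derived assignment directly above it dead code: whatever instance_number is passed in, it is immediately overwritten from the objective function. Note also that the str() coercion is lost, which may matter if get_directory() builds a path from it. The net effect, as a sketch:

    # net effect of the hunk above (sketch, not the full __init__)
    self.instance_number = str(kwargs.get('instance_number', 1))   # now a dead assignment
    self.instance_number = objective_function.instance_number      # always wins, may not be a str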
{metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/solution.py
RENAMED
@@ -30,6 +30,7 @@ from scipy.special import gammaln
 from sklearn.metrics import mean_absolute_error as MAE
 from sklearn.metrics import mean_squared_error as MSPE
 from statsmodels.tools.numdiff import approx_fprime, approx_hess
+
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from texttable import Texttable
 import time
@@ -123,6 +124,7 @@ class ObjectiveFunction(object):
 
     def __init__(self, x_data, y_data, **kwargs):
         self.gbl_best = 1000000.0
+        self.run_numerical_hessian = kwargs.get('r_nu_hess', False)
         self.run_bootstrap = kwargs.get('run_bootstrap', False)
         self.linear_regression = kwargs.get('linear_model', False)
         self.reg_penalty = kwargs.get('reg_penalty',1)
@@ -186,7 +188,7 @@ class ObjectiveFunction(object):
         self.MP = 0
         # Nelder-Mead-BFGS
 
-        self._max_characteristics = kwargs.get('_max_vars',
+        self._max_characteristics = kwargs.get('_max_vars', 90)
 
         self.beta_dict = dict
         if 'model_terms' in kwargs:
@@ -453,7 +455,7 @@ class ObjectiveFunction(object):
         self._transformations = kwargs.get('_transformations', ["no", "log", "sqrt", "arcsinh", "nil"])
         # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']
 
-        self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', '
+        self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'tn_normal'])
 
         if self.G is not None:
             #TODO need to handle this for groups
@@ -611,7 +613,7 @@ class ObjectiveFunction(object):
         Function to for proceccing testing, and finding a suitable initial coefficient (linear intercept)
         """
         if hard_code:
-            # Grouped
+            # Grouped Terrs TODO
             manual_fit_spec = {
                 'fixed_terms': ['Constant', 'US', 'RSMS', 'MCV'],
                 'rdm_terms': ['RSHS:normal', 'AADT:normal', 'Curve50:normal'],
@@ -5058,11 +5060,12 @@ class ObjectiveFunction(object):
         proba_ = proba_n.sum(axis =1)
 
         """""
-
+        main_disper = self.get_dispersion_paramaters(betas, dispersion)
+
 
         # print(betas_last)
         proba_, proba_n = self.prob_obs_draws_all_at_once(
-            eVd, np.atleast_3d(y),
+            eVd, np.atleast_3d(y), main_disper, dispersion)
         # self._prob_product_against_panels()
 
         # print(top_stats)
@@ -5602,13 +5605,42 @@ class ObjectiveFunction(object):
         return covariance
 
 
+    # Numerical Hessian (finite differences)
+    def numerical_hessian_post(self, f, theta, epsilon=1e-5):
+        n = len(theta)
+        hessian = np.zeros((n, n))
+        for i in range(n):
+            for j in range(n):
+                theta_ij_plus = theta.copy()
+                theta_ij_minus = theta.copy()
+                theta_ij_plus[i] += epsilon
+                theta_ij_plus[j] += epsilon
+                theta_ij_minus[i] += epsilon
+                theta_ij_minus[j] -= epsilon
+
+                f_ij_plus = f(theta_ij_plus)
+                f_ij_minus = f(theta_ij_minus)
+                f_original = f(theta)
+
+                hessian[i, j] = (f_ij_plus - 2 * f_original + f_ij_minus) / (epsilon ** 2)
+        return hessian
+
+
     def _post_fit_ll_aic_bic(self, optim_res, verbose=1, robust=False, simple_fit=True, is_dispersion=0):
         # sample_size = len(self._x_data) - len(optim_res['x']) -1
         sample_size = len(self._x_data)
         convergence = optim_res['success']
         coeff_ = optim_res['x']
         penalty = 0
+        stderr_opg = None
+        if self.run_numerical_hessian:
+
+            stderr_opg = self.stderr
+
 
+
+
+
         if 'hess_inv' in optim_res:
             covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
                 if robust else optim_res['hess_inv']
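For reference, numerical_hessian_post above uses a non-standard stencil: both perturbed points move theta[i] by +epsilon only, so the diagonal reduces to a one-sided difference, (f(theta + 2*eps*e_i) - f(theta)) / eps**2, and the cross terms omit two of the four corner evaluations. The textbook four-point central-difference Hessian looks like the following sketch (shown purely for comparison, not the package's implementation; theta is assumed to be a float numpy array):

    import numpy as np

    def central_difference_hessian(f, theta, eps=1e-5):
        """Standard O(eps**2) central-difference Hessian, for comparison."""
        n = len(theta)
        H = np.zeros((n, n))
        for i in range(n):
            for j in range(n):
                tpp, tpm, tmp, tmm = (theta.copy() for _ in range(4))
                tpp[i] += eps; tpp[j] += eps   # f(theta + eps*e_i + eps*e_j)
                tpm[i] += eps; tpm[j] -= eps   # f(theta + eps*e_i - eps*e_j)
                tmp[i] -= eps; tmp[j] += eps   # f(theta - eps*e_i + eps*e_j)
                tmm[i] -= eps; tmm[j] -= eps   # f(theta - eps*e_i - eps*e_j)
                H[i, j] = (f(tpp) - f(tpm) - f(tmp) + f(tmm)) / (4 * eps ** 2)
        return H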
@@ -5617,9 +5649,11 @@ class ObjectiveFunction(object):
         covariance = self.handle_covariance(covariance)
         covariance = np.clip(covariance, 0, None)
         stderr = np.sqrt(np.diag(covariance))
-
-
-
+        if stderr_opg is not None:
+            stderr = np.minimum(stderr, stderr_opg)
+
+
+
         if is_dispersion:
             stderr[-1] = random.uniform(0.001, 0.005)
 
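The stderr_opg path takes an elementwise minimum between the Hessian-based standard errors and the bootstrap-derived ones stashed in self.stderr, so each coefficient reports the smaller (more optimistic) of the two estimates. A sketch with hypothetical numbers:

    import numpy as np

    stderr_hessian = np.array([0.12, 0.05, 0.30])    # hypothetical Hessian-based SEs
    stderr_opg = np.array([0.10, 0.07, 0.25])        # hypothetical bootstrap SEs
    stderr = np.minimum(stderr_hessian, stderr_opg)  # -> [0.10, 0.05, 0.25]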
@@ -5912,6 +5946,9 @@ class ObjectiveFunction(object):
         else:
             self.draws = 0
 
+    def hessian_loglik_function(self, params, *args):
+        return self._loglik_gradient(params, *args)
+
     def _run_optimization(self, XX, y, dispersion, initial_params, bounds, tol, mod):
         """
         Run the optimization process with draws logic and update the Solution object.
@@ -5941,7 +5978,7 @@ class ObjectiveFunction(object):
 
 
         #method = 'Nelder-Mead-BFGS'
-        options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter':
+        options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 20000}
         args=(
             X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
             self.rdm_cor_fit, None, None, draws_grouped, XG, mod
@@ -5956,9 +5993,38 @@ class ObjectiveFunction(object):
             ),
             method=method,
             bounds=bounds,
-            tol=tol.get('ftol', 1e-
+            tol=tol.get('ftol', 1e-6),  # Use 'ftol' as the default tolerance
             options=options
         )
+        if optimization_result.message == 'NaN result encountered.':
+            optimization_result = self._minimize(self._loglik_gradient,
+                                                 initial_params,
+                                                 args=(
+                                                     X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
+                                                     self.rdm_cor_fit, None, None, draws_grouped, XG, mod
+                                                 ),
+                                                 method='Nelder-Mead-BFGS',
+                                                 bounds=bounds,
+                                                 tol=tol.get('ftol', 1e-4),  # Use 'ftol' as the default tolerance
+                                                 options=options
+                                                 )
+
+
+        if self.run_numerical_hessian:
+            std_errors = self.bootstrap_std_dev(
+                initial_params=optimization_result.x,
+                XX=XX,
+                y=y,
+                dispersion=dispersion,
+                bounds=bounds,
+                tol=tol,
+                mod=mod,
+                n_bootstraps=5
+            )
+            self.stderr = std_errors
+
+
+
 
 
 
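The new retry logic re-runs the optimizer with the package's custom 'Nelder-Mead-BFGS' method and a looser tolerance whenever the first attempt reports 'NaN result encountered.'. A self-contained sketch of the same fallback pattern using plain scipy.optimize.minimize and a hypothetical objective (the package routes through its own self._minimize wrapper instead):

    import numpy as np
    from scipy.optimize import minimize

    def neg_loglik(beta):                 # hypothetical stand-in objective
        return float(np.sum((beta - 1.0) ** 2))

    x0 = np.zeros(3)
    res = minimize(neg_loglik, x0, method='L-BFGS-B', tol=1e-6)
    if 'NaN' in str(res.message):         # first attempt blew up: retry derivative-free
        res = minimize(neg_loglik, x0, method='Nelder-Mead', tol=1e-4)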
@@ -6032,8 +6098,8 @@ class ObjectiveFunction(object):
             ),
             method=self.method_ll,
             bounds=bounds,
-            tol=tol.get('ftol', 1e-
-            options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter':
+            tol=tol.get('ftol', 1e-6),  # Use 'ftol' as the default tolerance
+            options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 200}
         )
 
         # Store the parameter estimates from this bootstrap iteration
@@ -6122,6 +6188,7 @@ class ObjectiveFunction(object):
         # Validation metrics if test data is available (in-sample and out-of-sample MAE)
         in_sample_mae = None
         out_sample_mae = None
+        out_sample_validation = None
         if self.is_multi and XX_test is not None:
             in_sample_mae = self.validation(
                 optimization_result['x'], y, XX, dispersion=dispersion, model_nature=mod, testing=0
@@ -6129,13 +6196,17 @@ class ObjectiveFunction(object):
             out_sample_mae = self.validation(
                 optimization_result['x'], y_test, XX_test, dispersion=dispersion, model_nature=mod
             )
+            if self.val_percentage > 0:
+                out_sample_validation = self.validation(
+                    optimization_result['x'], y_test, XX_test, dispersion=dispersion, model_nature=mod, testing=1
+                )
 
-            return log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae
+            return log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae, out_sample_validation
 
         else:
             # Optimization failed, return None for all metrics
             print("Optimization failed.")
-            return None, None, None, None, None, None, None, None
+            return None, None, None, None, None, None, None, None, None
     def _prepare_data_and_bounds(self, mod, dispersion):
         """Prepare the data matrices, bounds, and initial parameters."""
         # Prepare data matrices
@@ -6225,7 +6296,8 @@ class ObjectiveFunction(object):
 
         # Dispersion adds one additional parameter if enabled
         dispersion_param = 1 if dispersion > 0 else 0
-
+        total = sum(self.get_num_params()) + dispersion_param
+        return total
 
     def _build_initial_params(self, num_coefficients, dispersion):
         """
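The added total/return lines supply what appears to have been a missing return value in the parameter-count helper: the total is the sum of the per-block parameter counts plus one slot for the dispersion term. Illustrated with hypothetical block counts:

    num_params = [4, 3, 0, 2]    # hypothetical per-block counts (fixed, random, ... terms)
    dispersion_param = 1         # one extra parameter when dispersion > 0
    total = sum(num_params) + dispersion_param
    print(total)                 # -> 10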
@@ -6294,7 +6366,7 @@ class ObjectiveFunction(object):
         )
 
         # Post-process results
-        log_lik, aic, bic, stderr, zvalues, pvalues, in_sample_mae, out_sample_mae = self._postprocess_results(
+        log_lik, aic, bic, stderr, zvalues, pvalues, in_sample_mae, out_sample_mae, out_sample_val = self._postprocess_results(
             optimization_result, XX, XX_test, y, mod.get('y_test'), dispersion, mod
         )
 
@@ -6326,10 +6398,14 @@ class ObjectiveFunction(object):
 
         # Add metrics to solution object
         sol = Solution()  # Assuming Solution is the appropriate class to store results
+
         sol.add_objective(
             bic=bic,
             aic=aic,
             loglik=log_ll,
+            TRAIN=in_sample_mae,
+            TEST=out_sample_mae,
+            VAL=out_sample_val,
             num_parm=paramNum,
             GOF=other_measures
         )
metacountregressor-0.1.308/version.txt
ADDED
@@ -0,0 +1 @@
+0.1.308

metacountregressor-0.1.306/version.txt
DELETED
@@ -1 +0,0 @@
-0.1.306