metacountregressor 0.1.305__tar.gz → 0.1.307__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {metacountregressor-0.1.305/metacountregressor.egg-info → metacountregressor-0.1.307}/PKG-INFO +1 -1
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/helperprocess.py +6 -2
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/main.py +1 -1
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/metaheuristics.py +1 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/solution.py +78 -16
- {metacountregressor-0.1.305 → metacountregressor-0.1.307/metacountregressor.egg-info}/PKG-INFO +1 -1
- metacountregressor-0.1.307/version.txt +1 -0
- metacountregressor-0.1.305/version.txt +0 -1
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/LICENSE.txt +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/MANIFEST.in +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/README.md +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/README.rst +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/__init__.py +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/_device_cust.py +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/app_main.py +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/data_split_helper.py +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/halton.py +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/main_old.py +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/pareto_file.py +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/pareto_logger__plot.py +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/setup.py +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/single_objective_finder.py +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/test_generated_paper2.py +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor.egg-info/SOURCES.txt +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor.egg-info/dependency_links.txt +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor.egg-info/not-zip-safe +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor.egg-info/requires.txt +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor.egg-info/top_level.txt +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/setup.cfg +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/setup.py +0 -0
- {metacountregressor-0.1.305 → metacountregressor-0.1.307}/tests/test.py +0 -0
{metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/helperprocess.py
RENAMED
@@ -4,7 +4,7 @@ import pandas as pd
|
|
4
4
|
import csv
|
5
5
|
import matplotlib.pyplot as plt
|
6
6
|
from scipy import stats as st
|
7
|
-
from sklearn.preprocessing import StandardScaler
|
7
|
+
from sklearn.preprocessing import StandardScaler, MinMaxScaler
|
8
8
|
import os
|
9
9
|
import shutil
|
10
10
|
plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
|
@@ -413,6 +413,10 @@ def transform_dataframe(df, config):
|
|
413
413
|
# Apply custom function
|
414
414
|
data = data.apply(settings['apply_func'])
|
415
415
|
output_df[column] = data
|
416
|
+
elif settings['type'] == 'normalized':
|
417
|
+
# Normalize the column
|
418
|
+
scaler = MinMaxScaler
|
419
|
+
output_df[column] = scaler.fit_transform(df[[column]]).flatten()
|
416
420
|
|
417
421
|
elif settings['type'] == 'none':
|
418
422
|
# Leave the column unchanged
|
@@ -447,7 +451,7 @@ def guess_column_type(column_name, series):
|
|
447
451
|
# Otherwise, fallback to continuous standardization
|
448
452
|
return {
|
449
453
|
'type': 'continuous',
|
450
|
-
'apply_func': (lambda x: (x - series.mean()) / series.std())
|
454
|
+
'apply_func': (lambda x: ((x - series.mean()) / series.std()) + abs(((series - series.mean()) / series.std()).min()) + 0.001)
|
451
455
|
}
|
452
456
|
else:
|
453
457
|
# Default fallback (leave the column unchanged)
|
@@ -187,7 +187,7 @@ def main(args, **kwargs):
|
|
187
187
|
a_des, df = helperprocess.set_up_analyst_constraints(df, model_terms)
|
188
188
|
# some example argument, these are defualt so the following line is just for claritity
|
189
189
|
args = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number': 1,
|
190
|
-
'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME":
|
190
|
+
'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 600, 'desicions':a_des, 'is_multi': 1}
|
191
191
|
# Fit the model with metacountregressor
|
192
192
|
# Step 5: Transform the dataset based on the configuration
|
193
193
|
#data_new = helperprocess.transform_dataframe(dataset, config)
|
{metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/metaheuristics.py
RENAMED
@@ -422,6 +422,7 @@ class DifferentialEvolution(object):
|
|
422
422
|
self.iter = kwargs.get('_max_iter', 10000)
|
423
423
|
self.cr = kwargs.get('_crossover_perc') or kwargs.get('_cr', 0.2)
|
424
424
|
self.instance_number = str(kwargs.get('instance_number', 1))
|
425
|
+
self.instance_number = objective_function.instance_number
|
425
426
|
self.get_directory()
|
426
427
|
|
427
428
|
self._population = list()
|
@@ -30,6 +30,7 @@ from scipy.special import gammaln
|
|
30
30
|
from sklearn.metrics import mean_absolute_error as MAE
|
31
31
|
from sklearn.metrics import mean_squared_error as MSPE
|
32
32
|
from statsmodels.tools.numdiff import approx_fprime, approx_hess
|
33
|
+
from autograd import hessian as autograd_hessian
|
33
34
|
from sklearn.preprocessing import StandardScaler, MinMaxScaler
|
34
35
|
from texttable import Texttable
|
35
36
|
import time
|
@@ -123,6 +124,7 @@ class ObjectiveFunction(object):
|
|
123
124
|
|
124
125
|
def __init__(self, x_data, y_data, **kwargs):
|
125
126
|
self.gbl_best = 1000000.0
|
127
|
+
self.run_numerical_hessian = kwargs.get('r_nu_hess', False)
|
126
128
|
self.run_bootstrap = kwargs.get('run_bootstrap', False)
|
127
129
|
self.linear_regression = kwargs.get('linear_model', False)
|
128
130
|
self.reg_penalty = kwargs.get('reg_penalty',1)
|
@@ -186,7 +188,7 @@ class ObjectiveFunction(object):
|
|
186
188
|
self.MP = 0
|
187
189
|
# Nelder-Mead-BFGS
|
188
190
|
|
189
|
-
self._max_characteristics = kwargs.get('_max_vars',
|
191
|
+
self._max_characteristics = kwargs.get('_max_vars', 90)
|
190
192
|
|
191
193
|
self.beta_dict = dict
|
192
194
|
if 'model_terms' in kwargs:
|
@@ -611,11 +613,12 @@ class ObjectiveFunction(object):
|
|
611
613
|
Function to for proceccing testing, and finding a suitable initial coefficient (linear intercept)
|
612
614
|
"""
|
613
615
|
if hard_code:
|
616
|
+
# Grouped Terrs TODO
|
614
617
|
manual_fit_spec = {
|
615
618
|
'fixed_terms': ['Constant', 'US', 'RSMS', 'MCV'],
|
616
619
|
'rdm_terms': ['RSHS:normal', 'AADT:normal', 'Curve50:normal'],
|
617
620
|
'rdm_cor_terms': [],
|
618
|
-
'
|
621
|
+
'group_rdm': [],
|
619
622
|
'hetro_in_means': [],
|
620
623
|
'transformations': ['no', 'log', 'log', 'no', 'no', 'no', 'no'],
|
621
624
|
'dispersion': 1
|
@@ -637,7 +640,7 @@ class ObjectiveFunction(object):
|
|
637
640
|
'fixed_terms': ['const'],
|
638
641
|
'rdm_terms': [],
|
639
642
|
'rdm_cor_terms': [],
|
640
|
-
'
|
643
|
+
'group_rdm': [],
|
641
644
|
'hetro_in_means': [],
|
642
645
|
'transformations': ['no'],
|
643
646
|
'dispersion': 1
|
@@ -5601,13 +5604,42 @@ class ObjectiveFunction(object):
|
|
5601
5604
|
return covariance
|
5602
5605
|
|
5603
5606
|
|
5607
|
+
# Numerical Hessian (finite differences)
|
5608
|
+
def numerical_hessian_post(self, f, theta, epsilon=1e-5):
|
5609
|
+
n = len(theta)
|
5610
|
+
hessian = np.zeros((n, n))
|
5611
|
+
for i in range(n):
|
5612
|
+
for j in range(n):
|
5613
|
+
theta_ij_plus = theta.copy()
|
5614
|
+
theta_ij_minus = theta.copy()
|
5615
|
+
theta_ij_plus[i] += epsilon
|
5616
|
+
theta_ij_plus[j] += epsilon
|
5617
|
+
theta_ij_minus[i] += epsilon
|
5618
|
+
theta_ij_minus[j] -= epsilon
|
5619
|
+
|
5620
|
+
f_ij_plus = f(theta_ij_plus)
|
5621
|
+
f_ij_minus = f(theta_ij_minus)
|
5622
|
+
f_original = f(theta)
|
5623
|
+
|
5624
|
+
hessian[i, j] = (f_ij_plus - 2 * f_original + f_ij_minus) / (epsilon ** 2)
|
5625
|
+
return hessian
|
5626
|
+
|
5627
|
+
|
5604
5628
|
def _post_fit_ll_aic_bic(self, optim_res, verbose=1, robust=False, simple_fit=True, is_dispersion=0):
|
5605
5629
|
# sample_size = len(self._x_data) - len(optim_res['x']) -1
|
5606
5630
|
sample_size = len(self._x_data)
|
5607
5631
|
convergence = optim_res['success']
|
5608
5632
|
coeff_ = optim_res['x']
|
5609
5633
|
penalty = 0
|
5634
|
+
stderr_opg = None
|
5635
|
+
if self.run_numerical_hessian:
|
5636
|
+
|
5637
|
+
stderr_opg = self.stderr
|
5638
|
+
|
5610
5639
|
|
5640
|
+
|
5641
|
+
|
5642
|
+
|
5611
5643
|
if 'hess_inv' in optim_res:
|
5612
5644
|
covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
|
5613
5645
|
if robust else optim_res['hess_inv']
|
@@ -5616,9 +5648,11 @@ class ObjectiveFunction(object):
|
|
5616
5648
|
covariance = self.handle_covariance(covariance)
|
5617
5649
|
covariance = np.clip(covariance, 0, None)
|
5618
5650
|
stderr = np.sqrt(np.diag(covariance))
|
5619
|
-
|
5620
|
-
|
5621
|
-
|
5651
|
+
if stderr_opg:
|
5652
|
+
stderr = np.minimum(stderr, stderr_opg)
|
5653
|
+
|
5654
|
+
|
5655
|
+
|
5622
5656
|
if is_dispersion:
|
5623
5657
|
stderr[-1] = random.uniform(0.001, 0.005)
|
5624
5658
|
|
@@ -5911,6 +5945,9 @@ class ObjectiveFunction(object):
|
|
5911
5945
|
else:
|
5912
5946
|
self.draws = 0
|
5913
5947
|
|
5948
|
+
def hessian_loglik_function(self, params, *args):
|
5949
|
+
return self._loglik_gradient(params, *args)
|
5950
|
+
|
5914
5951
|
def _run_optimization(self, XX, y, dispersion, initial_params, bounds, tol, mod):
|
5915
5952
|
"""
|
5916
5953
|
Run the optimization process with draws logic and update the Solution object.
|
@@ -5940,7 +5977,7 @@ class ObjectiveFunction(object):
|
|
5940
5977
|
|
5941
5978
|
|
5942
5979
|
#method = 'Nelder-Mead-BFGS'
|
5943
|
-
options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter':
|
5980
|
+
options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 20000}
|
5944
5981
|
args=(
|
5945
5982
|
X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
|
5946
5983
|
self.rdm_cor_fit, None, None, draws_grouped, XG, mod
|
@@ -5955,9 +5992,24 @@ class ObjectiveFunction(object):
|
|
5955
5992
|
),
|
5956
5993
|
method=method,
|
5957
5994
|
bounds=bounds,
|
5958
|
-
tol=tol.get('ftol', 1e-
|
5995
|
+
tol=tol.get('ftol', 1e-6), # Use 'ftol' as the default tolerance
|
5959
5996
|
options=options
|
5960
5997
|
)
|
5998
|
+
if self.run_numerical_hessian:
|
5999
|
+
std_errors = self.bootstrap_std_dev(
|
6000
|
+
initial_params=optimization_result.x,
|
6001
|
+
XX=XX,
|
6002
|
+
y=y,
|
6003
|
+
dispersion=dispersion,
|
6004
|
+
bounds=bounds,
|
6005
|
+
tol=tol,
|
6006
|
+
mod=mod,
|
6007
|
+
n_bootstraps=5
|
6008
|
+
)
|
6009
|
+
self.stderr = std_errors
|
6010
|
+
|
6011
|
+
|
6012
|
+
|
5961
6013
|
|
5962
6014
|
|
5963
6015
|
|
@@ -6031,8 +6083,8 @@ class ObjectiveFunction(object):
|
|
6031
6083
|
),
|
6032
6084
|
method=self.method_ll,
|
6033
6085
|
bounds=bounds,
|
6034
|
-
tol=tol.get('ftol', 1e-
|
6035
|
-
options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter':
|
6086
|
+
tol=tol.get('ftol', 1e-6), # Use 'ftol' as the default tolerance
|
6087
|
+
options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 200}
|
6036
6088
|
)
|
6037
6089
|
|
6038
6090
|
# Store the parameter estimates from this bootstrap iteration
|
@@ -6121,6 +6173,7 @@ class ObjectiveFunction(object):
|
|
6121
6173
|
# Validation metrics if test data is available (in-sample and out-of-sample MAE)
|
6122
6174
|
in_sample_mae = None
|
6123
6175
|
out_sample_mae = None
|
6176
|
+
out_sample_validation = None
|
6124
6177
|
if self.is_multi and XX_test is not None:
|
6125
6178
|
in_sample_mae = self.validation(
|
6126
6179
|
optimization_result['x'], y, XX, dispersion=dispersion, model_nature=mod, testing=0
|
@@ -6128,8 +6181,12 @@ class ObjectiveFunction(object):
|
|
6128
6181
|
out_sample_mae = self.validation(
|
6129
6182
|
optimization_result['x'], y_test, XX_test, dispersion=dispersion, model_nature=mod
|
6130
6183
|
)
|
6184
|
+
if self.val_percentage > 0:
|
6185
|
+
out_sample_validation = self.validation(
|
6186
|
+
optimization_result['x'], y_test, XX_test, dispersion=dispersion, model_nature=mod, testing=1
|
6187
|
+
)
|
6131
6188
|
|
6132
|
-
return log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae
|
6189
|
+
return log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae, out_sample_validation
|
6133
6190
|
|
6134
6191
|
else:
|
6135
6192
|
# Optimization failed, return None for all metrics
|
@@ -6224,7 +6281,8 @@ class ObjectiveFunction(object):
|
|
6224
6281
|
|
6225
6282
|
# Dispersion adds one additional parameter if enabled
|
6226
6283
|
dispersion_param = 1 if dispersion > 0 else 0
|
6227
|
-
|
6284
|
+
total = sum(self.get_num_params()) + dispersion_param
|
6285
|
+
return total
|
6228
6286
|
|
6229
6287
|
def _build_initial_params(self, num_coefficients, dispersion):
|
6230
6288
|
"""
|
@@ -6238,11 +6296,11 @@ class ObjectiveFunction(object):
|
|
6238
6296
|
Initial parameter array.
|
6239
6297
|
"""
|
6240
6298
|
# Generate random initial coefficients
|
6241
|
-
initial_params = np.random.uniform(
|
6299
|
+
initial_params = np.random.uniform(0.0000, 0.01, size=num_coefficients)
|
6242
6300
|
parma_sum = sum(self.get_num_params()[:2])
|
6243
6301
|
|
6244
6302
|
|
6245
|
-
initial_params[parma_sum:-dispersion] =0.
|
6303
|
+
initial_params[parma_sum:-dispersion] =0.0001
|
6246
6304
|
|
6247
6305
|
# Add dispersion parameter if applicable
|
6248
6306
|
if dispersion > 0:
|
@@ -6251,7 +6309,7 @@ class ObjectiveFunction(object):
|
|
6251
6309
|
|
6252
6310
|
return initial_params
|
6253
6311
|
|
6254
|
-
def fitRegression(self, mod, dispersion=0, maxiter=
|
6312
|
+
def fitRegression(self, mod, dispersion=0, maxiter=20000, batch_size=None, num_hess=False, **kwargs):
|
6255
6313
|
"""
|
6256
6314
|
Fits a Poisson regression, NB regression (dispersion=1), or GP regression (dispersion=2).
|
6257
6315
|
|
@@ -6293,7 +6351,7 @@ class ObjectiveFunction(object):
|
|
6293
6351
|
)
|
6294
6352
|
|
6295
6353
|
# Post-process results
|
6296
|
-
log_lik, aic, bic, stderr, zvalues, pvalues, in_sample_mae, out_sample_mae = self._postprocess_results(
|
6354
|
+
log_lik, aic, bic, stderr, zvalues, pvalues, in_sample_mae, out_sample_mae, out_sample_val = self._postprocess_results(
|
6297
6355
|
optimization_result, XX, XX_test, y, mod.get('y_test'), dispersion, mod
|
6298
6356
|
)
|
6299
6357
|
|
@@ -6325,10 +6383,14 @@ class ObjectiveFunction(object):
|
|
6325
6383
|
|
6326
6384
|
# Add metrics to solution object
|
6327
6385
|
sol = Solution() # Assuming Solution is the appropriate class to store results
|
6386
|
+
|
6328
6387
|
sol.add_objective(
|
6329
6388
|
bic=bic,
|
6330
6389
|
aic=aic,
|
6331
6390
|
loglik=log_ll,
|
6391
|
+
TRAIN=in_sample_mae,
|
6392
|
+
TEST=out_sample_mae,
|
6393
|
+
VAL=out_sample_val,
|
6332
6394
|
num_parm=paramNum,
|
6333
6395
|
GOF=other_measures
|
6334
6396
|
)
|
@@ -0,0 +1 @@
|
|
1
|
+
0.1.307
|
@@ -1 +0,0 @@
|
|
1
|
-
0.1.305
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/_device_cust.py
RENAMED
File without changes
|
File without changes
|
{metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/data_split_helper.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor/pareto_logger__plot.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor.egg-info/SOURCES.txt
RENAMED
File without changes
|
File without changes
|
{metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor.egg-info/not-zip-safe
RENAMED
File without changes
|
{metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor.egg-info/requires.txt
RENAMED
File without changes
|
{metacountregressor-0.1.305 → metacountregressor-0.1.307}/metacountregressor.egg-info/top_level.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|