metacountregressor 0.1.237__tar.gz → 0.1.241__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {metacountregressor-0.1.237/metacountregressor.egg-info → metacountregressor-0.1.241}/PKG-INFO +1 -1
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/metaheuristics.py +3 -3
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/solution.py +261 -42
- {metacountregressor-0.1.237 → metacountregressor-0.1.241/metacountregressor.egg-info}/PKG-INFO +1 -1
- metacountregressor-0.1.241/version.txt +1 -0
- metacountregressor-0.1.237/version.txt +0 -1
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/LICENSE.txt +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/MANIFEST.in +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/README.md +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/README.rst +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/__init__.py +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/_device_cust.py +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/app_main.py +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/data_split_helper.py +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/halton.py +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/helperprocess.py +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/main.py +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/main_old.py +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/pareto_file.py +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/pareto_logger__plot.py +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/setup.py +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/single_objective_finder.py +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/test_generated_paper2.py +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor.egg-info/SOURCES.txt +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor.egg-info/dependency_links.txt +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor.egg-info/not-zip-safe +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor.egg-info/requires.txt +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor.egg-info/top_level.txt +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/setup.cfg +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/setup.py +0 -0
- {metacountregressor-0.1.237 → metacountregressor-0.1.241}/tests/test.py +0 -0
{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/metaheuristics.py
RENAMED
@@ -236,15 +236,15 @@ def differential_evolution(objective_function, initial_slns=None, **kwargs):
     else:
         de = DifferentialEvolution(objective_function, **kwargs)
 
-    iterations, solutions, best_solutions, best_fitness, best_struct
+    iterations, solutions, best_solutions, best_fitness, best_struct = de.differential_evolution_run(
         initial_slns=initial_slns, mod_init=man)
-
+    AVERAGE_BEST = st.mean(best_solutions)
     end = datetime.now()
     elapsed_time = end - start
     return DifferentialEvolutionResults(elapsed_time=elapsed_time, iteration=iterations,
                                         iter_solution=solutions, best_solutions=best_solutions,
                                         best_fitness=best_fitness,
-                                        best_struct=best_struct, average_best=
+                                        best_struct=best_struct, average_best=AVERAGE_BEST)
 
 
 def simulated_annealing(objective_function, initial_slns=None, **kwargs):
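Note: the two fixes above complete assignments that were truncated in 0.1.237 — the run results are now unpacked from `de.differential_evolution_run(...)`, and the new `average_best` field is populated from the per-iteration best fitness values. A minimal sketch of that aggregation, assuming `st` is the standard-library `statistics` module (the values below are illustrative):

    import statistics as st

    best_solutions = [412.7, 398.2, 395.9, 395.9]  # hypothetical best fitness per iteration
    AVERAGE_BEST = st.mean(best_solutions)         # 400.675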
{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/solution.py
RENAMED

@@ -33,6 +33,7 @@ from statsmodels.tools.numdiff import approx_fprime, approx_hess
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from texttable import Texttable
 import time
+
 try:
     from ._device_cust import device as dev
     from .pareto_file import Pareto, Solution
@@ -122,14 +123,15 @@ class ObjectiveFunction(object):
 
     def __init__(self, x_data, y_data, **kwargs):
         self.gbl_best = 1000000.0
+        self.run_bootstrap = kwargs.get('run_bootstrap', False)
         self.linear_regression = kwargs.get('linear_model', False)
-        self.reg_penalty =
+        self.reg_penalty = 1
         self.power_up_ll = False
         self.nb_parma = 1
         self.bic = None
         self.other_bic = False
         self.test_flag = 1
-        self.no_extra_param =
+        self.no_extra_param =0 #if true, fix dispersion. w
         if self.other_bic:
             print('change this to false latter ')
 
@@ -151,7 +153,7 @@ class ObjectiveFunction(object):
         self.rdm_fit = None
         self.rdm_cor_fit = None
         self.dist_fit = None
-        self.rounding_point = kwargs.get('decimals_in_coeff',
+        self.rounding_point = kwargs.get('decimals_in_coeff', 4)
         self.MAE = None
         self.best_obj_1 = 1000000.0
         self._obj_1 = kwargs.get('_obj_1', 'bic')
@@ -426,7 +428,7 @@ class ObjectiveFunction(object):
 
 
 
-        self.Ndraws = kwargs.get('Ndraws',
+        self.Ndraws = kwargs.get('Ndraws', 100)
         self.draws1 = None
         self.initial_sig = 1  # pass the test of a single model
         self.pvalue_sig_value = .1
@@ -449,7 +451,7 @@ class ObjectiveFunction(object):
         # define the variables
 
 
-        self._transformations = kwargs.get('_transformations', ["no", "log", "sqrt", "arcsinh"])
+        self._transformations = kwargs.get('_transformations', ["no", "log", "sqrt", "arcsinh", "nil"])
         # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']
 
         self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])
@@ -485,8 +487,8 @@ class ObjectiveFunction(object):
         model_types = [[0, 1]]  # add 2 for Generalized Poisson
         #model_types = [[0]]
 
-        if self:
-            model_types = [[
+        if self.linear_regression:
+            model_types = [[1]]
             self.grad_yes = False
 
             print(f'Linear Model Selected: turning off gradient calculation')
@@ -494,6 +496,11 @@ class ObjectiveFunction(object):
 
         model_t_dict = {'Poisson':0,
                         "NB":1}
+        if self.linear_regression:
+            # Rename key "NB" to "sigma" if it exists in the dictionary
+            if "NB" in model_t_dict:
+                model_t_dict["sigma"] = model_t_dict.pop("NB")
+
         # Retrieve the keys (model names) corresponding to the values in model_types
         model_keys = [key for key, value in model_t_dict.items() if value in model_types[0]]
         # Print the formatted result
@@ -503,6 +510,7 @@ class ObjectiveFunction(object):
 
         self._model_type_codes = ['p', 'nb',
                                   'gp', "pl", ["nb-theta", 'nb-dis']]
+        self.update_model_type_codes()
         self._variable = [True] * len(self._discrete_values)
         self._lower_bounds = [None] * \
             len(self._discrete_values)  # TODO have continus
@@ -522,7 +530,18 @@ class ObjectiveFunction(object):
 
         self.solution_analyst = None
 
+    def update_model_type_codes(self):
+        if self.linear_regression:
+            # Recursively update all occurrences of 'nb' to 'sigma'
+            def replace_nb_with_sigma(item):
+                if isinstance(item, list):
+                    return [replace_nb_with_sigma(sub_item) for sub_item in item]
+                elif item == 'nb':
+                    return 'sigma'
+                return item
 
+            # Update the _model_type_codes list
+            self._model_type_codes = replace_nb_with_sigma(self._model_type_codes)
 
 
     def over_ride_self(self, **kwargs):
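Note: when a linear model is selected, `update_model_type_codes` relabels the dispersion code 'nb' as 'sigma' so that reports describe a noise standard deviation rather than a negative-binomial parameter. A standalone sketch of the same recursion over the nested code list (input values are illustrative):

    def replace_nb_with_sigma(item):
        # Recurse into nested lists; rename only exact 'nb' matches.
        if isinstance(item, list):
            return [replace_nb_with_sigma(sub_item) for sub_item in item]
        elif item == 'nb':
            return 'sigma'
        return item

    codes = ['p', 'nb', 'gp', 'pl', ['nb-theta', 'nb-dis']]
    print(replace_nb_with_sigma(codes))
    # ['p', 'sigma', 'gp', 'pl', ['nb-theta', 'nb-dis']]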
@@ -584,6 +603,7 @@ class ObjectiveFunction(object):
         self.set_defined_seed(42)  # Set a specific seed
 
         modified_fit = self.modify_initial_fit(manual_fit)  # Modify the initial fit based on manual_fit
+        self.significant = 1
         self.makeRegression(modified_fit)  # Perform regression with the modified fit
 
 
@@ -820,6 +840,10 @@ class ObjectiveFunction(object):
 
         if dispersion == 0:
             return None
+        if dispersion == 1:
+            return np.clip(np.exp(betas[-1]),None, 2)
+
+
         elif dispersion == 2 or dispersion == 1:
             if self.no_extra_param:
                 return self.nb_parma
@@ -1126,7 +1150,8 @@ class ObjectiveFunction(object):
         print("-" * 80)
 
         if solution is not None:
-
+            if self.is_multi:
+                print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")
 
         self.pvalues = [self.round_with_padding(
             x, 2) for x in self.pvalues]
@@ -1140,13 +1165,15 @@ class ObjectiveFunction(object):
             self.zvalues = np.append(self.zvalues, 50)
 
         elif self.coeff_[-1] < 0.25:
-            print(self.coeff_[-1], 'Warning Check Dispersion')
-            print(np.exp(self.coeff_[-1]))
+            #print(self.coeff_[-1], 'Warning Check Dispersion')
+            print(f'dispession is para,aters {np.exp(self.coeff_[-1])}')
             #self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
+
 
 
+
+        self.coeff_ = self.convert_coefficients(self.coeff_, model)
         self.coeff_ = [self.round_with_padding(x, self.rounding_point) for x in self.coeff_]
-
         self.stderr = [self.round_with_padding(x, 2) for x in self.stderr]
         self.zvalues = [self.round_with_padding(
             x, 2) for x in self.zvalues]
@@ -1549,7 +1576,12 @@ class ObjectiveFunction(object):
             fixed_vars, random_vars, random_var_cor)  # TODO handle distrubution
 
         distributions = alpha_rdm.copy()
-
+        if self.linear_regression:
+
+            transformations = ['nil'] * len(alpha)  # todo add transformations
+        else:
+            transformations = ['no'] * len(alpha)  # todo add transformations
+
         cnt = 0
         joined_alpha = np.add(alpha_rdm, alpha_rdm_cor)
         for i, x in enumerate(joined_alpha):
@@ -1961,7 +1993,7 @@ class ObjectiveFunction(object):
                 subpvalues = pvalues.copy()
         else:
             slice_this_amount = self.num_dispersion_params(dispersion)
-
+
             if pvalues[-1] > sig_value:
                 vio_counts += 1
             subpvalues = pvalues[:-slice_this_amount].copy()
@@ -3253,6 +3285,35 @@ class ObjectiveFunction(object):
         print('output', out)
         return out
 
+    def custom_betas_to_penalise(self, params, dispersion):
+        num_params = self.get_num_params()
+        skip_count = sum(num_params[:2])
+        betas_start = params[:skip_count]
+        if dispersion:
+            betas_end = params[-dispersion:]
+            betas_ = np.concatenate((betas_start,betas_end))
+            return betas_
+        else: return betas_start
+
+
+    def convert_coefficients(self, params, dispersion):
+        num_params = self.get_num_params()
+        skip_count = sum(num_params[:2])
+        remain_params = num_params[2:]
+        params[skip_count:skip_count+remain_params[1]] = np.abs(params[skip_count:skip_count+remain_params[1]])
+        return params
+
+
+
+    def custom_penalty(self, params, penalty):
+        num_params = self.get_num_params()
+        skip_count = sum(num_params[:2])
+
+        for i in params[skip_count:-1]:
+            if i < 0.25:
+                penalty += self.reg_penalty*np.maximum(0, 2.25 -i)**2
+        return penalty
+
     # p is the paramaterisation GP1 is at 0
     def general_poisson(self, mu, y, nu, p=0):  # TODO laxywhere??
 
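Note: `custom_penalty` adds a hinge-squared term for every coefficient in the random-parameter block that falls below 0.25, which discourages near-zero or negative spread estimates. A minimal sketch of the arithmetic (coefficient values are illustrative):

    import numpy as np

    reg_penalty, penalty = 1.0, 0.0
    betas = np.array([1.8, 0.1, -0.4])  # hypothetical spread coefficients
    for b in betas:
        if b < 0.25:
            penalty += reg_penalty * np.maximum(0, 2.25 - b) ** 2
    print(penalty)  # (2.25-0.1)**2 + (2.25+0.4)**2 = 11.645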
@@ -3915,8 +3976,10 @@ class ObjectiveFunction(object):
 
 
         if dispersion:
+            sigma = dispersion
             eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
 
+            epsilon = np.random.normal(loc=0, scale=sigma, size=eta.shape)
             #eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])+dispersion
             #print('check if this holds size')
         else:
@@ -3925,7 +3988,7 @@ class ObjectiveFunction(object):
 
 
         if linear:
-            eta = eta.astype('float')
+            eta = eta.astype('float') +epsilon.astype('float')
         return eta
 
 
@@ -4591,6 +4654,41 @@ class ObjectiveFunction(object):
         pass
         """
         pass
+    def _linear_logliklihood(self, y, eta, sigma):
+        """
+        Calculate the log-likelihood for a linear regression model with random parameters.
+
+        Parameters:
+            y (np.ndarray): Observed responses (n_samples,).
+            eta (np.ndarray): Predicted values (linear predictor) (n_samples, 1, n_draws).
+            sigma (float): Standard deviation of the error term.
+
+        Returns:
+            float: The log-likelihood value aggregated across all draws.
+        """
+        n_samples, _, n_draws = eta.shape  # Number of observations and draws
+
+        # Repeat y to match the shape of eta
+        y_repeated = np.repeat(y, n_draws, axis=2)  # Shape (n_samples, 1, n_draws)
+
+        # Calculate residuals for each draw
+        residuals = y_repeated - eta  # Shape (n_samples, 1, n_draws)
+
+        # Calculate the residual sum of squares (RSS) for each draw
+        rss = np.sum(residuals ** 2, axis=(0, 1))  # Shape (n_draws,)
+
+        # Log-likelihood for each draw
+        log_likelihood_per_draw = (
+            -0.5 * n_samples * np.log(2 * np.pi)  # Constant term
+            - 0.5 * n_samples * np.log(sigma**2)  # Variance term
+            - 0.5 * rss / sigma**2  # Residual term
+        )  # Shape (n_draws,)
+
+        # Aggregate across draws (e.g., take the mean log-likelihood)
+        log_likelihood_value = np.mean(log_likelihood_per_draw)
+
+        return log_likelihood_value
+
     def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
                          return_gradient_n=False, dispersion=0, test_set=0, return_EV=False, verbose=0, corr_list=None,
                          zi_list=None, exog_infl=None, draws_grouped=None, Xgroup=None, model_nature=None, kwarg=None,
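Note: `_linear_logliklihood` evaluates the closed-form Gaussian log-likelihood per draw, -n/2·log(2π) - n/2·log(σ²) - RSS/(2σ²), then averages over draws. A quick numerical check of that closed form against scipy's normal logpdf (illustrative data; scipy is assumed available):

    import numpy as np
    from scipy.stats import norm

    rng = np.random.default_rng(0)
    n, R, sigma = 50, 4, 1.3
    y = rng.normal(size=(n, 1, 1))
    eta = rng.normal(size=(n, 1, R))

    rss = np.sum((np.repeat(y, R, axis=2) - eta) ** 2, axis=(0, 1))
    ll = -0.5 * n * np.log(2 * np.pi) - 0.5 * n * np.log(sigma**2) - 0.5 * rss / sigma**2
    ll_check = norm.logpdf(np.repeat(y, R, axis=2), loc=eta, scale=sigma).sum(axis=(0, 1))
    assert np.allclose(ll, ll_check)
    print(ll.mean())  # the aggregated value _linear_logliklihood returns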
@@ -4654,8 +4752,9 @@ class ObjectiveFunction(object):
 
         if self.linear_regression:
             # LINEAR MODEL PROCESS
-            mse =
-
+            mse = self._linear_logliklihood(y, eVd, main_disper)
+            #mse = np.mean((y - eVd) ** 2)
+            return (-mse + penalty)*self.minimize_scaler
 
         ### GLM PROCESS ########
         llf_main = self.loglik_obs(
@@ -4671,7 +4770,10 @@ class ObjectiveFunction(object):
 
             loglik += 2*loglik
             print('am i powering up')
-
+
+        b_pen = self.custom_betas_to_penalise(betas, dispersion)
+        penalty = self.regularise_l2(betas) + self.regularise_l1(betas)
+        penalty = self.custom_penalty(betas, penalty)
 
         if not np.isreal(loglik):
             loglik = - 10000000.0
@@ -4888,12 +4990,24 @@ class ObjectiveFunction(object):
                 betas_hetro_sd = None
 
             Vdr = dev.cust_einsum("njk,nkr -> njr", Xdr, Br)  # (N,P,R)
-            if self:
+            if self.linear_regression:
                 ### LINEAR MODEL WAY #######
                 eVd = np.clip(
                     Vdf[:, :, None] + Vdr + Vdh + dev.np.array(offset), None, None)
-
-
+                main_disper = self.get_dispersion_paramaters(betas, dispersion)
+                penalty, main_disper = self._penalty_dispersion(
+                    dispersion, main_disper, eVd, y, penalty, model_nature)
+                error_term = np.random.normal(loc=0, scale=main_disper, size=eVd.shape)
+                b_pen = self.custom_betas_to_penalise(betas, dispersion)
+                penalty += self.regularise_l2(b_pen) + self.regularise_l1(b_pen)
+                #penalty = 0
+                penalty = self.custom_penalty(betas, penalty)
+                # LINEAR MODEL PROCESS
+                mse = self._linear_logliklihood(y, eVd, main_disper)
+                #mse = np.mean((y - eVd) ** 2)
+
+                return -mse + penalty
+
 
             ##### GLM WAY #####
             eVd = dev.np.exp(np.clip(
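Note: in the linear-model branch above, the predictor is assembled as the fixed contribution broadcast across draws plus the draw-specific random-parameter contribution from the einsum (heterogeneity and offset terms omitted here). A minimal shape sketch of that assembly (names and dimensions are illustrative, not the package's API):

    import numpy as np

    N, P, K, R = 6, 1, 3, 5                   # obs, panel width, coefficients, draws
    Xdr = np.random.rand(N, P, K)             # random-coefficient regressors
    Br = np.random.rand(N, K, R)              # per-observation coefficient draws
    Vdr = np.einsum("njk,nkr->njr", Xdr, Br)  # (N,P,R), the cust_einsum step
    Vdf = np.random.rand(N, P)                # fixed-coefficient part
    eVd = Vdf[:, :, None] + Vdr               # linear predictor per draw
    print(eVd.shape)                          # (6, 1, 5)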
@@ -4959,7 +5073,7 @@ class ObjectiveFunction(object):
         if self.power_up_ll:
             penalty += self.regularise_l2(betas)
 
-        penalty += self.regularise_l2(betas)
+        penalty += self.regularise_l2(betas) + self.regularise_l1(betas)
         if not return_gradient:
 
             output = ((-loglik + penalty)*self.minimize_scaler,)
@@ -5022,6 +5136,11 @@ class ObjectiveFunction(object):
         else:
             return -self.reg_penalty*sum(np.square(betas.copy()))
 
+    def regularise_l1(self, betas, backwards = False):
+        if backwards == False:
+            return self.reg_penalty*sum(np.square(betas.copy()))
+        else:
+            return -self.reg_penalty*sum(np.abs(betas.copy()))
 
     def _concat_gradients(self, gr_f):
         gr = np.concatenate((gr_f), axis=1)
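Note: with the default `backwards=False` (the form invoked in the new penalty lines), `regularise_l1` returns a squared sum, so `regularise_l2 + regularise_l1` currently behaves as a doubled ridge term; a conventional L1 term would use absolute values, as in the `else` branch. A minimal sketch of the two penalties side by side (values are illustrative):

    import numpy as np

    reg_penalty = 1.0
    betas = np.array([0.5, -1.0, 2.0])
    l2 = reg_penalty * np.sum(np.square(betas))  # 5.25
    l1 = reg_penalty * np.sum(np.abs(betas))     # 3.5, the usual L1 form
    print(l2 + l1)                               # elastic-net-style total, 8.75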
@@ -5480,9 +5599,7 @@ class ObjectiveFunction(object):
         convergence = optim_res['success']
         coeff_ = optim_res['x']
         penalty = 0
-
-        if abs(i) > 120:
-            penalty += abs(i)
+
         if 'hess_inv' in optim_res:
             covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
                 if robust else optim_res['hess_inv']
|
|
|
5496
5613
|
# stderr = [np.min(np.abs(optim_res['x'][i]/random.uniform(1.8, 3)), stderr[i]) if i > len(self.none_handler(self.fixed_fit)) and np.abs(optim_res['x'][i] > 0.2) else stderr[i] for i in range(len(optim_res['x']))]
|
|
5497
5614
|
if is_dispersion:
|
|
5498
5615
|
stderr[-1] = random.uniform(0.001, 0.005)
|
|
5499
|
-
|
|
5500
|
-
# gets the number of parmas before the correlations
|
|
5501
|
-
pre_cor_pams = sum(self.get_num_params()[:3])
|
|
5502
|
-
# gets the number of correlated rpm
|
|
5503
|
-
post_cor_pams = sum(self.get_num_params()[:5])
|
|
5504
|
-
|
|
5505
|
-
|
|
5506
|
-
# this calculation takes into account the correlated rpms distinct values
|
|
5507
|
-
for i in range(pre_cor_pams, post_cor_pams):
|
|
5508
|
-
stderr[i] = stderr[i] / np.sqrt(sample_size)
|
|
5616
|
+
|
|
5509
5617
|
|
|
5510
5618
|
if np.isnan(stderr).any():
|
|
5511
5619
|
raise ValueError("Error: Matrix contains NaN values")
|
|
@@ -5518,6 +5626,9 @@ class ObjectiveFunction(object):
             optim_res['fun'] = 10.0 ** 10
         if self.power_up_ll:
             loglikelihood =-optim_res['fun']/2 - penalty
+        elif self.linear_regression:
+            loglikelihood= -optim_res['fun']
+
         else:
             loglikelihood = -optim_res['fun']/self.minimize_scaler - penalty
 
@@ -5817,11 +5928,15 @@ class ObjectiveFunction(object):
         draws_grouped = self._prepare_grouped_draws(XG, mod) if XG is not None else None
 
         # Optimization method and options
-        method = self.method_ll
+        method = self.method_ll if bounds is None else 'L-BFGS-B'
         print('updataing methods')
-        method = 'Nelder-Mead-BFGS'
-        options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 2000}
 
+        #method = 'Nelder-Mead-BFGS'
+        options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 4000}
+        args=(
+            X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
+            self.rdm_cor_fit, None, None, draws_grouped, XG, mod
+        )
         # Run optimization
         optimization_result = self._minimize(
             self._loglik_gradient,
@@ -5835,9 +5950,94 @@ class ObjectiveFunction(object):
             tol=tol.get('ftol', 1e-8),  # Use 'ftol' as the default tolerance
             options=options
         )
+
+
+
+        # Run the bootstrap to calculate standard errors
+        if self.run_bootstrap:
+
+            std_errors = self.bootstrap_std_dev(
+                initial_params=optimization_result.x,
+                XX=XX,
+                y=y,
+                dispersion=dispersion,
+                bounds=bounds,
+                tol=tol,
+                mod=mod,
+                n_bootstraps=100
+            )
+            self.stderr = std_errors
+
+
+
+
         return optimization_result
 
+
 
+
+
+    def bootstrap_std_dev(self, initial_params, XX, y, dispersion, bounds, tol, mod, n_bootstraps=100):
+        """
+        Perform bootstrap resampling to estimate the standard deviations of the parameters.
+
+        Parameters:
+            self: Reference to the class instance.
+            initial_params: Initial parameter estimates from the optimization.
+            XX: Design matrix.
+            y: Observed outcomes.
+            dispersion: Dispersion parameter (0=Poisson, 1=NB, 2=GP).
+            bounds: List of bounds for each parameter.
+            tol: Tolerance for the optimization process (dictionary with ftol and gtol).
+            mod: Dictionary containing additional data.
+            n_bootstraps: Number of bootstrap resamples (default=100).
+
+        Returns:
+            std_devs: Standard deviations of the parameter estimates (from bootstrap resampling).
+        """
+        # List to store parameter estimates from each bootstrap iteration
+        bootstrap_estimates = []
+
+        # Extract design matrices and additional components from `mod`
+        X, Xr, XG = mod.get('X'), mod.get('Xr'), mod.get('XG')
+        distribution = mod.get('dist_fit')
+
+        # Prepare draws
+        draws = self._prepare_draws(Xr, distribution)
+        draws_grouped = self._prepare_grouped_draws(XG, mod) if XG is not None else None
+
+        # Perform bootstrap iterations
+        for _ in range(n_bootstraps):
+            # Resample data with replacement
+            indices = np.random.choice(len(y), size=len(y), replace=True)
+            X_resampled = X[indices]
+            y_resampled = y[indices]
+
+            # Refit the model with resampled data
+            bootstrap_result = self._minimize(
+                self._loglik_gradient,
+                initial_params,
+                args=(
+                    X_resampled, y_resampled, draws, X_resampled, Xr, self.batch_size, self.grad_yes,
+                    self.hess_yes, dispersion, 0, False, 0, self.rdm_cor_fit, None, None,
+                    draws_grouped, XG, mod
+                ),
+                method=self.method_ll,
+                bounds=bounds,
+                tol=tol.get('ftol', 1e-8),  # Use 'ftol' as the default tolerance
+                options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 2000}
+            )
+
+            # Store the parameter estimates from this bootstrap iteration
+            bootstrap_estimates.append(bootstrap_result.x)
+
+        # Convert bootstrap parameter estimates to a NumPy array
+        bootstrap_estimates = np.array(bootstrap_estimates)
+
+        # Compute the standard deviations of the parameter estimates
+        std_devs = np.std(bootstrap_estimates, axis=0)
+
+        return std_devs
+
     def _initialize_params_and_bounds(self, XX, dispersion):
         """Initialize parameters and set bounds for optimization."""
         num_params = XX.shape[2]  # Number of features
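Note: `bootstrap_std_dev` implements the standard nonparametric bootstrap: resample observations with replacement, refit, and take the standard deviation of the refitted parameters. A self-contained sketch of the idea, with OLS standing in for the package's `_minimize` refit (all data below is simulated for illustration):

    import numpy as np

    rng = np.random.default_rng(42)
    n = 200
    X = np.column_stack([np.ones(n), rng.normal(size=n)])
    y = X @ np.array([1.0, 2.0]) + rng.normal(scale=0.5, size=n)

    estimates = []
    for _ in range(100):                           # n_bootstraps=100, as in the diff
        idx = rng.choice(n, size=n, replace=True)  # resample rows with replacement
        b, *_ = np.linalg.lstsq(X[idx], y[idx], rcond=None)
        estimates.append(b)

    std_devs = np.std(np.array(estimates), axis=0)  # bootstrap standard errors
    print(std_devs)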
@@ -5963,7 +6163,13 @@ class ObjectiveFunction(object):
         if dispersion == 0:
             return [(-30, 30) for _ in initial_params]
         elif dispersion == 1:
-
+            num_params = self.get_num_params()
+            skip_count = sum(num_params[:2])
+
+
+            bounds = [(-3, 3) for _ in initial_params[:-1]] + [(-1, 1)]
+            bounds[skip_count: -1] = [(0.02, None) for _ in bounds[skip_count: -1]]
+            return bounds
         elif dispersion == 2:
             return [(-5, 5) for _ in initial_params[:-1]] + [(0.1, 0.99)]
         else:
@@ -6024,11 +6230,16 @@ class ObjectiveFunction(object):
             Initial parameter array.
         """
         # Generate random initial coefficients
-        initial_params = np.random.uniform(
+        initial_params = np.random.uniform(-.1, 0.1, size=num_coefficients)
+        parma_sum = sum(self.get_num_params()[:2])
+
+
+        initial_params[parma_sum:-dispersion] =0.5
 
         # Add dispersion parameter if applicable
         if dispersion > 0:
-            initial_params
+            initial_params[-1] = 0.0
+            #initial_params[0] =3
 
         return initial_params
 
@@ -6047,8 +6258,9 @@ class ObjectiveFunction(object):
             obj_1, log_lik, betas, stderr, pvalues, zvalues, is_halton, is_delete
         """
         try:
+            dispersion = mod.get('dispersion', dispersion)
             # Preprocessing
-            tol = {'ftol': 1e-
+            tol = {'ftol': 1e-6, 'gtol': 1e-6, 'xtol': 1e-6}
             y, X, Xr, XG, XH = mod.get('y'), mod.get('X'), mod.get('Xr'), mod.get('XG'), mod.get('XH')
 
             # Validate input data
@@ -7369,7 +7581,7 @@ class ObjectiveFunction(object):
             sequence.append(n_th_number)
         return sequence
 
-    def _generate_halton_draws(self, sample_size, n_draws, n_vars, shuffled=False, drop=
+    def _generate_halton_draws(self, sample_size, n_draws, n_vars, shuffled=False, drop=10, primes=None,
                                long=False) -> np.ndarray:
         """Generate Halton draws for multiple random variables using different primes as base"""
         if primes is None:
@@ -7398,6 +7610,7 @@ class ObjectiveFunction(object):
                 i += 1
                 t += 1
             seq = seq[drop:length + drop]
+            seq = np.clip(seq, 1e-4, 1-1e-4)
             if shuffled:
                 np.random.shuffle(seq)
             return seq
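Note: the added clip keeps the Halton points strictly inside (0, 1), so downstream inverse-CDF transforms cannot produce ±inf at the endpoints. A minimal demonstration with scipy's normal quantile function (assumed available; values are illustrative):

    import numpy as np
    from scipy.stats import norm

    seq = np.array([0.0, 0.25, 0.5, 0.75, 1.0])
    print(norm.ppf(seq))                 # -inf and +inf at the endpoints
    seq = np.clip(seq, 1e-4, 1 - 1e-4)   # the guard added in 0.1.241
    print(norm.ppf(seq))                 # finite everywhere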
@@ -7451,6 +7664,12 @@ class ObjectiveFunction(object):
             (1 - x) * np.random.gamma(2, scale=theta, size=n)
         return b
 
+
+
+
+
+
+
     def _compute_derivatives(self, betas, draws, betas_std=None, distribution=None):
         # N, N_draws, K = len(draws)/self.Ndraws, self.Ndraws, len(self._distribution)
         # N, D = draws.shape[0], draws.shape[1]
metacountregressor-0.1.241/version.txt
ADDED

@@ -0,0 +1 @@
+0.1.241

metacountregressor-0.1.237/version.txt
DELETED

@@ -1 +0,0 @@
-0.1.237
{metacountregressor-0.1.237 → metacountregressor-0.1.241}/LICENSE.txt
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/MANIFEST.in
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/README.md
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/README.rst
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/__init__.py
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/_device_cust.py
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/app_main.py
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/data_split_helper.py
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/halton.py
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/helperprocess.py
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/main.py
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/main_old.py
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/pareto_file.py
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/pareto_logger__plot.py
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/setup.py
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/single_objective_finder.py
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/test_generated_paper2.py
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor.egg-info/SOURCES.txt
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor.egg-info/dependency_links.txt
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor.egg-info/not-zip-safe
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor.egg-info/requires.txt
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor.egg-info/top_level.txt
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/setup.cfg
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/setup.py
RENAMED
File without changes

{metacountregressor-0.1.237 → metacountregressor-0.1.241}/tests/test.py
RENAMED
File without changes