metacountregressor-0.1.238-py3-none-any.whl → metacountregressor-0.1.241-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacountregressor/helperprocess.py +37 -0
- metacountregressor/metaheuristics.py +3 -3
- metacountregressor/solution.py +896 -53
- {metacountregressor-0.1.238.dist-info → metacountregressor-0.1.241.dist-info}/METADATA +1 -1
- {metacountregressor-0.1.238.dist-info → metacountregressor-0.1.241.dist-info}/RECORD +8 -8
- {metacountregressor-0.1.238.dist-info → metacountregressor-0.1.241.dist-info}/WHEEL +1 -1
- {metacountregressor-0.1.238.dist-info → metacountregressor-0.1.241.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.238.dist-info → metacountregressor-0.1.241.dist-info}/top_level.txt +0 -0
metacountregressor/helperprocess.py CHANGED
@@ -45,6 +45,43 @@ def delete_all_folders(directory_path):
     except Exception as e:
         print(f"An error occurred: {e}")
 
+def delete_all_contents(directory_path):
+    try:
+        # Check if the directory exists
+        if not os.path.exists(directory_path):
+            print(f"The directory '{directory_path}' does not exist.")
+            return
+
+        # Iterate through items in the directory
+        for item in os.listdir(directory_path):
+            item_path = os.path.join(directory_path, item)
+
+            # If the item is a directory, delete it
+            if os.path.isdir(item_path):
+                shutil.rmtree(item_path)  # Recursively delete the folder
+                print(f"Deleted folder: {item_path}")
+            else:
+                # If the item is a file, delete it
+                os.remove(item_path)
+                print(f"Deleted file: {item_path}")
+
+        print("All contents deleted successfully.")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+def delete_folder_and_contents(directory_path):
+    try:
+        # Check if the directory exists
+        if not os.path.exists(directory_path):
+            print(f"The directory '{directory_path}' does not exist.")
+            return
+
+        # Delete the entire folder and its contents
+        shutil.rmtree(directory_path)
+        print(f"Deleted folder and all its contents: {directory_path}")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
 
 
 ##Select the best Features Based on RF
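Note: the two new helpers differ in scope — delete_all_contents empties a directory but keeps the directory itself, while delete_folder_and_contents removes the directory as well. A minimal usage sketch (the path name is hypothetical):

    from metacountregressor import helperprocess

    helperprocess.delete_all_contents("scratch_outputs")         # keeps scratch_outputs/, removes everything inside it
    helperprocess.delete_folder_and_contents("scratch_outputs")  # removes scratch_outputs/ entirely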
metacountregressor/metaheuristics.py CHANGED
@@ -236,15 +236,15 @@ def differential_evolution(objective_function, initial_slns=None, **kwargs):
     else:
         de = DifferentialEvolution(objective_function, **kwargs)
 
-        iterations, solutions, best_solutions, best_fitness, best_struct
+        iterations, solutions, best_solutions, best_fitness, best_struct = de.differential_evolution_run(
             initial_slns=initial_slns, mod_init=man)
-
+        AVERAGE_BEST = st.mean(best_solutions)
         end = datetime.now()
         elapsed_time = end - start
         return DifferentialEvolutionResults(elapsed_time=elapsed_time, iteration=iterations,
                                             iter_solution=solutions, best_solutions=best_solutions,
                                             best_fitness=best_fitness,
-                                            best_struct=best_struct, average_best=
+                                            best_struct=best_struct, average_best=AVERAGE_BEST)
 
 
 def simulated_annealing(objective_function, initial_slns=None, **kwargs):
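Note: this change completes the previously incomplete assignment (the run results are now captured from de.differential_evolution_run(...)) and reports the mean of the best solutions via st.mean, where st appears to be the statistics module as imported in metaheuristics.py. A standalone sketch of the averaging step, with hypothetical fitness values:

    import statistics as st

    best_solutions = [102.4, 98.7, 97.1]    # per-iteration best fitness values (illustrative)
    AVERAGE_BEST = st.mean(best_solutions)  # 99.4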
metacountregressor/solution.py CHANGED
@@ -33,6 +33,7 @@ from statsmodels.tools.numdiff import approx_fprime, approx_hess
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from texttable import Texttable
 import time
+
 try:
     from ._device_cust import device as dev
     from .pareto_file import Pareto, Solution
@@ -121,14 +122,16 @@ class ObjectiveFunction(object):
     """
 
     def __init__(self, x_data, y_data, **kwargs):
+        self.gbl_best = 1000000.0
+        self.run_bootstrap = kwargs.get('run_bootstrap', False)
         self.linear_regression = kwargs.get('linear_model', False)
-        self.reg_penalty =
+        self.reg_penalty = 1
         self.power_up_ll = False
         self.nb_parma = 1
         self.bic = None
         self.other_bic = False
         self.test_flag = 1
-        self.no_extra_param =
+        self.no_extra_param =0 #if true, fix dispersion. w
         if self.other_bic:
             print('change this to false latter ')
 
@@ -150,7 +153,7 @@ class ObjectiveFunction(object):
         self.rdm_fit = None
         self.rdm_cor_fit = None
         self.dist_fit = None
-        self.rounding_point = kwargs.get('decimals_in_coeff',
+        self.rounding_point = kwargs.get('decimals_in_coeff', 4)
         self.MAE = None
         self.best_obj_1 = 1000000.0
         self._obj_1 = kwargs.get('_obj_1', 'bic')
@@ -425,7 +428,7 @@ class ObjectiveFunction(object):
 
 
 
-        self.Ndraws = kwargs.get('Ndraws',
+        self.Ndraws = kwargs.get('Ndraws', 100)
         self.draws1 = None
         self.initial_sig = 1  # pass the test of a single model
         self.pvalue_sig_value = .1
@@ -446,10 +449,9 @@ class ObjectiveFunction(object):
         print('Setup Complete...')
         self._characteristics_names = list(self._x_data.columns)
         # define the variables
-
-
-        self._transformations = kwargs.get('
-        self._transformations = kwargs.get('_transformation', ["no", "log", "sqrt", "arcsinh"])
+
+
+        self._transformations = kwargs.get('_transformations', ["no", "log", "sqrt", "arcsinh"])
         # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']
 
         self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])
@@ -485,8 +487,8 @@ class ObjectiveFunction(object):
         model_types = [[0, 1]]  # add 2 for Generalized Poisson
         #model_types = [[0]]
 
-        if self:
-            model_types = [[
+        if self.linear_regression:
+            model_types = [[1]]
             self.grad_yes = False
 
             print(f'Linear Model Selected: turning off gradient calculation')
@@ -494,6 +496,11 @@ class ObjectiveFunction(object):
 
         model_t_dict = {'Poisson':0,
                         "NB":1}
+        if self.linear_regression:
+            # Rename key "NB" to "sigma" if it exists in the dictionary
+            if "NB" in model_t_dict:
+                model_t_dict["sigma"] = model_t_dict.pop("NB")
+
         # Retrieve the keys (model names) corresponding to the values in model_types
         model_keys = [key for key, value in model_t_dict.items() if value in model_types[0]]
         # Print the formatted result
@@ -503,6 +510,7 @@ class ObjectiveFunction(object):
 
         self._model_type_codes = ['p', 'nb',
                                   'gp', "pl", ["nb-theta", 'nb-dis']]
+        self.update_model_type_codes()
         self._variable = [True] * len(self._discrete_values)
         self._lower_bounds = [None] * \
             len(self._discrete_values)  # TODO have continus
@@ -522,7 +530,18 @@ class ObjectiveFunction(object):
 
         self.solution_analyst = None
 
+    def update_model_type_codes(self):
+        if self.linear_regression:
+            # Recursively update all occurrences of 'nb' to 'sigma'
+            def replace_nb_with_sigma(item):
+                if isinstance(item, list):
+                    return [replace_nb_with_sigma(sub_item) for sub_item in item]
+                elif item == 'nb':
+                    return 'sigma'
+                return item
 
+            # Update the _model_type_codes list
+            self._model_type_codes = replace_nb_with_sigma(self._model_type_codes)
 
 
     def over_ride_self(self, **kwargs):
@@ -584,6 +603,7 @@ class ObjectiveFunction(object):
         self.set_defined_seed(42)  # Set a specific seed
 
         modified_fit = self.modify_initial_fit(manual_fit)  # Modify the initial fit based on manual_fit
+        self.significant = 1
         self.makeRegression(modified_fit)  # Perform regression with the modified fit
 
 
@@ -820,6 +840,10 @@ class ObjectiveFunction(object):
 
         if dispersion == 0:
             return None
+        if dispersion == 1:
+            return np.clip(np.exp(betas[-1]),None, 2)
+
+
         elif dispersion == 2 or dispersion == 1:
             if self.no_extra_param:
                 return self.nb_parma
@@ -1085,15 +1109,7 @@ class ObjectiveFunction(object):
             [''] * (len(names) - len(self.transform_id_names))
         self.coeff_names = names
 
-
-        if betas is not None:
-            try:
-                if len(betas) != len(names):
-                    print('standard_model', no_draws)
-
-            except Exception as e:
-                print(e)
-        '''
+
 
 
 
@@ -1134,7 +1150,8 @@ class ObjectiveFunction(object):
         print("-" * 80)
 
         if solution is not None:
-
+            if self.is_multi:
+                print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")
 
         self.pvalues = [self.round_with_padding(
             x, 2) for x in self.pvalues]
@@ -1148,13 +1165,15 @@ class ObjectiveFunction(object):
                 self.zvalues = np.append(self.zvalues, 50)
 
             elif self.coeff_[-1] < 0.25:
-                print(self.coeff_[-1], 'Warning Check Dispersion')
-                print(np.exp(self.coeff_[-1]))
+                #print(self.coeff_[-1], 'Warning Check Dispersion')
+                print(f'dispession is para,aters {np.exp(self.coeff_[-1])}')
                 #self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
+
 
 
+
+        self.coeff_ = self.convert_coefficients(self.coeff_, model)
         self.coeff_ = [self.round_with_padding(x, self.rounding_point) for x in self.coeff_]
-
         self.stderr = [self.round_with_padding(x, 2) for x in self.stderr]
         self.zvalues = [self.round_with_padding(
             x, 2) for x in self.zvalues]
@@ -1969,7 +1988,7 @@ class ObjectiveFunction(object):
             subpvalues = pvalues.copy()
         else:
             slice_this_amount = self.num_dispersion_params(dispersion)
-
+
             if pvalues[-1] > sig_value:
                 vio_counts += 1
                 subpvalues = pvalues[:-slice_this_amount].copy()
@@ -2379,6 +2398,12 @@ class ObjectiveFunction(object):
         else:
            sorted(my_dict, key=lambda x: x[0]['pval_percentage'])
 
+    def update_gbl_best(self, obj_1):
+        '''Method to update the global best solution. Also sets the significant attribute to 1 if the global best is updated'''
+        if self.gbl_best > obj_1[self._obj_1]:
+            self.gbl_best = obj_1[self._obj_1]
+            self.significant = 1
+
     def get_fitness(self, vector, multi=False, verbose=False, max_routine=3):
         obj_1 = 10.0 ** 4
         obj_best = None
@@ -2405,7 +2430,7 @@ class ObjectiveFunction(object):
 
                 a = {}
                 obj_1, model_mod = self.makeRegression(model_nature, layout=layout, **a)
-
+                self.update_gbl_best(obj_1)
                 if self.pvalues is None:
                     self.reset_sln()
                     return obj_1
@@ -3255,6 +3280,35 @@ class ObjectiveFunction(object):
             print('output', out)
         return out
 
+    def custom_betas_to_penalise(self, params, dispersion):
+        num_params = self.get_num_params()
+        skip_count = sum(num_params[:2])
+        betas_start = params[:skip_count]
+        if dispersion:
+            betas_end = params[-dispersion:]
+            betas_ = np.concatenate((betas_start,betas_end))
+            return betas_
+        else: return betas_start
+
+
+    def convert_coefficients(self, params, dispersion):
+        num_params = self.get_num_params()
+        skip_count = sum(num_params[:2])
+        remain_params = num_params[2:]
+        params[skip_count:skip_count+remain_params[1]] = np.abs(params[skip_count:skip_count+remain_params[1]])
+        return params
+
+
+
+    def custom_penalty(self, params, penalty):
+        num_params = self.get_num_params()
+        skip_count = sum(num_params[:2])
+
+        for i in params[skip_count:-1]:
+            if i < 0.25:
+                penalty += self.reg_penalty*np.maximum(0, 2.25 -i)**2
+        return penalty
+
     # p is the paramaterisation GP1 is at 0
     def general_poisson(self, mu, y, nu, p=0):  # TODO laxywhere??
 
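Note: the custom_penalty helper added above applies a quadratic hinge, reg_penalty · max(0, 2.25 − β)², to any penalised coefficient below 0.25. For example, with reg_penalty = 1 and a coefficient of 0.1, the added term is (2.25 − 0.1)² = 4.6225, so small coefficients in that slice are pushed strongly away from zero.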
@@ -3917,8 +3971,10 @@ class ObjectiveFunction(object):
 
 
         if dispersion:
+            sigma = dispersion
             eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
 
+            epsilon = np.random.normal(loc=0, scale=sigma, size=eta.shape)
             #eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])+dispersion
             #print('check if this holds size')
         else:
@@ -3927,7 +3983,7 @@ class ObjectiveFunction(object):
 
 
         if linear:
-            eta = eta.astype('float')
+            eta = eta.astype('float') +epsilon.astype('float')
             return eta
 
 
@@ -4585,7 +4641,48 @@ class ObjectiveFunction(object):
 
         brstd = br_std
 
+    def _loglik_prefit(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
+                       return_gradient_n=False, dispersion=0, test_set=0, return_EV=False, verbose=0, corr_list=None,
+                       zi_list=None, exog_infl=None, draws_grouped=None, Xgroup=None, model_nature=None, kwarg=None,
+                       **kwargs):
+        """Fixed and random parameters are handled separately to speed up the estimation and the results are concatenated.
+        pass
+        """
+        pass
+    def _linear_logliklihood(self, y, eta, sigma):
+        """
+        Calculate the log-likelihood for a linear regression model with random parameters.
+
+        Parameters:
+        y (np.ndarray): Observed responses (n_samples,).
+        eta (np.ndarray): Predicted values (linear predictor) (n_samples, 1, n_draws).
+        sigma (float): Standard deviation of the error term.
+
+        Returns:
+        float: The log-likelihood value aggregated across all draws.
+        """
+        n_samples, _, n_draws = eta.shape  # Number of observations and draws
+
+        # Repeat y to match the shape of eta
+        y_repeated = np.repeat(y, n_draws, axis=2)  # Shape (n_samples, 1, n_draws)
 
+        # Calculate residuals for each draw
+        residuals = y_repeated - eta  # Shape (n_samples, 1, n_draws)
+
+        # Calculate the residual sum of squares (RSS) for each draw
+        rss = np.sum(residuals ** 2, axis=(0, 1))  # Shape (n_draws,)
+
+        # Log-likelihood for each draw
+        log_likelihood_per_draw = (
+            -0.5 * n_samples * np.log(2 * np.pi)  # Constant term
+            - 0.5 * n_samples * np.log(sigma**2)  # Variance term
+            - 0.5 * rss / sigma**2  # Residual term
+        )  # Shape (n_draws,)
+
+        # Aggregate across draws (e.g., take the mean log-likelihood)
+        log_likelihood_value = np.mean(log_likelihood_per_draw)
+
+        return log_likelihood_value
 
     def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
                          return_gradient_n=False, dispersion=0, test_set=0, return_EV=False, verbose=0, corr_list=None,
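Note: _linear_logliklihood above evaluates the closed-form Gaussian log-likelihood per draw, ℓ_r = −(n/2)·log(2π) − (n/2)·log(σ²) − RSS_r/(2σ²), and then averages across draws. A minimal standalone check of that formula (not package code) for a single draw:

    import numpy as np
    from scipy.stats import norm

    rng = np.random.default_rng(0)
    n, sigma = 50, 1.3
    eta = rng.normal(size=n)                    # predicted values for one draw
    y = eta + rng.normal(scale=sigma, size=n)   # observed responses

    rss = np.sum((y - eta) ** 2)
    closed_form = -0.5 * n * np.log(2 * np.pi) - 0.5 * n * np.log(sigma ** 2) - 0.5 * rss / sigma ** 2
    per_obs_sum = norm.logpdf(y, loc=eta, scale=sigma).sum()
    print(np.isclose(closed_form, per_obs_sum))  # True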
@@ -4650,8 +4747,9 @@ class ObjectiveFunction(object):
 
         if self.linear_regression:
             # LINEAR MODEL PROCESS
-            mse =
-
+            mse = self._linear_logliklihood(y, eVd, main_disper)
+            #mse = np.mean((y - eVd) ** 2)
+            return (-mse + penalty)*self.minimize_scaler
 
         ### GLM PROCESS ########
         llf_main = self.loglik_obs(
@@ -4667,7 +4765,10 @@ class ObjectiveFunction(object):
 
             loglik += 2*loglik
             print('am i powering up')
-
+
+        b_pen = self.custom_betas_to_penalise(betas, dispersion)
+        penalty = self.regularise_l2(betas) + self.regularise_l1(betas)
+        penalty = self.custom_penalty(betas, penalty)
 
         if not np.isreal(loglik):
             loglik = - 10000000.0
@@ -4884,12 +4985,24 @@ class ObjectiveFunction(object):
                 betas_hetro_sd = None
 
             Vdr = dev.cust_einsum("njk,nkr -> njr", Xdr, Br)  # (N,P,R)
-            if self:
+            if self.linear_regression:
                 ### LINEAR MODEL WAY #######
                 eVd = np.clip(
                     Vdf[:, :, None] + Vdr + Vdh + dev.np.array(offset), None, None)
-
-
+                main_disper = self.get_dispersion_paramaters(betas, dispersion)
+                penalty, main_disper = self._penalty_dispersion(
+                    dispersion, main_disper, eVd, y, penalty, model_nature)
+                error_term = np.random.normal(loc=0, scale=main_disper, size=eVd.shape)
+                b_pen = self.custom_betas_to_penalise(betas, dispersion)
+                penalty += self.regularise_l2(b_pen) + self.regularise_l1(b_pen)
+                #penalty = 0
+                penalty = self.custom_penalty(betas, penalty)
+                # LINEAR MODEL PROCESS
+                mse = self._linear_logliklihood(y, eVd, main_disper)
+                #mse = np.mean((y - eVd) ** 2)
+
+                return -mse + penalty
+
 
             ##### GLM WAY #####
             eVd = dev.np.exp(np.clip(
@@ -4955,7 +5068,7 @@ class ObjectiveFunction(object):
         if self.power_up_ll:
             penalty += self.regularise_l2(betas)
 
-            penalty += self.regularise_l2(betas)
+            penalty += self.regularise_l2(betas) + self.regularise_l1(betas)
         if not return_gradient:
 
             output = ((-loglik + penalty)*self.minimize_scaler,)
@@ -5018,6 +5131,11 @@ class ObjectiveFunction(object):
         else:
             return -self.reg_penalty*sum(np.square(betas.copy()))
 
+    def regularise_l1(self, betas, backwards = False):
+        if backwards == False:
+            return self.reg_penalty*sum(np.square(betas.copy()))
+        else:
+            return -self.reg_penalty*sum(np.abs(betas.copy()))
 
     def _concat_gradients(self, gr_f):
         gr = np.concatenate((gr_f), axis=1)
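Note: for context, the textbook forms of the two penalties these helpers are named after are the ridge term λ·Σβ² and the lasso term λ·Σ|β|; the package's own regularise_l2 / regularise_l1 follow their own sign and argument conventions as shown in the diff. A standalone sketch of the standard definitions only:

    import numpy as np

    def l2_penalty(betas, lam=1.0):
        return lam * np.sum(np.square(betas))   # ridge-style penalty

    def l1_penalty(betas, lam=1.0):
        return lam * np.sum(np.abs(betas))      # lasso-style penalty

    print(l2_penalty(np.array([0.5, -2.0])))  # 4.25
    print(l1_penalty(np.array([0.5, -2.0])))  # 2.5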
@@ -5453,20 +5571,36 @@ class ObjectiveFunction(object):
         # self.grad_n = optim_res['grad_n']
         # self.total_fun_eval = optim_res['nfev']2
 
+    def handle_covariance(self, covariance):
+        """
+        Safely handle covariance matrix, converting it to a dense NumPy array if needed.
+
+        Parameters:
+        covariance: The covariance matrix, which may be a `LbfgsInvHessProduct`.
+
+        Returns:
+        A dense NumPy array of the covariance matrix.
+        """
+        # Check if the covariance is an `LbfgsInvHessProduct`
+        if hasattr(covariance, "todense"):
+            # Convert to a dense NumPy array
+            covariance = covariance.todense()
+        return covariance
+
+
     def _post_fit_ll_aic_bic(self, optim_res, verbose=1, robust=False, simple_fit=True, is_dispersion=0):
         # sample_size = len(self._x_data) - len(optim_res['x']) -1
         sample_size = len(self._x_data)
         convergence = optim_res['success']
         coeff_ = optim_res['x']
         penalty = 0
-
-        if abs(i) > 120:
-            penalty += abs(i)
+
         if 'hess_inv' in optim_res:
             covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
                 if robust else optim_res['hess_inv']
         else:
             covariance = np.diag(np.ones(len(optim_res.x)))
+        covariance = self.handle_covariance(covariance)
         covariance = np.clip(covariance, 0, None)
         stderr = np.sqrt(np.diag(covariance))
         # stderr = [if np.abs(optim_res['x'][i]) >.1 else min(np.abs(optim_res['x'][i]/1.5), stderr[i]) for i in range(len(optim_res['x']))]
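Note: the handle_covariance helper exists because scipy's L-BFGS-B solver returns hess_inv as a LbfgsInvHessProduct linear operator rather than a dense array; calling .todense() (when present) makes the subsequent np.diag / np.clip calls valid. A minimal standalone illustration:

    import numpy as np
    from scipy.optimize import minimize

    res = minimize(lambda b: np.sum((b - 1.0) ** 2), x0=np.zeros(3), method="L-BFGS-B")
    hess_inv = res.hess_inv
    dense = hess_inv.todense() if hasattr(hess_inv, "todense") else np.asarray(hess_inv)
    print(type(hess_inv).__name__, dense.shape)  # LbfgsInvHessProduct (3, 3)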
@@ -5474,16 +5608,7 @@ class ObjectiveFunction(object):
         # stderr = [np.min(np.abs(optim_res['x'][i]/random.uniform(1.8, 3)), stderr[i]) if i > len(self.none_handler(self.fixed_fit)) and np.abs(optim_res['x'][i] > 0.2) else stderr[i] for i in range(len(optim_res['x']))]
         if is_dispersion:
             stderr[-1] = random.uniform(0.001, 0.005)
-
-        # gets the number of parmas before the correlations
-        pre_cor_pams = sum(self.get_num_params()[:3])
-        # gets the number of correlated rpm
-        post_cor_pams = sum(self.get_num_params()[:5])
-
-
-        # this calculation takes into account the correlated rpms distinct values
-        for i in range(pre_cor_pams, post_cor_pams):
-            stderr[i] = stderr[i] / np.sqrt(sample_size)
+
 
         if np.isnan(stderr).any():
             raise ValueError("Error: Matrix contains NaN values")
@@ -5496,6 +5621,9 @@ class ObjectiveFunction(object):
             optim_res['fun'] = 10.0 ** 10
         if self.power_up_ll:
             loglikelihood =-optim_res['fun']/2 - penalty
+        elif self.linear_regression:
+            loglikelihood= -optim_res['fun']
+
         else:
             loglikelihood = -optim_res['fun']/self.minimize_scaler - penalty
 
@@ -5542,10 +5670,9 @@ class ObjectiveFunction(object):
             self.none_handler(self.rdm_cor_fit) + \
             self.get_dispersion_name(dispersion)
         return a
-
-    def
+
+    def fitRegression_prefit(self, mod,
                             dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):
-
         """
         Fits a poisson regression given data and outcomes if dispersion is not declared
        if declared, fits a NB (dispersion = 1) regression or GP (disperions = 2)
@@ -5581,7 +5708,714 @@ class ObjectiveFunction(object):
             zvalues = None
             if mod.get('Xr') is not None or mod.get('XG') is not None or mod.get('XH') is not None:
                 calc_gradient = True
+                calc_gradient = False if self.linear_regression else True
+                n, p, k = mod.get('X').shape
+                _r, pr, kr = mod.get('Xr').shape
+                kh = mod.get('XH').shape[2]
+
+                if 'XG' in mod:
+                    _g, pg, kg = mod.get('XG').shape
+                else:
+                    _g, pg, kg = 0, 0, 0
+
+                dispersion_param_num = self.is_dispersion(dispersion)
+                if self.no_extra_param:
+                    dispersion_param_num =0
+
+                #paramNum = self.get_param_num(dispersion)
+                self.no_random_paramaters = 0
+                if 'XG' in mod:
+                    XX = np.concatenate((mod.get('X'), mod.get('XG'), mod.get('Xr'), mod.get('XH')), axis=2)
+                elif 'XH' in mod:
+                    XX = np.concatenate((mod.get('X'), mod.get('Xr'), mod.get('XH')), axis=2)
+                else:
+                    XX = np.concatenate((mod.get('X'), mod.get('Xr')), axis=2)
+
+                if self.is_multi:
+                    if mod.get('X_test') is not None and mod.get('Xr_test') is not None:
+                        if 'XH' in mod:
+                            XX_test = np.concatenate((mod.get('X_test'), mod.get('Xr_test'), mod.get('XH_test')),
+                                                     axis=2)
+                        else:
+                            XX_test = np.concatenate((mod.get('X_test'), mod.get('Xr_test')), axis=2)
+
+
+
+            else:
+
+                XX = mod.get('Xr')
+                if mod.get('Xr_test') is not None:
+                    XX_test = mod.get('Xr_test')
+
+            bb = np.random.uniform(
+                -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num)
+
+            if method == 'L-BFGS-B':
+                if dispersion == 0:
+                    bounds = []
+                    for i in bb:
+                        bounds = bounds + [(i - 30, i + 30)]
+
+                    # bound = [(-100,100) ]*len(b)
+
+                elif dispersion == 1:  # TODO test bounds was NOne
+                    bounds = []
+                    for i in bb[:-1]:
+                        bounds = bounds + [(i - 30, i + 30)]
+                    bounds = bounds + [(-1, 5)]
+
+                elif dispersion == 2:
+                    bounds = []
+                    for i in bb[:-1]:
+                        bounds = bounds + [(i - 5, i + 5)]
+                    bounds = bounds + [(0.1, .99)]
+
+                else:
+                    bounds = None
+            else:
+                bb[0] = self.constant_value
+                if dispersion == 1:
+                    if not self.no_extra_param:
+                        bb[-1] = self.negative_binomial_value
+                bounds = None
+
+
+
+            # intial_beta = minimize(self._loglik_gradient, bb, args =(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method = 'nelder-mead', options={'gtol': 1e-7*len(XX)})
+            hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
+
+            if self.no_extra_param:
+                dispersion_poisson = 0
+                initial_beta = self._minimize(self._loglik_gradient, bb,
+                                              args=(XX, y, None, None, None, None, calc_gradient, hess_est,
+                                                    dispersion_poisson, 0, False, 0, None, None, None, None, None,
+                                                    mod),
+                                              method=method2, tol=1e-5, options={'gtol': tol['gtol']},
+                                              bounds=bounds)
+                if dispersion:
+                    try:
+                        nb_parma = self.poisson_mean_get_dispersion(initial_beta.x, XX, y)
+                    except:
+                        nb_parma = 0.5
+
+            if method2 == 'L-BFGS-B':
+                if hasattr(initial_beta.hess_inv, 'todense'):
+                    initial_beta['hess_inv'] = initial_beta.hess_inv.todense() if hasattr(initial_beta.hess_inv,
+                                                                                          'todense') else np.array(
+                        [initial_beta.hess_inv(np.eye(len(bb))[i]) for i in range(len(bb))])
+
+                bb = initial_beta['x'].copy()
+
+            if initial_beta is not None and np.isnan(initial_beta['fun']):
+                initial_beta = self._minimize(self._loglik_gradient, bb,
+                                              args=(XX, y, None, None, None, None, True, True, dispersion,
+                                                    0, False, 0, None, None, None, None, None, mod),
+                                              method=method2, tol=tol['ftol'], options={'gtol': tol['gtol']})
+
+            if initial_beta is not None and not np.isnan(initial_beta['fun']):
+                self._no_random_paramaters = 1
+                if initial_beta['success'] != 0:
+                    self.convergance = 0
+                else:
+                    self.convergance = 1
+                print('TODO NEED TO RETURN THE THINGS I CARE ABOUT')
+            else:
+
+                is_halton = 0
+
+                print('Solution was not finite, error. Continue')
+                sol.add_objective()
+                return sol, None, None, None, None, None, None, 0
+        except Exception as e:
+            exc_type, exc_obj, exc_tb = sys.exc_info()
+            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
+            print(exc_type, fname, exc_tb.tb_lineno)
+
+    def fitRegression_in_chunks(self, mod,dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):
+        """
+        Fits a poisson regression given data and outcomes if dispersion is not declared
+        if declared, fits a NB (dispersion = 1) regression or GP (disperions = 2)
+
+        #TODO lineraregression
+        Inputs:
+        X - array. Design matrix
+        y - array. Observed outcomes
+        Outputs:
+        betas_est - array. Coefficients which maximize the negative log-liklihood.
+        """
+        # Set defualt method
+        #TODO, the inital fit worked but it throws
+        tol = {'ftol': 1e-8, 'gtol': 1e-6}
+
+        sol = Solution()
+
+        initial_betas = self.fitRegression_prefit(mod, dispersion, maxiter, batch_size, num_hess, **kwargs)
+
+
+
+
+    def _build_design_matrix(self, mod):
+        """
+        Build the design matrix `XX` by combining `X`, `Xr`, `XG`, and `XH`.
+
+        Parameters:
+        mod: Dictionary containing data and parameters.
+
+        Returns:
+        Combined design matrix `XX`.
+        """
+        X, Xr, XG, XH = mod.get('X'), mod.get('Xr'), mod.get('XG'), mod.get('XH')
+        if XG is not None:
+            return np.concatenate((X, XG, Xr, XH), axis=2)
+        elif XH is not None:
+            return np.concatenate((X, Xr, XH), axis=2)
+        else:
+            return np.concatenate((X, Xr), axis=2)
+
+
+    def _update_attributes(self, optimization_result, mod):
+        """
+        Update instance attributes like `self.significant` and `self.draws`.
+
+        Parameters:
+        optimization_result: The result of the optimization process.
+        mod: The model dictionary containing data and parameters.
+        """
+        # Update `self.significant` based on p-values or other criteria
+        if optimization_result is not None:
+            significant_threshold = 0.05  # Example threshold for significance
+            self.significant = all(
+                p < significant_threshold for p in mod.get("pvalues", [])
+            )
+        else:
+            self.significant = False  # Mark as not significant if optimization failed
+
+        # Update `self.draws` based on `mod` or other factors
+        if "Xr" in mod:
+            Xr = mod.get("Xr")
+            draws = Xr.shape[0] if Xr is not None else 0  # Example: Number of rows in Xr
+            self.draws = draws
+        else:
+            self.draws = 0
+
+    def _run_optimization(self, XX, y, dispersion, initial_params, bounds, tol, mod):
+        """
+        Run the optimization process with draws logic and update the Solution object.
+
+        Parameters:
+        XX: Design matrix.
+        y: Observed outcomes.
+        dispersion: Dispersion parameter (0=Poisson, 1=NB, 2=GP).
+        initial_params: Initial parameter array.
+        bounds: List of bounds for each parameter.
+        tol: Tolerance for the optimization process (dictionary with ftol and gtol).
+        mod: Dictionary containing additional data.
+
+        Returns:
+        Solution object with updated objectives.
+        """
+        # Extract relevant data
+        X, Xr, XG = mod.get('X'), mod.get('Xr'), mod.get('XG')
+        distribution = mod.get('dist_fit')
+
+        # Prepare draws
+        draws = self._prepare_draws(Xr, distribution)
+        draws_grouped = self._prepare_grouped_draws(XG, mod) if XG is not None else None
+
+        # Optimization method and options
+        method = self.method_ll if bounds is None else 'L-BFGS-B'
+        print('updataing methods')
+
+        #method = 'Nelder-Mead-BFGS'
+        options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 4000}
+        args=(
+            X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
+            self.rdm_cor_fit, None, None, draws_grouped, XG, mod
+        )
+        # Run optimization
+        optimization_result = self._minimize(
+            self._loglik_gradient,
+            initial_params,
+            args=(
+                X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
+                self.rdm_cor_fit, None, None, draws_grouped, XG, mod
+            ),
+            method=method,
+            bounds=bounds,
+            tol=tol.get('ftol', 1e-8),  # Use 'ftol' as the default tolerance
+            options=options
+        )
+
+
+
+        # Run the bootstrap to calculate standard errors
+        if self.run_bootstrap:
+
+            std_errors = self.bootstrap_std_dev(
+                initial_params=optimization_result.x,
+                XX=XX,
+                y=y,
+                dispersion=dispersion,
+                bounds=bounds,
+                tol=tol,
+                mod=mod,
+                n_bootstraps=100
+            )
+            self.stderr = std_errors
+
+
+
+
+        return optimization_result
+
+
+
+
+
+    def bootstrap_std_dev(self, initial_params, XX, y, dispersion, bounds, tol, mod, n_bootstraps=100):
+        """
+        Perform bootstrap resampling to estimate the standard deviations of the parameters.
+
+        Parameters:
+        self: Reference to the class instance.
+        initial_params: Initial parameter estimates from the optimization.
+        XX: Design matrix.
+        y: Observed outcomes.
+        dispersion: Dispersion parameter (0=Poisson, 1=NB, 2=GP).
+        bounds: List of bounds for each parameter.
+        tol: Tolerance for the optimization process (dictionary with ftol and gtol).
+        mod: Dictionary containing additional data.
+        n_bootstraps: Number of bootstrap resamples (default=100).
+
+        Returns:
+        std_devs: Standard deviations of the parameter estimates (from bootstrap resampling).
+        """
+        # List to store parameter estimates from each bootstrap iteration
+        bootstrap_estimates = []
+
+        # Extract design matrices and additional components from `mod`
+        X, Xr, XG = mod.get('X'), mod.get('Xr'), mod.get('XG')
+        distribution = mod.get('dist_fit')
+
+        # Prepare draws
+        draws = self._prepare_draws(Xr, distribution)
+        draws_grouped = self._prepare_grouped_draws(XG, mod) if XG is not None else None
+
+        # Perform bootstrap iterations
+        for _ in range(n_bootstraps):
+            # Resample data with replacement
+            indices = np.random.choice(len(y), size=len(y), replace=True)
+            X_resampled = X[indices]
+            y_resampled = y[indices]
+
+            # Refit the model with resampled data
+            bootstrap_result = self._minimize(
+                self._loglik_gradient,
+                initial_params,
+                args=(
+                    X_resampled, y_resampled, draws, X_resampled, Xr, self.batch_size, self.grad_yes,
+                    self.hess_yes, dispersion, 0, False, 0, self.rdm_cor_fit, None, None,
+                    draws_grouped, XG, mod
+                ),
+                method=self.method_ll,
+                bounds=bounds,
+                tol=tol.get('ftol', 1e-8),  # Use 'ftol' as the default tolerance
+                options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 2000}
+            )
+
+            # Store the parameter estimates from this bootstrap iteration
+            bootstrap_estimates.append(bootstrap_result.x)
+
+        # Convert bootstrap parameter estimates to a NumPy array
+        bootstrap_estimates = np.array(bootstrap_estimates)
+
+        # Compute the standard deviations of the parameter estimates
+        std_devs = np.std(bootstrap_estimates, axis=0)
+
+        return std_devs
+
+    def _initialize_params_and_bounds(self, XX, dispersion):
+        """Initialize parameters and set bounds for optimization."""
+        num_params = XX.shape[2]  # Number of features
+        initial_params = np.random.uniform(-0.05, 0.05, size=num_params)
+
+        # Define bounds for optimization
+        if dispersion == 0:
+            bounds = [(-30, 30) for _ in initial_params]
+        elif dispersion == 1:
+            bounds = [(-30, 30) for _ in initial_params[:-1]] + [(-1, 5)]
+        elif dispersion == 2:
+            bounds = [(-5, 5) for _ in initial_params[:-1]] + [(0.1, 0.99)]
+        else:
+            bounds = None
+
+        return initial_params, bounds
+
+
+    def _prepare_data(self, mod):
+        """Prepare data matrices (XX, XX_test) and outcomes (y, y_test)."""
+        y = mod.get('y')
+        y_test = mod.get('y_test')
+
+        # Combine main data matrices
+        XX = self._combine_data_matrices(mod)
+
+        # Combine test data matrices
+        if mod.get('X_test') is not None and mod.get('Xr_test') is not None:
+            if 'XH' in mod:
+                XX_test = np.concatenate(
+                    (mod.get('X_test'), mod.get('Xr_test'), mod.get('XH_test')), axis=2
+                )
+            else:
+                XX_test = np.concatenate((mod.get('X_test'), mod.get('Xr_test')), axis=2)
+        else:
+            XX_test = None
+
+        return XX, XX_test, y, y_test
+
+    def _handle_error(self, e):
+        """Handle exceptions and log errors."""
+        import sys, os
+        exc_type, exc_obj, exc_tb = sys.exc_info()
+        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
+        print(f"Error: {e}, File: {fname}, Line: {exc_tb.tb_lineno}")
+
+
+
+
+
+    def _postprocess_results(self, optimization_result, XX, XX_test, y, y_test, dispersion, mod):
+        """
+        Process optimization results and calculate metrics.
+
+        Parameters:
+        optimization_result: The result of the optimization process.
+        XX: Design matrix for training data.
+        XX_test: Design matrix for test data (if applicable).
+        y: Observed outcomes for training data.
+        y_test: Observed outcomes for test data (if applicable).
+        dispersion: Dispersion parameter (0=Poisson, 1=NB, 2=GP).
+        mod: Dictionary containing additional model data.
+
+        Returns:
+        log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae
+        """
+        if optimization_result is not None and np.isfinite(optimization_result['fun']):
+            # Calculate post-fit metrics (log-likelihood, AIC, BIC, etc.)
+            log_ll, aic, bic, stderr, zvalues, pvalue_alt, other_measures = self._post_fit_ll_aic_bic(
+                optimization_result, simple_fit=False, is_dispersion=dispersion
+            )
+
+            # Validation metrics if test data is available (in-sample and out-of-sample MAE)
+            in_sample_mae = None
+            out_sample_mae = None
+            if self.is_multi and XX_test is not None:
+                in_sample_mae = self.validation(
+                    optimization_result['x'], y, XX, dispersion=dispersion, model_nature=mod, testing=0
+                )
+                out_sample_mae = self.validation(
+                    optimization_result['x'], y_test, XX_test, dispersion=dispersion, model_nature=mod
+                )
+
+            return log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae
+
+        else:
+            # Optimization failed, return None for all metrics
+            print("Optimization failed.")
+            return None, None, None, None, None, None, None, None
+    def _prepare_data_and_bounds(self, mod, dispersion):
+        """Prepare the data matrices, bounds, and initial parameters."""
+        # Prepare data matrices
+        XX = self._combine_data_matrices(mod)  # Combine mod['X'], mod['Xr'], mod['XH'], etc.
+
+        # Set initial parameters
+        initial_params = self._initialize_parameters(XX, dispersion)
+
+        # Define bounds for optimization
+        bounds = self._set_bounds(initial_params, dispersion)
+
+        return XX, bounds, initial_params
+
+
+    def _combine_data_matrices(self, mod):
+        """Combine data matrices (X, Xr, XH, etc.) into a single matrix."""
+        if 'XG' in mod:
+            return np.concatenate((mod.get('X'), mod.get('XG'), mod.get('Xr'), mod.get('XH')), axis=2)
+        elif 'XH' in mod:
+            return np.concatenate((mod.get('X'), mod.get('Xr'), mod.get('XH')), axis=2)
+        else:
+            return np.concatenate((mod.get('X'), mod.get('Xr')), axis=2)
+
+
+    def _initialize_parameters(self, XX, dispersion):
+        """Initialize random parameters for optimization."""
+        num_params = XX.shape[2]  # Number of features
+        return np.random.uniform(-0.05, 0.05, size=num_params)
+
+
+    def _set_bounds(self, initial_params, dispersion):
+        """Set bounds for optimization based on the dispersion type."""
+        return None
+        if dispersion == 0:
+            return [(-30, 30) for _ in initial_params]
+        elif dispersion == 1:
+            num_params = self.get_num_params()
+            skip_count = sum(num_params[:2])
+
+
+            bounds = [(-3, 3) for _ in initial_params[:-1]] + [(-1, 1)]
+            bounds[skip_count: -1] = [(0.02, None) for _ in bounds[skip_count: -1]]
+            return bounds
+        elif dispersion == 2:
+            return [(-5, 5) for _ in initial_params[:-1]] + [(0.1, 0.99)]
+        else:
+            return None
+    def _build_test_matrix(self, mod):
+        """
+        Build the test matrix `XX_test` by combining `X_test`, `Xr_test`, and `XH_test`.
+
+        Parameters:
+        mod: Dictionary containing test data.
+
+        Returns:
+        Combined test matrix `XX_test`.
+        """
+        X_test, Xr_test, XG_test, XH_test = (
+            mod.get('X_test'), mod.get('Xr_test'), mod.get('XG_test'), mod.get('XH_test')
+        )
+        if X_test is None or Xr_test is None:
+            return None
+
+        if XH_test is not None:
+            return np.concatenate((X_test, Xr_test, XH_test), axis=2)
+        elif XG_test is not None:
+            return np.concatenate((X_test, XG_test, Xr_test), axis=2)
+        else:
+            return np.concatenate((X_test, Xr_test), axis=2)
+
+    def _calculate_num_coefficients(self, mod, dispersion):
+        """
+        Calculate the total number of coefficients for the regression model.
+
+        Parameters:
+        mod: Dictionary containing data and parameters.
+        dispersion: Dispersion parameter (0=Poisson, 1=NB, 2=GP).
+
+        Returns:
+        Total number of coefficients.
+        """
+        X, Xr, XG, XH = mod.get('X'), mod.get('Xr'), mod.get('XG'), mod.get('XH')
+        n, p, k = X.shape
+        kr = Xr.shape[2] if Xr is not None else 0
+        kg = XG.shape[2] if XG is not None else 0
+        kh = XH.shape[2] if XH is not None else 0
+
+        # Dispersion adds one additional parameter if enabled
+        dispersion_param = 1 if dispersion > 0 else 0
+        return sum(self.get_num_params()) + dispersion_param
+        #return k + kr + kg + kh + dispersion_param
+    def _build_initial_params(self, num_coefficients, dispersion):
+        """
+        Build the initial parameter array for optimization.
+
+        Parameters:
+        num_coefficients: Total number of coefficients.
+        dispersion: Dispersion parameter (0=Poisson, 1=NB, 2=GP).
+
+        Returns:
+        Initial parameter array.
+        """
+        # Generate random initial coefficients
+        initial_params = np.random.uniform(-.1, 0.1, size=num_coefficients)
+        parma_sum = sum(self.get_num_params()[:2])
+
+
+        initial_params[parma_sum:-dispersion] =0.5
+
+        # Add dispersion parameter if applicable
+        if dispersion > 0:
+            initial_params[-1] = 0.0
+        #initial_params[0] =3
+
+        return initial_params
+
+    def fitRegression(self, mod, dispersion=0, maxiter=4000, batch_size=None, num_hess=False, **kwargs):
+        """
+        Fits a Poisson regression, NB regression (dispersion=1), or GP regression (dispersion=2).
+
+        Parameters:
+        mod: Dictionary containing data and parameters.
+        dispersion: 0 for Poisson, 1 for NB, 2 for GP.
+        maxiter: Maximum number of optimization iterations.
+        batch_size: Batch size for certain methods (if applicable).
+        num_hess: Whether to compute the numerical Hessian.
+
+        Returns:
+        obj_1, log_lik, betas, stderr, pvalues, zvalues, is_halton, is_delete
+        """
+        try:
+            dispersion = mod.get('dispersion', dispersion)
+            # Preprocessing
+            tol = {'ftol': 1e-6, 'gtol': 1e-6, 'xtol': 1e-6}
+            y, X, Xr, XG, XH = mod.get('y'), mod.get('X'), mod.get('Xr'), mod.get('XG'), mod.get('XH')
+
+            # Validate input data
+            if y is None or X is None:
+                raise ValueError("Both `y` and `X` must be provided in the `mod` dictionary.")
+
+            # Build the design matrix `XX` and test matrix `XX_test` if applicable
+            XX = self._build_design_matrix(mod)
+            XX_test = self._build_test_matrix(mod) if self.is_multi else None
+
+            # Determine the number of coefficients
+            num_coefficients = self._calculate_num_coefficients(mod, dispersion)
+
+            # Build initial parameters and bounds
+            initial_params = self._build_initial_params(num_coefficients, dispersion)
+            bounds = self._set_bounds(initial_params, dispersion)
+
+
+            # Run optimization
+            optimization_result = self._run_optimization(
+                XX, y, dispersion, initial_params, bounds, tol, mod
+            )
+
+            # Post-process results
+            log_lik, aic, bic, stderr, zvalues, pvalues, in_sample_mae, out_sample_mae = self._postprocess_results(
+                optimization_result, XX, XX_test, y, mod.get('y_test'), dispersion, mod
+            )
+
+            # Extract other outputs
+            betas = optimization_result['x'] if optimization_result is not None else None
+            is_halton = Xr is not None and Xr.size > 0  # Halton draws used if `Xr` is not empty
+
+            # Determine `is_delete`
+            is_delete = not (
+                optimization_result is not None
+                and 'fun' in optimization_result
+                and not math.isnan(optimization_result['fun'])
+                and not math.isinf(optimization_result['fun'])
+            )
+
+            betas_est = optimization_result
+
+            # Post-fit metrics
+            log_ll, aic, bic, stderr, zvalues, pvalue_alt, other_measures = self._post_fit_ll_aic_bic(
+                betas_est, simple_fit=False, is_dispersion=dispersion
+            )
+
+            # Number of parameters
+            paramNum = len(betas_est['x'])
+
+            # Naming for printing (optional, for formatting or debugging purposes)
+            self.convergance = not is_delete
+            self.naming_for_printing(betas_est['x'], 0, dispersion, model_nature=mod)
+
+            # Add metrics to solution object
+            sol = Solution()  # Assuming Solution is the appropriate class to store results
+            sol.add_objective(
+                bic=bic,
+                aic=aic,
+                loglik=log_ll,
+                num_parm=paramNum,
+                GOF=other_measures
+            )
+
+
+            return (
+                sol,  # obj_1
+                log_lik,
+                betas,
+                stderr,
+                pvalues,
+                zvalues,
+                is_halton,
+                is_delete
+            )
+
+        except Exception as e:
+            self._handle_error(e)
+            return None, None, None, None, None, None, None, 0
+
+
+    def _prepare_draws(self, Xr, distribution):
+        """
+        Prepare the draws for the random effects.
+
+        Parameters:
+        Xr: Random effect design matrix.
+        distribution: Distribution type for the random effects.
+
+        Returns:
+        Draws matrix or None if `Xr` is not provided.
+        """
+        if Xr is None or Xr.size == 0:
+            return None
+
+        n_samples, n_features, n_random_effects = Xr.shape
+        return self.prepare_halton(
+            n_random_effects, n_samples, self.Ndraws, distribution, long=False, slice_this_way=self.group_halton
+        )
+
+    def _prepare_grouped_draws(self, XG, mod):
+        """
+        Prepare the grouped draws for the regression model.
+
+        Parameters:
+        XG: Grouped design matrix.
+        mod: Dictionary containing additional data.
+
+        Returns:
+        Grouped draws matrix.
+        """
+        n_samples, n_features, n_groups = XG.shape
+        if n_features == 0:
+            return None
+        group_distribution = mod.get('dist_fit_grouped', np.zeros(n_groups))
 
+        return self.prepare_halton(
+            n_groups, n_samples, self.Ndraws, group_distribution, slice_this_way=self.group_halton
+        )
+
+    def fitRegression_o(self, mod,
+                        dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):
+
+        """
+        Fits a poisson regression given data and outcomes if dispersion is not declared
+        if declared, fits a NB (dispersion = 1) regression or GP (disperions = 2)
+
+        #TODO lineraregression
+        Inputs:
+        X - array. Design matrix
+        y - array. Observed outcomes
+        Outputs:
+        betas_est - array. Coefficients which maximize the negative log-liklihood.
+        """
+        # Set defualt method
+        #TODO, the inital fit worked but it throws
+        tol = {'ftol': 1e-8, 'gtol': 1e-6}
+
+
+
+        sol = Solution()
+
+        tol = {'ftol': 1e-8, 'gtol': 1e-6}
+        is_delete = 0
+        dispersion = mod.get('dispersion')
+        y = mod.get('y')
+        try:
+            method = self.method_ll
+            method2 = self.method_ll
+            # method2 = 'BFGS_2'
+            if self.hess_yes == False:
+                method2 = 'BFGS_2'
+                method2 = self.method_ll
+
+            bic = None
+            pvalue_alt = None
+            zvalues = None
+            if mod.get('Xr') is not None or mod.get('XG') is not None or mod.get('XH') is not None:
+                calc_gradient = True
+                calc_gradient = False if self.linear_regression else True
                 n, p, k = mod.get('X').shape
                 _r, pr, kr = mod.get('Xr').shape
                 kh = mod.get('XH').shape[2]
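Note: the bootstrap added in bootstrap_std_dev follows the usual nonparametric recipe — resample rows of (X, y) with replacement, refit, and take the column-wise standard deviation of the refitted coefficients. A minimal standalone sketch of that recipe (ordinary least squares is used purely for illustration, not the package's estimator):

    import numpy as np

    def bootstrap_se(X, y, fit, n_bootstraps=100, seed=0):
        rng = np.random.default_rng(seed)
        estimates = []
        for _ in range(n_bootstraps):
            idx = rng.choice(len(y), size=len(y), replace=True)  # resample rows with replacement
            estimates.append(fit(X[idx], y[idx]))                # refit on the resampled data
        return np.std(np.array(estimates), axis=0)               # spread of the refitted coefficients

    rng = np.random.default_rng(1)
    X = np.column_stack([np.ones(200), rng.normal(size=200)])
    y = X @ np.array([1.0, 2.0]) + rng.normal(scale=0.5, size=200)
    ols = lambda X, y: np.linalg.lstsq(X, y, rcond=None)[0]
    print(bootstrap_se(X, y, ols))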
@@ -5679,7 +6513,7 @@ class ObjectiveFunction(object):
                     initial_beta['hess_inv'] = initial_beta.hess_inv.todense() if hasattr(initial_beta.hess_inv,
                                                                                           'todense') else np.array(
                         [initial_beta.hess_inv(np.eye(len(bb))[i]) for i in range(len(bb))])
-
+
                 bb = initial_beta['x'].copy()
 
                 if initial_beta is not None and np.isnan(initial_beta['fun']):
@@ -6070,7 +6904,8 @@ class ObjectiveFunction(object):
     def transformer(self, transform, idc, x_data):
         if transform == 0 or transform == 1 or transform == 'no':
            tr = x_data.astype(float)
-
+        elif transform == 'nil':
+            tr = x_data.astype(float)
         elif transform == 'log':
             tr = np.log1p(x_data.astype(float))
         elif transform == 'exp':
@@ -6087,7 +6922,8 @@ class ObjectiveFunction(object):
             tr = pd.Series(tr)
 
         else:  # will be a number
-            tr =
+            tr = x_data.astype(float)
+            transform = 'nil'
         # if tr.isin([np.inf, -np.inf, np.nan, None]).any() == True:
 
         if np.any(np.logical_or(pd.isna(tr), np.logical_or(pd.isna(tr), tr is None))):
@@ -6740,7 +7576,7 @@ class ObjectiveFunction(object):
             sequence.append(n_th_number)
         return sequence
 
-    def _generate_halton_draws(self, sample_size, n_draws, n_vars, shuffled=False, drop=
+    def _generate_halton_draws(self, sample_size, n_draws, n_vars, shuffled=False, drop=10, primes=None,
                                long=False) -> np.ndarray:
        """Generate Halton draws for multiple random variables using different primes as base"""
        if primes is None:
@@ -6769,6 +7605,7 @@ class ObjectiveFunction(object):
                i += 1
                t += 1
            seq = seq[drop:length + drop]
+           seq = np.clip(seq, 1e-4, 1-1e-4)
            if shuffled:
                np.random.shuffle(seq)
            return seq
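Note: the added clip keeps the Halton sequence strictly inside (0, 1) before the draws are pushed through inverse-CDF transforms; without it, a draw of exactly 0 or 1 maps to ±inf under, for example, the normal quantile function. A standalone illustration:

    import numpy as np
    from scipy.stats import norm

    raw = np.array([0.0, 0.5, 1.0])
    print(norm.ppf(raw))                           # [-inf  0.  inf]
    print(norm.ppf(np.clip(raw, 1e-4, 1 - 1e-4)))  # all finite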
@@ -6822,6 +7659,12 @@ class ObjectiveFunction(object):
             (1 - x) * np.random.gamma(2, scale=theta, size=n)
         return b
 
+
+
+
+
+
+
     def _compute_derivatives(self, betas, draws, betas_std=None, distribution=None):
         # N, N_draws, K = len(draws)/self.Ndraws, self.Ndraws, len(self._distribution)
         # N, D = draws.shape[0], draws.shape[1]
{metacountregressor-0.1.238.dist-info → metacountregressor-0.1.241.dist-info}/RECORD RENAMED
@@ -3,18 +3,18 @@
 metacountregressor/app_main.py,sha256=vY3GczTbGbBRalbzMkl_9jVW7RMgEOc6z2Dr1IZJv9c,10014
 metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
 metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
-metacountregressor/helperprocess.py,sha256=
+metacountregressor/helperprocess.py,sha256=zBpMI2AI7aZ19IZlEnZrvy2VAtJICIcwpq60JL_OIjQ,24428
 metacountregressor/main.py,sha256=xfpKN2w0kePHp_Q2HOPjtG15PLEN1L3sEnDw1PHBquw,23668
 metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
-metacountregressor/metaheuristics.py,sha256=
+metacountregressor/metaheuristics.py,sha256=PyxFBlNqqwq787cDHl36xLaricMA3HTCtDweOe_UM-M,106763
 metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiLur0k,23096
 metacountregressor/pareto_logger__plot.py,sha256=mEU2QN4wmsM7t39GJ_XhJ_jjsdl09JOmG0U2jICrAkI,30037
 metacountregressor/setup.py,sha256=5UcQCCLR8Fm5odA3MX78WwahavxFq4mVD6oq0IuQvAY,936
 metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
-metacountregressor/solution.py,sha256=
+metacountregressor/solution.py,sha256=78CNqLdVfLQCSVFEWVY1NkV3wlNykwbek3aNTcC5-WI,316939
 metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
-metacountregressor-0.1.
-metacountregressor-0.1.
-metacountregressor-0.1.
-metacountregressor-0.1.
-metacountregressor-0.1.
+metacountregressor-0.1.241.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+metacountregressor-0.1.241.dist-info/METADATA,sha256=c9KqN-cADG_SSnayPLrL2npCMCW_nway7Upg_Lr-Row,23529
+metacountregressor-0.1.241.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
+metacountregressor-0.1.241.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
+metacountregressor-0.1.241.dist-info/RECORD,,

{metacountregressor-0.1.238.dist-info → metacountregressor-0.1.241.dist-info}/LICENSE.txt — File without changes
{metacountregressor-0.1.238.dist-info → metacountregressor-0.1.241.dist-info}/top_level.txt — File without changes