metacountregressor 0.1.78__py3-none-any.whl → 0.1.93__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacountregressor/main.py +73 -8
- metacountregressor/solution.py +158 -110
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/METADATA +78 -20
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/RECORD +7 -7
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/WHEEL +1 -1
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/top_level.txt +0 -0
metacountregressor/main.py  CHANGED

@@ -29,6 +29,64 @@ def convert_df_columns_to_binary_and_wide(df):
 
 
 def main(args, **kwargs):
+    '''METACOUNT REGRESSOR TESTING ENVIRONMENT'''
+    import statsmodels.api as sm
+
+    data = sm.datasets.sunspots.load_pandas().data
+    # print(data.exog)
+    data_exog = data['YEAR']
+    data_exog = sm.add_constant(data_exog)
+    data_endog = data['SUNACTIVITY']
+
+    # Instantiate a gamma family model with the default link function.
+    import numpy as np
+
+    gamma_model = sm.NegativeBinomial(data_endog, data_exog)
+    gamma_results = gamma_model.fit()
+
+    print(gamma_results.summary())
+
+    # NOW LET's COMPARE THIS TO METACOUNT REGRESSOR
+    import metacountregressor
+    from importlib.metadata import version
+    print(version('metacountregressor'))
+    import pandas as pd
+    import numpy as np
+    from metacountregressor.solution import ObjectiveFunction
+    from metacountregressor.metaheuristics import (harmony_search,
+                                                   differential_evolution,
+                                                   simulated_annealing)
+
+    # Model Decisions,
+    manual_fit_spec = {
+
+        'fixed_terms': ['const', 'YEAR'],
+        'rdm_terms': [],
+        'rdm_cor_terms': [],
+        'grouped_terms': [],
+        'hetro_in_means': [],
+        'transformations': ['no', 'no'],
+        'dispersion': 1 # Negative Binomial
+    }
+
+    # Arguments
+    arguments = {
+        'algorithm': 'hs',
+        'test_percentage': 0,
+        'test_complexity': 6,
+        'instance_number': 'name',
+        'Manual_Fit': manual_fit_spec
+    }
+    obj_fun = ObjectiveFunction(data_exog, data_endog, **arguments)
+    #exit()
+
+
+
     print('the args is:', args)
     print('the kwargs is', kwargs)
 
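Editor's note: the block added above is a self-contained benchmark. It fits a textbook negative binomial with statsmodels on the bundled sunspots data, then fits the same fixed-effects specification (`const` + `YEAR`, NB dispersion) through `ObjectiveFunction` so the two summaries can be compared. A distilled, runnable version of the statsmodels reference half (standard statsmodels public API only, nothing package-specific):

```python
# Reference fit mirroring the hunk above: NB2 regression of sunspot
# activity on year, using only the statsmodels public API.
import statsmodels.api as sm

data = sm.datasets.sunspots.load_pandas().data
exog = sm.add_constant(data['YEAR'])      # constant + single fixed effect
endog = data['SUNACTIVITY']

nb_results = sm.NegativeBinomial(endog, exog).fit()
print(nb_results.summary())               # last row reports the dispersion alpha
```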
@@ -109,6 +167,16 @@ def main(args, **kwargs):
         'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
         'dispersion': 1
     }
+    print('overriding this delete, just want to test the NB')
+    manual_fit_spec = {
+        'fixed_terms': ['const'],
+        'rdm_terms': [],
+        'rdm_cor_terms': [],
+        'grouped_terms': [],
+        'hetro_in_means': [],
+        'transformations': ['no'],
+        'dispersion': 1
+    }
 
     df = pd.read_csv('./data/Ex-16-3.csv')  # read in the data
     y_df = df[['FREQ']].copy()  # only consider crashes

@@ -118,7 +186,7 @@
     x_df['Offset'] = np.log(1 + x_df['AADT'] * x_df['LENGTH'] * 365 / 100000000)
     x_df = x_df.drop(columns=['AADT', 'LENGTH'])
 
-    if args
+    if args.get('seperate_out_factors', 0):
 
         x_df = helperprocess.as_wide_factor(x_df, keep_original=0,
                                             exclude=['INTECHAG', 'CURVES', 'MIMEDSH', 'MXMEDSH', 'SPEED'])

@@ -173,8 +241,8 @@
         x_df = x_df.drop(columns = [ 'zonal_ID', 'ln_AADT', 'ln_seg'])
         x_df['rumble_install_year'] = x_df['rumble_install_year'].astype('category').cat.codes
         x_df.rename(columns={"rumble_install_year": "has_rumble"}, inplace=True)
-
-
+    except Exception as e:
+        print(e)
         x_df = df.drop(columns=['Y'])  # was dropped postcode
 
     group_grab = x_df['county']

@@ -215,7 +283,6 @@
     else:
         print('fitting manually')
         args['Manual_Fit'] = manual_fit_spec
-
     if args['problem_number'] == str(8) or args['problem_number'] == 8:
         print('Maine County Dataset.')
         args['group'] = 'county'

@@ -346,10 +413,8 @@ if __name__ == '__main__':
     override = True
     if override:
         print('todo turn off, in testing phase')
-        parser.add_argument('-problem_number', default='
+        parser.add_argument('-problem_number', default='4')
         print('did it make it')
-
-
     if 'algorithm' not in args:
         parser.add_argument('-algorithm', type=str, default='hs',
                             help='optimization algorithm')

@@ -370,7 +435,7 @@
     parser.print_help()
     args = vars(parser.parse_args())
     print(type(args))
-    # TODO add in chi 2 and df in estimation and compare degrees of freedom
+    # TODO add in chi 2 and df in estimation and compare degrees of freedom this needs to be done in solution
 
     # Print the args.
     profiler = cProfile.Profile()
metacountregressor/solution.py  CHANGED

@@ -47,7 +47,7 @@ np.seterr(divide='ignore', invalid='ignore')
 warnings.simplefilter("ignore")
 
 # define the computation boundary limits
-min_comp_val = 1e-
+min_comp_val = 1e-160
 max_comp_val = 1e+200
 log_lik_min = -1e+200
 log_lik_max = 1e+200

@@ -122,8 +122,9 @@ class ObjectiveFunction(object):
 
     def __init__(self, x_data, y_data, **kwargs):
 
-        self.reg_penalty =
+        self.reg_penalty = 0
         self.power_up_ll = False
+
         self.bic = None
         self.other_bic = False
         self.test_flag = 1

@@ -131,8 +132,8 @@
         print('change this to false latter ')
 
         # initialize values
-        self.constant_value =
-        self.negative_binomial_value =
+        self.constant_value = 0
+        self.negative_binomial_value = 1
 
         self.verbose_safe = True
         self.please_print = kwargs.get('please_print', 0)

@@ -169,7 +170,7 @@
         self._par = 0.3
         self._mpai = 1
         self._max_imp = 100000
-        self._WIC = 1000  # Number of Iterations without Multiobjective Improvement
+        self._WIC = 1000  # Number of Iterations without Multiobjective Improvement #tod chuck into solution
         self._panels = None
         self.is_multi = True
         self.method_ll = 'Nelder-Mead-BFGS'

@@ -389,6 +390,8 @@
         self.initial_sig = 1  # pass the test of a single model
         self.pvalue_sig_value = .1
         self.observations = self._x_data.shape[0]
+        self.minimize_scaler = 1/self.observations # scale the minimization function to the observations
+
         self.batch_size = None
         # open the file in the write mode
         self.grab_transforms = 0
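Editor's note: the new `minimize_scaler = 1/self.observations` turns the minimised objective into a per-observation average. Multiplying an objective by a positive constant leaves the minimiser unchanged, but the optimal value must be divided back by the scaler whenever the log-likelihood is reported, which is exactly what the later change to `loglikelihood = -optim_res['fun']/self.minimize_scaler - penalty` does. A small sketch of the invariance (toy Poisson likelihood, not package code):

```python
import numpy as np
from scipy.optimize import minimize
from scipy.special import gammaln

rng = np.random.default_rng(0)
y = rng.poisson(3.0, size=500)

def negloglik(theta):
    lam = np.exp(theta[0])                     # rate kept positive via exp
    return -np.sum(y * np.log(lam) - lam - gammaln(y + 1))

scaler = 1.0 / y.size
raw = minimize(negloglik, x0=[0.0])
avg = minimize(lambda t: negloglik(t) * scaler, x0=[0.0])

assert np.allclose(raw.x, avg.x, atol=1e-4)    # same argmin either way
print(-avg.fun / scaler, -raw.fun)             # identical log-likelihoods
```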
@@ -841,8 +844,12 @@
 
         return ([self._model_type_codes[dispersion]])
 
-    def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None,
-
+    def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
+        r'''
+        setup for naming of the model summary
+        '''
+
+
         self.name_deleter = []
         group_rpm = None
         group_dist = []

@@ -1015,7 +1022,7 @@
         signif_list = self.pvalue_asterix_add(self.pvalues)
         if model == 1:
 
-            self.coeff_[-1] = np.
+            self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
             if self.coeff_[-1] < 0.25:
                 print(self.coeff_[-1], 'Warning Check Dispersion')
                 print(np.exp(self.coeff_[-1]))
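Editor's note: this change, together with the `alpha = np.exp(gamma)` edits further down, indicates the NB dispersion is now carried through the optimiser on the log scale: the raw coefficient is unconstrained and the likelihood code exponentiates it, which keeps the dispersion strictly positive without box bounds. The summary code then back-transforms the stored raw value (`1/np.exp(...)`) before printing. A one-line sketch of the round trip (values illustrative):

```python
import numpy as np

gamma = -0.7              # raw, unconstrained coefficient seen by the optimiser
alpha = np.exp(gamma)     # dispersion used inside the likelihood, always > 0
print(alpha)              # ~0.497
print(1 / np.exp(gamma))  # ~2.014, the back-transformed value the summary prints
```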
@@ -2683,7 +2690,7 @@
         grad_n = np.concatenate((grad_n_sub, grad_n_sub1), axis=1)
         return gradient, grad_n
 
-    def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False):
+    def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False, alpha = None):
         """
         Negative Binomial model score (gradient) vector of the log-likelihood
         Parameters

@@ -2703,9 +2710,47 @@
 
         """
 
-
+        # Calculate common terms
+        '''
+        n = len(y)
+        n, p, d = X.shape  # n: observations, p: panels (1 in your case), d: explanatory variables
+
+        # Flatten the data since there's only one panel, simplifying the operations
+        X_flat = X.reshape(n * p, d)
+        y_flat = y.flatten()
+        mu_flat = mu.flatten()
+
+        # Prepare score array
+        score = np.zeros(d + 1)  # +1 for alpha
+
+        # Compute the gradient for regression coefficients
+        for j in range(d):  # Exclude the last parameter (alpha)
+            score[j] = np.dot(X_flat[:, j], (y_flat - mu_flat))
+
+        # Compute the gradient for the dispersion parameter
+        if obs_specific:
+            # Adjust the calculation if observation-specific effects are considered
+            sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+                y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+            score[-1] = np.sum(sum_terms)
+        else:
+            # Standard calculation
+            sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+                y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+            score[-1] = np.sum(sum_terms)
+        return score
+        '''
+        #return score
 
-
+
+
+        try:
+            if alpha is None:
+                alpha = np.exp(params[-1])
+            else:
+                alpha = np.exp(params[-1])
             a1 = 1 / alpha * mu ** Q
             prob = a1 / (a1 + mu)
             exog = X

@@ -2747,7 +2792,8 @@
             return np.concatenate((dparams, dalpha),
                                   axis=1)
         except Exception as e:
-            print(
+            print(e)
+            print('NB score exception problem..')
             exc_type, exc_obj, exc_tb = sys.exc_info()
             fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
             print(exc_type, fname, exc_tb.tb_lineno)
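Editor's note: `NB_Score` now exponentiates `params[-1]` before using it, so the derivative returned with respect to the raw parameter must carry the chain-rule factor: if alpha = exp(gamma), then d/dgamma of ll(exp(gamma)) equals alpha times the derivative of ll with respect to alpha. A quick numeric check of that identity on a stand-in function (not the package's likelihood):

```python
import numpy as np

f = lambda alpha: np.log(alpha) - 0.5 * alpha ** 2    # stand-in for ll(alpha)
df = lambda alpha: 1 / alpha - alpha                  # its derivative in alpha

gamma = 0.3
alpha = np.exp(gamma)
analytic = alpha * df(alpha)                          # chain rule: d/dgamma
eps = 1e-6
numeric = (f(np.exp(gamma + eps)) - f(np.exp(gamma - eps))) / (2 * eps)
assert np.isclose(analytic, numeric)
```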
@@ -3404,24 +3450,44 @@
         # if gamma <= 0.01: #min defined value for stable nb
         #     gamma = 0.01
 
+
+
+
         endog = y
         mu = lam
-        alpha = gamma
-        size = 1.0 / alpha * mu ** Q
+        alpha = np.exp(gamma)
+        #size = 1.0 / alpha * mu ** Q
         alpha_size = alpha * mu ** Q
         # prob = size/(size+mu)
         prob = alpha / (alpha + mu)
         # prob = 1/(1+mu*alpha)
+
+        '''test'''
+
+
         try:
             # print(np.shape(y),np.shape(size), np.shape(prob))
-            #
+            #gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
+            #import time
+            #start_time = time.time()
+
 
+            # Measure time for negbinom_pmf
+            #start_time = time.time()
+            #for _ in range(10000):
+
+            #gg = self.negbinom_pmf(alpha_size, prob, y)
+            #end_time = time.time()
+            #print("Custom functieon time:", end_time - start_time)
+            #start_time = time.time()
+            #for _ in range(10000):
             gg = np.exp(
                 gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
                     y + alpha) * np.log(mu + alpha))
-
-            #
-            #
+            gg[np.isnan(gg)] = 1
+            #gg = nbinom.pmf(y ,alpha, prob)
+            #end_time = time.time()
+            #print("Custom functieon time:", end_time - start_time)
 
         except Exception as e:
             print(e)
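Editor's note: the retained closed form evaluates the NB2 probability mass with size r (here `exp(gamma)`) and mean mu. It is algebraically identical to `scipy.stats.nbinom` with success probability r/(r+mu), which makes a convenient cross-check (a sketch; `r` and `mu` are illustrative values):

```python
import numpy as np
from scipy.special import gammaln
from scipy.stats import nbinom

y = np.arange(10)
mu, r = 3.5, 1.8    # mean and NB size (the code's exp(gamma))

# the closed form kept in the diff above
gg = np.exp(gammaln(y + r) - gammaln(y + 1) - gammaln(r)
            + y * np.log(mu) + r * np.log(r) - (y + r) * np.log(mu + r))

# identical parameterisation: size r, prob p = r / (r + mu)
assert np.allclose(gg, nbinom.pmf(y, r, r / (r + mu)))
```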
@@ -3492,7 +3558,7 @@
 
         endog = y
         mu = lam
-        alpha = gamma
+        alpha = np.exp(gamma)
         alpha = alpha * mu ** Q
         size = 1 / alpha * mu ** Q  # also r
         # self.rate_param = size

@@ -3572,21 +3638,8 @@
 
         if dispersion == 1 or dispersion == 4:  # nb
             # if model_nature is not None and 'dispersion_penalty' in model_nature:
-            # if b_gam < 0.8*model_nature['dispersion_penalty']:
-            # penalty += model_nature['dispersion_penalty'] -b_gam
-
-            # if abs(b_gam) < 0.01:
-            # penalty += 1/np.abs(b_gam)
 
-            if b_gam >= 4.5:
-                penalty += b_gam
-                b_gam = 4.61
-                # b_gam = 7.9
-                # penalty += model_nature['dispersion_penalty'] -b_gam
-                # penalty += 1/np.max((0.01,abs(b_gam)))
-                # b_gam = model_nature['dispersion_penalty']
 
-            """
             if b_gam <= 0:
                 #penalty += 100
                 #penalty += abs(b_gam)

@@ -3594,21 +3647,21 @@
                 #b_gam = 1
 
                 # if b_gam < 0.03:
-                penalty +=
+                penalty += min(1, np.abs(b_gam))
 
-                b_gam = 0.
+                b_gam = 0.001
             #
 
-            if b_gam >= 10:
-
+            #if b_gam >= 10:
+            #    penalty+= b_gam
 
-
-                b_gam = min_comp_val
+            # if b_gam == 0:
+            #b_gam = min_comp_val
             #b_gam = 0.03
 
-
+            # b_gam = abs(b_gam)
 
-
+
 
         elif dispersion == 2:
             if b_gam >= 1:

@@ -3761,7 +3814,8 @@
         elif dispersion == 1:
 
             proba_r = self._nonlog_nbin(y, eVd, b_gam)
-
+
+
             # proba_d = self.dnegbimonli(y, eVd, b_gam )
             # print('fuck if this actually works')
 

@@ -3793,7 +3847,7 @@
             proba_p = self._prob_product_across_panels(
                 proba_r, self.panel_info)
             proba_r = proba_p
-        proba_r = np.clip(proba_r, min_comp_val,
+        proba_r = np.clip(proba_r, min_comp_val, max_comp_val)
         loglik = np.log(proba_r)
         return loglik
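Editor's note: the completed `np.clip(proba_r, min_comp_val, max_comp_val)` line explains the module-level boundary constants fixed earlier in this diff. Probabilities that underflow to 0 would give `log(0) = -inf` and poison the optimiser, so they are pinned to a tiny-but-positive floor first. A minimal illustration:

```python
import numpy as np

min_comp_val, max_comp_val = 1e-160, 1e+200
proba = np.array([0.0, 1e-300, 0.2])               # underflow happens at extreme parameters
safe = np.clip(proba, min_comp_val, max_comp_val)
print(np.log(safe))                                # finite everywhere; np.log(proba) would emit -inf
```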
@@ -4095,9 +4149,9 @@
 
         elif dispersion == 1:
 
-            der =
+            der = self.NB_Score(betas, y, eVd, Xd, 0, obs_specific)
             if both:
-                grad_n =
+                grad_n = self.NB_Score(betas, y, eVd, Xd, 0, True)
             return np.nan_to_num(der, nan=200, posinf=200, neginf=-200), np.nan_to_num(grad_n, nan=140, posinf=140,
                                                                                        neginf=-140)

@@ -4351,7 +4405,7 @@
                 P += Xd[key].shape[1]
                 Kf += Xd[key].shape[2]
         else:
-            self.naming_for_printing(betas, 1, dispersion,
+            self.naming_for_printing(betas, 1, dispersion, model_nature=model_nature)
             N, P, Kf = Xd.shape[0], Xd.shape[1], Xd.shape[2]
         betas = np.array(betas)
         Bf = betas[0:Kf]  # Fixed betas

@@ -4381,7 +4435,7 @@
             llf_main = self.loglik_obs(
                 y, eVd, dispersion, main_disper, lindley_disp, betas)
 
-
+            llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
 
             loglik = llf_main.sum()
 

@@ -4402,14 +4456,19 @@
             if return_gradient_n:
                 der, grad_n = self.simple_score_grad(
                     betas, y, eVd, Xd, dispersion, both=True)
-                return (-loglik + penalty, -der, grad_n)
+                #return (-loglik + penalty, -der, grad_n)*self.minimize_scaler
+                scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel(), grad_n))
+                return scaled_tuple
             else:
                 der = self.simple_score_grad(
                     betas, y, eVd, Xd, dispersion, both=False)
-
-
+                scaled_tuple = tuple(
+                    x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel()))
+                return scaled_tuple
+                #return (-loglik + penalty, -der.ravel())*self.minimize_scaler
         else:
-
+
+            return (-loglik + penalty)*self.minimize_scaler
         # Else, we have draws
         self.n_obs = len(y) * self.Ndraws  #todo is this problematic
         penalty += self._penalty_betas(

@@ -4420,7 +4479,7 @@
         # Kf =0
         betas = np.array(betas)
         betas = dev.to_gpu(betas)  # TODO fix mepotnetially problem
-        self.naming_for_printing(betas, 0, dispersion,
+        self.naming_for_printing(betas, 0, dispersion, model_nature=model_nature)
         y = dev.to_gpu(y)
         if draws is not None and draws_grouped is not None:
             draws = np.concatenate((draws_grouped, draws), axis=1)

@@ -4509,11 +4568,11 @@
                 # brstd), draws_)  # Get random coefficients, old method
                 Br = self._transform_rand_betas(br,
                                                 brstd, draws_)  # Get random coefficients
-                self.naming_for_printing(betas, dispersion=dispersion,
+                self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
                 self.Br = Br.copy()
 
             else:
-                self.naming_for_printing(betas, dispersion=dispersion,
+                self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
                 chol_mat = self._chol_mat(
                     len(self.rdm_cor_fit), br, brstd, self.rdm_cor_fit)
                 self.chol_mat = chol_mat.copy()

@@ -4633,34 +4692,18 @@
             # lik = np.nan_to_num(lik, )
             loglik = np.log(lik)
             llf_main = loglik
-            if 'exog_infl' in model_nature:
-                params_infl = betas[Kf:Kf + len(model_nature.get('exog_infl'))]
-                params_main = Bf
-                exog_infl = model_nature.get('exog_inflX')
-                llf_main = llf_main.ravel()  # TODO test this
-                w = self.predict_logit_part(params_infl, exog_infl)
-
-                w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-
-                zero_idx = np.nonzero(y == 0)[0]
-                nonzero_idx = np.nonzero(y)[0]  # FIXME should shape be unravelled
-
-                llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1)  # TODO test this i added ravel to this code
-                llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
-                llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-                loglik = llf.sum()
-            else:
 
-
+
+            loglik = loglik.sum()
 
             loglik = np.clip(loglik, log_lik_min, log_lik_max)
             if self.power_up_ll:
                 penalty += self.regularise_l2(betas)
-
+
             penalty += self.regularise_l2(betas)
             if not return_gradient:
 
-                output = (-loglik + penalty,)
+                output = ((-loglik + penalty)*self.minimize_scaler,)
                 if verbose > 1:
                     print(
                         f"Evaluation {self.total_fun_eval} Log-Lik.={-loglik:.2f}")

@@ -4690,19 +4733,24 @@
                 #     Hinv = np.linalg.inv(H)
                 # except Exception:
                 #     Hinv = np.linalg.pinv(H)
-
+                scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad, grad_n))
+                return scaled_tuple
+                #output = (-loglik + penalty, -grad, grad_n)*self.minimize_scaler
 
-                return output
+                #return output
             else:
+                scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad))
+                return scaled_tuple
+                #output = (-loglik + penalty, -grad)*self.minimize_scaler
 
-                output
-
-                return output
+                #return output
         except Exception as e:
             traceback.print_exc()
             print(e)
 
-
+    def minimize_function(self, loglike):
+        r'Takes the logliklihood function and tranforms it to a more handed minimization function'
+        return loglike/self.n_obs
     def print_chol_mat(self, betas):
         print(self.chol_mat)
         self.get_br_and_bstd(betas)
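Editor's note: every return path above now scales the objective and its gradient by the same `minimize_scaler`. That consistency matters because the tuples are consumed by SciPy-style optimisers with `jac=True`: if only the function value were scaled, the line search would see a gradient that disagrees with the objective. A sketch of the pattern (generic quadratic, not the package's likelihood):

```python
import numpy as np
from scipy.optimize import minimize

scaler = 1e-3    # stand-in for 1/n_obs

def fun_and_grad(x):
    f = np.sum(x ** 2)
    g = 2 * x
    return f * scaler, g * scaler    # value and gradient scaled together

res = minimize(fun_and_grad, x0=np.array([3.0, -2.0]), jac=True, method='BFGS')
print(res.x)                         # ~[0, 0] regardless of the scaler
```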
@@ -4938,12 +4986,16 @@
         return H
 
     def _minimize(self, loglik_fn, x, args, method, tol, options, bounds=None, hess_calc=None):
-
+        #method = 'BFGS'
        if method == "BFGS":
 
            try:
+                argbs = list(args)
 
-
+                argbs[7] = True
+                argsb = tuple(argbs)
+                a = self._bfgs(loglik_fn, x, args=argsb, tol=tol, **options)
+                return self._bfgs(loglik_fn, x, args=args, tol=tol, **options)
 
            except:
                return minimize(loglik_fn, x, args=args, jac=args[6], method='BFGS', tol=tol, options=options)

@@ -5190,7 +5242,7 @@
         if self.power_up_ll:
             loglikelihood =-optim_res['fun']/2 - penalty
         else:
-            loglikelihood = -optim_res['fun'] - penalty
+            loglikelihood = -optim_res['fun']/self.minimize_scaler - penalty
 
         # self.coeff_names = coeff_names
         # self.total_iter = optim_res['nit']

@@ -5249,9 +5301,9 @@
         betas_est - array. Coefficients which maximize the negative log-liklihood.
         """
         # Set defualt method
-
-
-
+        #TODO, the inital fit worked but it throws
+
+
 
         sol = Solution()
         log_ll = 10.0 ** 9

@@ -5266,10 +5318,7 @@
         if self.hess_yes == False:
             method2 = 'BFGS_2'
             method2 = self.method_ll
-            # method2 = 'BFGS_2'
 
-            # method2 = 'BFGS_2'
-            # method2 = 'dogleg'
         bic = None
         pvalue_alt = None
         zvalues = None

@@ -5287,7 +5336,7 @@
 
         dispersion_param_num = self.is_dispersion(dispersion)
 
-        paramNum = self.get_param_num(dispersion)
+        #paramNum = self.get_param_num(dispersion)
         self.no_random_paramaters = 0
         if 'XG' in mod:
             XX = np.concatenate((mod.get('X'), mod.get('XG'), mod.get('Xr'), mod.get('XH')), axis=2)

@@ -5313,7 +5362,7 @@
             XX_test = mod.get('Xr_test')
 
         bb = np.random.uniform(
-            -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num
+            -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num)
 
         if method == 'L-BFGS-B':
             if dispersion == 0:

@@ -5347,11 +5396,13 @@
             hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
             initial_beta = self._minimize(self._loglik_gradient, bb,
                                           args=(XX, y, None, None, None, None, calc_gradient, hess_est,
-                                                dispersion, 0, False, 0, None,
+                                                dispersion, 0, False, 0, None, None, None, None, None,
                                                 mod),
                                           method=method2, tol=1e-5, options={'gtol': tol['gtol']},
                                           bounds=bounds)
 
+
+
             if method2 == 'L-BFGS-B':
                 if hasattr(initial_beta.hess_inv, 'todense'):
                     initial_beta['hess_inv'] = initial_beta.hess_inv.todense() if hasattr(initial_beta.hess_inv,

@@ -5363,7 +5414,7 @@
             if initial_beta is not None and np.isnan(initial_beta['fun']):
                 initial_beta = self._minimize(self._loglik_gradient, bb,
                                               args=(XX, y, None, None, None, None, True, True, dispersion,
-                                                    0, False, 0, None,
+                                                    0, False, 0, None, None, None, None, None, mod),
                                               method=method2, tol=tol['ftol'], options={'gtol': tol['gtol']})
 
             if initial_beta is not None and not np.isnan(initial_beta['fun']):

@@ -5387,24 +5438,24 @@
                                   loglik=log_ll_fixed, num_parm=paramNum, GOF=other_measures)
 
                 self.naming_for_printing(
-                    initial_beta['x'], 1, dispersion,
+                    initial_beta['x'], 1, dispersion, model_nature=mod)
 
                 if self.is_multi:
                     in_sample_mae = self.validation(
                         initial_beta['x'], mod.get('y'), mod.get('X'), dispersion=dispersion,
-                        rdm_cor_fit=self.rdm_cor_fit,
+                        rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
                         testing=0)
 
                     sol.add_objective(TRAIN=in_sample_mae)
                     MAE_out = self.validation(
                         initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
-                        rdm_cor_fit=self.rdm_cor_fit,
+                        rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0)
                     sol.add_objective(TEST=MAE_out)
 
                     if self.val_percentage >0:
                         MAE_VAL = self.validation(
                             initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
-                            rdm_cor_fit=self.rdm_cor_fit,
+                            rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
                             validation=1)
                         sol.add_objective(VAL=MAE_VAL)
                 if sol[self._obj_1] <= self.best_obj_1:

@@ -5509,9 +5560,6 @@
 
                     bounds.append((np.random.uniform(0.05, .15), bob2[count] + 7))
                     count += 1
-
-
-
                 elif ii < jj:
                     if bob2[count] > 0:
 

@@ -5584,14 +5632,14 @@
             mod['dispersion_penalty'] = np.abs(b[-1])
         grad_args = (
             X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0, self.rdm_cor_fit,
-            None,
+            None, None, draws_grouped, XG, mod)
         # self.gradients_est_yes = (1, 1)
 
         if draws is None and draws_hetro is not None:
             print('hold')
         betas_est = self._minimize(self._loglik_gradient, b, args=(
             X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
-            self.rdm_cor_fit, None,
+            self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
                                    method=method2, tol=tol['ftol'],
                                    options={'gtol': tol['gtol']}, bounds=bounds,
                                    hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)

@@ -5610,7 +5658,7 @@
             betas_est = self._minimize(self._loglik_gradient, b, args=(
                 X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0,
                 self.rdm_cor_fit,
-                None,
+                None, None, draws_grouped, XG, mod),
                                        method=method2, tol=tol['ftol'],
                                        options={'gtol': tol['gtol']})

@@ -5646,7 +5694,7 @@
 
         paramNum = len(betas_est['x'])
         self.naming_for_printing(
-            betas_est['x'], 0, dispersion,
+            betas_est['x'], 0, dispersion, model_nature=mod)
 
         sol.add_objective(bic=bic, aic=aic,
                           loglik=log_ll, num_parm=paramNum, GOF=other_measures)

@@ -5656,19 +5704,19 @@
             try:
 
                 in_sample_mae = self.validation(betas_est['x'], y, X, Xr, dispersion=dispersion,
-                                                rdm_cor_fit=self.rdm_cor_fit,
+                                                rdm_cor_fit=self.rdm_cor_fit,
                                                 model_nature=mod, testing=0)
                 sol.add_objective(TRAIN=in_sample_mae)
                 y_test, X_test, Xr_test = mod.get('y_test'), mod.get('X_test'), mod.get('Xr_test')
                 Xr_grouped_test = mod.get('Xrtest')
                 MAE_test = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
-                                           rdm_cor_fit=self.rdm_cor_fit,
+                                           rdm_cor_fit=self.rdm_cor_fit,
                                            model_nature=mod)
 
                 sol.add_objective(TEST=MAE_test)
-                if self.val_percentage >0:
+                if self.val_percentage > 0:
                     MAE_val = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
-                                              rdm_cor_fit=self.rdm_cor_fit,
+                                              rdm_cor_fit=self.rdm_cor_fit,
                                               model_nature=mod, validation=1)
                     sol.add_objective(VAL=MAE_val)
 

@@ -6105,17 +6153,17 @@
         if self.is_quanitifiable_num(obj_1[self._obj_1]) and pvalues is not None:
             self.bic = obj_1['bic']
             self.pvalues = pvalues
-            if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c",
+            if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c", 'grp', 'xh']):
                 # todo: probably delete
                 self.naming_for_printing(
-                    pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
+                    pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
                     obj_1, model_nature)
             else:
                 if is_delete == 0:
                     # todo: probably delete
                     self.naming_for_printing(
                         pvalues, 1, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
-
+                        obj_1, model_nature)
             self.coeff_ = betas
             self.stderr = stderr
             self.zvalues = zvalues
{metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/METADATA  CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: metacountregressor
-Version: 0.1.78
+Version: 0.1.93
 Summary: Extensions for a Python package for estimation of count models.
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern

@@ -11,12 +11,18 @@ Description-Content-Type: text/markdown
 License-File: LICENSE.txt
 Requires-Dist: numpy >=1.13.1
 Requires-Dist: scipy >=1.0.0
+Requires-Dist: requests
 
 <div style="display: flex; align-items: center;">
-    <img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width:
+    <img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 100px; margin-right: 20px;">
     <p><span style="font-size: 60px;"><strong>MetaCountRegressor</strong></span></p>
 </div>
 
+# Tutorial also available as a jupyter notebook
+[Download Example Notebook](https://github.com/zahern/CountDataEstimation/blob/main/Tutorial.ipynb)
+
+The tutorial provides more extensive examples on how to run the code and perform experiments. Further documentation is currently in development.
+
 ##### Quick Setup
 The Below code demonstrates how to set up automatic optimization assisted by the harmony search algorithm. References to the Differential Evolution and Simulated Annealing has been mentioned (change accordingly)
 

@@ -35,8 +41,15 @@ from metacountregressor.solution import ObjectiveFunction
 from metacountregressor.metaheuristics import (harmony_search,
                                                differential_evolution,
                                                simulated_annealing)
+
+
 ```
 
+    loaded standard packages
+    loaded helper
+    testing
+
+
 #### Basic setup.
 The initial setup involves reading in the data and selecting an optimization algorithm. As the runtime progresses, new solutions will be continually evaluated. Finally, at the end of the runtime, the best solution will be identified and printed out. In the case of multiple objectives all of the best solutions will be printed out that belong to the Pareto frontier.
 

@@ -53,7 +66,7 @@ X = df.drop(columns=['FREQ', 'ID', 'AADT'])
 
 #some example argument, these are defualt so the following line is just for claritity. See the later agruments section for detials.
 arguments = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number':1,
-             'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "
+             'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "_max_time": 6}
 # Fit the model with metacountregressor
 obj_fun = ObjectiveFunction(X, y, **arguments)
 #replace with other metaheuristics if desired

@@ -71,7 +84,7 @@ Note: Please Consider the main arguments to change.
 - `val_percentage`: This parameter represents the percentage of data used to validate the model. The value 0.15 corresponds to 15% of the data.
 - `test_complexity`: This parameter defines the complexity level for testing. The value 6 tests all complexities. Alternatively, you can provide a list of numbers to consider different complexities. The complexities are further explained later in this document.
 - `instance_number`: This parameter is used to give a name to the outputs.
-- `
+- `_obj_1`: This parameter has multiple choices for obj_1, such as 'bic', 'aic', and 'hqic'. Only one choice should be defined as a string value.
 - `_obj_2`: This parameter has multiple choices for objective 2, such as 'RMSE_TEST', 'MSE_TEST', and 'MAE_TEST'.
 - `_max_time`: This parameter specifies the maximum number of seconds for the total estimation before stopping.
 - `distribution`: This parameter is a list of distributions to consider. Please select all of the available options and put them into a list of valid options if you want to to consider the distribution type for use when modellign with random parameters. The valid options include: 'Normal', 'LnNormal', 'Triangular', and 'Uniform'.

@@ -80,7 +93,7 @@ Note: Please Consider the main arguments to change.
 
 
 
-###
+### Example of changing the arguments:
 Modify the arguments according to your preferences using the commented code as a guide.
 
 

@@ -108,16 +121,18 @@ Listed below is an example of how to specify an initial solution within the fram
 
 
 ```python
-#Model Decisions, Specify for
+#Model Decisions, Specify for initial solution that will be optimised.
 manual_fit_spec = {
     'fixed_terms': ['SINGLE', 'LENGTH'],
     'rdm_terms': ['AADT:normal'],
-    'rdm_cor_terms': ['GRADEBR:
+    'rdm_cor_terms': ['GRADEBR:normal', 'CURVES:normal'],
     'grouped_terms': [],
     'hetro_in_means': ['ACCESS:normal', 'MINRAD:normal'],
     'transformations': ['no', 'no', 'log', 'no', 'no', 'no', 'no'],
-    'dispersion':
+    'dispersion': 0
 }
+
+
 #Search Arguments
 arguments = {
     'algorithm': 'hs',

@@ -129,7 +144,47 @@ arguments = {
 obj_fun = ObjectiveFunction(X, y, **arguments)
 ```
 
-
+Setup Complete...
+Benchmaking test with Seed 42
+--------------------------------------------------------------------------------
+Log-Likelihood: -1339.1862434675106
+--------------------------------------------------------------------------------
+bic: 2732.31
+--------------------------------------------------------------------------------
+MSE: 650856.32
++--------------------------+--------+-------+----------+----------+------------+
+| Effect                   | $\tau$ | Coeff | Std. Err | z-values | Prob |z|>Z |
++==========================+========+=======+==========+==========+============+
+| LENGTH                   | no     | -0.15 | 0.01     | -12.98   | 0.00***    |
++--------------------------+--------+-------+----------+----------+------------+
+| SINGLE                   | no     | -2.46 | 0.04     | -50.00   | 0.00***    |
++--------------------------+--------+-------+----------+----------+------------+
+| GRADEBR                  | log    | 4.23  | 0.10     | 42.17    | 0.00***    |
++--------------------------+--------+-------+----------+----------+------------+
+| CURVES                   | no     | 0.51  | 0.01     | 34.78    | 0.00***    |
++--------------------------+--------+-------+----------+----------+------------+
+| Chol: GRADEBR (Std.      |        | 2.21  | 0.00     | 50.00    | 0.00***    |
+| Dev. normal) )           |        |       |          |          |            |
++--------------------------+--------+-------+----------+----------+------------+
+| Chol: CURVES (Std. Dev.  |        | -0.51 | 0.00     | -50.00   | 0.00***    |
+| normal) )                |        |       |          |          |            |
++--------------------------+--------+-------+----------+----------+------------+
+| Chol: CURVES (Std. Dev.  | no     | 0.55  | 0.00     | 50.00    | 0.00***    |
+| normal) . GRADEBR (Std.  |        |       |          |          |            |
+| Dev. normal )            |        |       |          |          |            |
++--------------------------+--------+-------+----------+----------+------------+
+| main: MINRAD: hetro      | no     | -0.00 | 0.00     | -44.36   | 0.00***    |
+| group 0                  |        |       |          |          |            |
++--------------------------+--------+-------+----------+----------+------------+
+| ACCESS: hetro group 0    |        | 0.68  | 0.09     | 7.68     | 0.00***    |
++--------------------------+--------+-------+----------+----------+------------+
+| main: MINRAD: hetro      |        | -0.00 | 0.00     | -44.86   | 0.00***    |
+| group 0:normal:sd hetro  |        |       |          |          |            |
+| group 0                  |        |       |          |          |            |
++--------------------------+--------+-------+----------+----------+------------+
+
+
+Simarly to return the results feed the objective function into a metaheuristic solution algorithm. An example of this is provided below:
 
 
 ```python

@@ -137,7 +192,7 @@ results = harmony_search(obj_fun)
 print(results)
 ```
 
-
+# Notes:
 ### Capabilities of the software include:
 * Handling of Panel Data
 * Support for Data Transformations

@@ -155,11 +210,11 @@ Capability to handle heterogeneity in the means of the random parameters
 * Customization of Hyper-parameters to solve problems tailored to your dataset
 * Out-of-the-box optimization capability using default metaheuristics
 
-###
+### Intepreting the output of the model:
 A regression table is produced. The following text elements are explained:
 - Std. Dev.: This column appears for effects that are related to random paramters and displays the assument distributional assumption next to it
 - Chol: This term refers to Cholesky decomposition element, to show the correlation between two random paramaters. The combination of the cholesky element on iyself is equivalent to a normal random parameter.
-- hetro group
+- hetro group: This term represents the heterogeneity group number, which refers all of the contributing factors that share hetrogentiy in the means to each other under the same numbered value.
 - $\tau$: This column, displays the type of transformation that was applied to the specific contributing factor in the data.
 
 

@@ -211,10 +266,10 @@ The following list describes the arguments available in this function. By defaul
 
 8. **`_max_time`**: This argument is used to add a termination time in the algorithm. It takes values as seconds. Note the time is only dependenant on the time after intial population of solutions are generated.
 
-
+## Example: Assistance by Harmony Search
 
 
-Let's
+Let's begin by fitting very simple models and use the structure of these models to define our objectives. Then, we can conduct a more extensive search on the variables that are more frequently identified. For instance, in the case below, the complexity is level 3, indicating that we will consider, at most randomly correlated parameters. This approach is useful for initially identifying a suitable set of contributing factors for our search.
 
 
 

@@ -241,27 +296,30 @@ arguments = {
     '_max_time': 10000
 }
 obj_fun = ObjectiveFunction(X, y, **arguments)
-
 results = harmony_search(obj_fun)
 print(results)
 ```
 
+## Paper
+
+The following tutorial is in conjunction with our latest paper. A link the current paper can be found here [MetaCountRegressor](https://www.overleaf.com/read/mszwpwzcxsng#c5eb0c)
+
 ## Contact
 If you have any questions, ideas to improve MetaCountRegressor, or want to report a bug, just open a new issue in [GitHub repository](https://github.com/zahern/CountDataEstimation).
 
 ## Citing MetaCountRegressor
 Please cite MetaCountRegressor as follows:
 
-Ahern, Z., Corry P., Paz A. (
+Ahern, Z., Corry P., Paz A. (2024). MetaCountRegressor [Computer software]. [https://pypi.org/project/metacounregressor/](https://pypi.org/project/metacounregressor/)
 
 Or using BibTex as follows:
 
 ```bibtex
-@misc{
-   author = {Zeke Ahern
+@misc{Ahern2024Meta,
+   author = {Zeke Ahern, Paul Corry and Alexander Paz},
    journal = {PyPi},
    title = {metacountregressor · PyPI},
-   url = {https://pypi.org/project/metacountregressor/0.1.
-   year = {
+   url = {https://pypi.org/project/metacountregressor/0.1.80/},
+   year = {2024},
 }
 
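Editor's note: for readers following the README hunks above, the `manual_fit_spec` dictionary pins the starting model specification, and the diff's own comments mark `'dispersion': 1` as negative binomial (the README's Poisson-style example uses `0`). A minimal specification in the same shape (keys exactly as used throughout this diff; the dispersion-code reading is inferred from the diff's comments, not from separate documentation):

```python
# Minimal manual specification mirroring the structure used in this diff.
manual_fit_spec = {
    'fixed_terms': ['const'],     # fixed-coefficient effects
    'rdm_terms': [],              # random parameters, e.g. 'AADT:normal'
    'rdm_cor_terms': [],          # correlated random parameters
    'grouped_terms': [],
    'hetro_in_means': [],         # heterogeneity-in-means effects
    'transformations': ['no'],    # one entry per term above
    'dispersion': 1               # 1 = negative binomial per the diff's comment
}
```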
{metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/RECORD  CHANGED

@@ -3,17 +3,17 @@ metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0
 metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
 metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
 metacountregressor/helperprocess.py,sha256=Sc5gJ7ffFlkya5B5KQwE33xxXuIQyF6OaYtSikLa3pQ,12968
-metacountregressor/main.py,sha256=
+metacountregressor/main.py,sha256=7ln6YvX2Nmesw1ose7T-2BQdLfDz0XmiLnP991AgQHw,18273
 metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
 metacountregressor/metaheuristics.py,sha256=2MW3qlgs7BFbe_w64snLSKc4Y0-e_9sa3s_96rUm_SE,105887
 metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiLur0k,23096
 metacountregressor/pareto_logger__plot.py,sha256=mEU2QN4wmsM7t39GJ_XhJ_jjsdl09JOmG0U2jICrAkI,30037
 metacountregressor/setup.py,sha256=8w6IqX0tJsbYrOI1BJLIJCIvOnunKli5I9fsF5PhHv4,919
 metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
-metacountregressor/solution.py,sha256=
+metacountregressor/solution.py,sha256=wigjQ4tJrMS0EvbzmRMb2JRT7s0guvPdpCXRwEWUGQg,266891
 metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
-metacountregressor-0.1.
-metacountregressor-0.1.
-metacountregressor-0.1.
-metacountregressor-0.1.
-metacountregressor-0.1.
+metacountregressor-0.1.93.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+metacountregressor-0.1.93.dist-info/METADATA,sha256=lxko7pOT-xFIpuqN3cUYr9hF3SIWszwVmGcfASHysOY,18165
+metacountregressor-0.1.93.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+metacountregressor-0.1.93.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
+metacountregressor-0.1.93.dist-info/RECORD,,

{metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/LICENSE.txt  (file without changes)
{metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/top_level.txt  (file without changes)