metacountregressor 0.1.78__py3-none-any.whl → 0.1.88__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacountregressor/main.py +15 -8
- metacountregressor/solution.py +97 -71
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.88.dist-info}/METADATA +78 -20
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.88.dist-info}/RECORD +7 -7
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.88.dist-info}/WHEEL +1 -1
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.88.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.88.dist-info}/top_level.txt +0 -0
metacountregressor/main.py
CHANGED
@@ -109,6 +109,16 @@ def main(args, **kwargs):
         'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
         'dispersion': 1
     }
+    print('overriding this delete, just want to test the NB')
+    manual_fit_spec = {
+        'fixed_terms': ['const'],
+        'rdm_terms': [],
+        'rdm_cor_terms': [],
+        'grouped_terms': [],
+        'hetro_in_means': [],
+        'transformations': ['no'],
+        'dispersion': 1
+    }
 
     df = pd.read_csv('./data/Ex-16-3.csv')  # read in the data
     y_df = df[['FREQ']].copy()  # only consider crashes
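For context, a minimal sketch of how a specification like the one added above is consumed: main.py forwards it through `args['Manual_Fit']` (visible in a later hunk of this diff), and `'dispersion': 1` selects the negative binomial family (solution.py treats dispersion 1 as NB). Everything below is illustrative wiring, not package API beyond what this diff shows.

```python
# Hedged sketch: wiring a manual specification into the solver arguments,
# following the args['Manual_Fit'] pattern visible later in this diff.
manual_fit_spec = {
    'fixed_terms': ['const'],    # constant-only mean function
    'rdm_terms': [],             # no random parameters
    'rdm_cor_terms': [],         # no correlated random parameters
    'grouped_terms': [],
    'hetro_in_means': [],
    'transformations': ['no'],   # one entry per term
    'dispersion': 1,             # 1 = negative binomial (see solution.py)
}
args = {'algorithm': 'hs', 'Manual_Fit': manual_fit_spec}
```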
@@ -118,7 +128,7 @@ def main(args, **kwargs):
     x_df['Offset'] = np.log(1 + x_df['AADT'] * x_df['LENGTH'] * 365 / 100000000)
     x_df = x_df.drop(columns=['AADT', 'LENGTH'])
 
-    if args
+    if args.get('seperate_out_factors', 0):
 
         x_df = helperprocess.as_wide_factor(x_df, keep_original=0,
                                             exclude=['INTECHAG', 'CURVES', 'MIMEDSH', 'MXMEDSH', 'SPEED'])
@@ -173,8 +183,8 @@ def main(args, **kwargs):
         x_df = x_df.drop(columns = [ 'zonal_ID', 'ln_AADT', 'ln_seg'])
         x_df['rumble_install_year'] = x_df['rumble_install_year'].astype('category').cat.codes
         x_df.rename(columns={"rumble_install_year": "has_rumble"}, inplace=True)
-
-
+    except Exception as e:
+        print(e)
         x_df = df.drop(columns=['Y'])  # was dropped postcode
 
         group_grab = x_df['county']
@@ -215,7 +225,6 @@ def main(args, **kwargs):
     else:
         print('fitting manually')
         args['Manual_Fit'] = manual_fit_spec
-
     if args['problem_number'] == str(8) or args['problem_number'] == 8:
         print('Maine County Dataset.')
         args['group'] = 'county'
@@ -346,10 +355,8 @@ if __name__ == '__main__':
    override = True
    if override:
        print('todo turn off, in testing phase')
-        parser.add_argument('-problem_number', default='
+        parser.add_argument('-problem_number', default='4')
    print('did it make it')
-
-
    if 'algorithm' not in args:
        parser.add_argument('-algorithm', type=str, default='hs',
                            help='optimization algorithm')
@@ -370,7 +377,7 @@ if __name__ == '__main__':
    parser.print_help()
    args = vars(parser.parse_args())
    print(type(args))
-    # TODO add in chi 2 and df in estimation and compare degrees of freedom
+    # TODO add in chi 2 and df in estimation and compare degrees of freedom this needs to be done in solution
 
    # Print the args.
    profiler = cProfile.Profile()
metacountregressor/solution.py
CHANGED
@@ -47,7 +47,7 @@ np.seterr(divide='ignore', invalid='ignore')
 warnings.simplefilter("ignore")
 
 # define the computation boundary limits
-min_comp_val = 1e-
+min_comp_val = 1e-160
 max_comp_val = 1e+200
 log_lik_min = -1e+200
 log_lik_max = 1e+200
@@ -131,8 +131,8 @@ class ObjectiveFunction(object):
         print('change this to false latter ')
 
         # initialize values
-        self.constant_value =
-        self.negative_binomial_value =
+        self.constant_value = 0
+        self.negative_binomial_value = 1
 
         self.verbose_safe = True
         self.please_print = kwargs.get('please_print', 0)
@@ -169,7 +169,7 @@ class ObjectiveFunction(object):
         self._par = 0.3
         self._mpai = 1
         self._max_imp = 100000
-        self._WIC = 1000  # Number of Iterations without Multiobjective Improvement
+        self._WIC = 1000  # Number of Iterations without Multiobjective Improvement #tod chuck into solution
         self._panels = None
         self.is_multi = True
         self.method_ll = 'Nelder-Mead-BFGS'
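`_WIC` reads as a stop criterion: the number of iterations the search tolerates without multiobjective improvement. As a hedged illustration of that idea (not package code; `evaluate` and `propose` are placeholders):

```python
# Illustrative sketch of an "iterations without improvement" stop rule.
def search(evaluate, propose, wic_limit=1000):
    best = float('inf')
    stale = 0
    while stale < wic_limit:
        candidate = propose()
        obj = evaluate(candidate)
        if obj < best:           # an improvement resets the counter
            best, stale = obj, 0
        else:
            stale += 1
    return best
```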
@@ -841,8 +841,7 @@ class ObjectiveFunction(object):
 
         return ([self._model_type_codes[dispersion]])
 
-    def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None,
-                            zi_fit=None, obj_1=None, model_nature=None):
+    def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
         self.name_deleter = []
         group_rpm = None
         group_dist = []
@@ -2683,7 +2682,7 @@ class ObjectiveFunction(object):
         grad_n = np.concatenate((grad_n_sub, grad_n_sub1), axis=1)
         return gradient, grad_n
 
-    def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False):
+    def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False, alpha = None):
         """
         Negative Binomial model score (gradient) vector of the log-likelihood
         Parameters
@@ -2702,10 +2701,48 @@ class ObjectiveFunction(object):
 
 
         """
+        #print('delete this later')
+        if alpha is None:
+            alpha = params[-1]
+        # Calculate common terms
+        '''
+        n = len(y)
+        n, p, d = X.shape  # n: observations, p: panels (1 in your case), d: explanatory variables
 
-
+        # Flatten the data since there's only one panel, simplifying the operations
+        X_flat = X.reshape(n * p, d)
+        y_flat = y.flatten()
+        mu_flat = mu.flatten()
 
-
+        # Prepare score array
+        score = np.zeros(d + 1)  # +1 for alpha
+
+        # Compute the gradient for regression coefficients
+        for j in range(d):  # Exclude the last parameter (alpha)
+            score[j] = np.dot(X_flat[:, j], (y_flat - mu_flat))
+
+        # Compute the gradient for the dispersion parameter
+        if obs_specific:
+            # Adjust the calculation if observation-specific effects are considered
+            sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+                y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+            score[-1] = np.sum(sum_terms)
+        else:
+            # Standard calculation
+            sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+                y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+            score[-1] = np.sum(sum_terms)
+        return score
+        '''
+        #return score
+
+
+
+
+        try:
+            if alpha is None:
+                alpha = params[-1]
         a1 = 1 / alpha * mu ** Q
         prob = a1 / (a1 + mu)
         exog = X
@@ -2747,7 +2784,8 @@ class ObjectiveFunction(object):
             return np.concatenate((dparams, dalpha),
                                   axis=1)
         except Exception as e:
-            print(
+            print(e)
+            print('NB score exception problem..')
             exc_type, exc_obj, exc_tb = sys.exc_info()
             fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
             print(exc_type, fname, exc_tb.tb_lineno)
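To make the NB_Score changes concrete: for an NB-2 likelihood the score has a closed form, and a finite-difference check is a quick way to validate a hand-written gradient like the one above. The sketch below is self-contained and assumes nothing about the package beyond the NB-2 parameterisation (mean mu = exp(X beta), dispersion alpha); all names are illustrative.

```python
# Hedged sketch: NB-2 log-likelihood, its analytic score, and a
# finite-difference cross-check.  Not metacountregressor code.
import numpy as np
from scipy.special import gammaln, digamma
from scipy.optimize import approx_fprime

rng = np.random.default_rng(0)
X = np.column_stack([np.ones(200), rng.normal(size=200)])
mu_true = np.exp(X @ np.array([0.5, 0.3]))
y = rng.negative_binomial(n=2.0, p=2.0 / (2.0 + mu_true))  # alpha = 0.5

def nb2_loglik(params):
    beta, alpha = params[:-1], params[-1]
    mu = np.exp(X @ beta)
    a = 1.0 / alpha                      # NB "size" parameter
    return np.sum(gammaln(y + a) - gammaln(a) - gammaln(y + 1)
                  + a * np.log(a) + y * np.log(mu) - (a + y) * np.log(a + mu))

def nb2_score(params):
    beta, alpha = params[:-1], params[-1]
    mu = np.exp(X @ beta)
    a = 1.0 / alpha
    dbeta = X.T @ ((y - mu) / (1.0 + alpha * mu))           # d ll / d beta
    dll_da = (digamma(y + a) - digamma(a) + np.log(a) + 1.0
              - np.log(a + mu) - (a + y) / (a + mu))
    dalpha = np.sum(-a ** 2 * dll_da)                       # da/dalpha = -a^2
    return np.append(dbeta, dalpha)

p0 = np.array([0.4, 0.2, 0.6])
print(np.max(np.abs(nb2_score(p0) - approx_fprime(p0, nb2_loglik, 1e-6))))
```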
@@ -3414,7 +3452,7 @@ class ObjectiveFunction(object):
         # prob = 1/(1+mu*alpha)
         try:
             # print(np.shape(y),np.shape(size), np.shape(prob))
-
+            gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
 
             gg = np.exp(
                 gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
@@ -3572,21 +3610,8 @@ class ObjectiveFunction(object):
 
         if dispersion == 1 or dispersion == 4:  # nb
             # if model_nature is not None and 'dispersion_penalty' in model_nature:
-            # if b_gam < 0.8*model_nature['dispersion_penalty']:
-            # penalty += model_nature['dispersion_penalty'] -b_gam
-
-            # if abs(b_gam) < 0.01:
-            # penalty += 1/np.abs(b_gam)
 
-            if b_gam >= 4.5:
-                penalty += b_gam
-                b_gam = 4.61
-            # b_gam = 7.9
-            # penalty += model_nature['dispersion_penalty'] -b_gam
-            # penalty += 1/np.max((0.01,abs(b_gam)))
-            # b_gam = model_nature['dispersion_penalty']
 
-            """
             if b_gam <= 0:
                 #penalty += 100
                 #penalty += abs(b_gam)
@@ -3594,21 +3619,21 @@ class ObjectiveFunction(object):
                 #b_gam = 1
 
             # if b_gam < 0.03:
-                penalty +=
+                penalty += min(1, np.abs(b_gam))
 
-                b_gam = 0.
+                b_gam = 0.001
             #
 
-            if b_gam >= 10:
-
+            #if b_gam >= 10:
+            #    penalty+= b_gam
 
-
-            b_gam = min_comp_val
+            # if b_gam == 0:
+            #b_gam = min_comp_val
             #b_gam = 0.03
 
-
+            # b_gam = abs(b_gam)
 
-
+
 
         elif dispersion == 2:
             if b_gam >= 1:
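The pattern the two hunks above converge on: keep the dispersion parameter `b_gam` in a numerically safe range and charge the objective a bounded penalty instead of letting the likelihood blow up. A hedged distillation of that logic (illustrative, not package code):

```python
# Illustrative clamp-and-penalise pattern for a dispersion parameter,
# mirroring the surviving branch above: non-positive values are nudged to
# a small positive constant and the objective pays a bounded penalty.
def safe_dispersion(b_gam, penalty):
    if b_gam <= 0:
        penalty += min(1, abs(b_gam))  # bounded, so one bad draw can't dominate
        b_gam = 0.001                  # small positive value the NB pmf tolerates
    return b_gam, penalty
```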
@@ -3761,7 +3786,8 @@ class ObjectiveFunction(object):
         elif dispersion == 1:
 
             proba_r = self._nonlog_nbin(y, eVd, b_gam)
-
+
+
             # proba_d = self.dnegbimonli(y, eVd, b_gam )
             # print('fuck if this actually works')
 
@@ -3793,7 +3819,7 @@ class ObjectiveFunction(object):
             proba_p = self._prob_product_across_panels(
                 proba_r, self.panel_info)
             proba_r = proba_p
-        proba_r = np.clip(proba_r, min_comp_val,
+        proba_r = np.clip(proba_r, min_comp_val, max_comp_val)
         loglik = np.log(proba_r)
         return loglik
 
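Completing the `np.clip` call matters because `np.log(0)` yields `-inf` and poisons the whole log-likelihood sum. A minimal sketch using the module-level bounds defined near the top of solution.py in this diff:

```python
import numpy as np

min_comp_val, max_comp_val = 1e-160, 1e+200   # as defined at the top of solution.py

proba_r = np.array([0.0, 1e-300, 0.2])        # toy panel probabilities
loglik = np.log(np.clip(proba_r, min_comp_val, max_comp_val))
print(loglik)  # finite everywhere; without the clip the first entry would be -inf
```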
@@ -4095,9 +4121,9 @@ class ObjectiveFunction(object):
 
         elif dispersion == 1:
 
-            der =
+            der = self.NB_Score(betas, y, eVd, Xd, 0, obs_specific)
             if both:
-                grad_n =
+                grad_n = self.NB_Score(betas, y, eVd, Xd, 0, True)
             return np.nan_to_num(der, nan=200, posinf=200, neginf=-200), np.nan_to_num(grad_n, nan=140, posinf=140,
                                                                                        neginf=-140)
 
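The `np.nan_to_num` wrapper plays a similar guarding role for the score: NaN or infinite gradient entries are replaced with large finite sentinels so the optimizer can keep stepping. Toy illustration:

```python
import numpy as np

der = np.array([0.5, np.nan, np.inf, -np.inf])
print(np.nan_to_num(der, nan=200, posinf=200, neginf=-200))
# -> [   0.5  200.   200.  -200. ]
```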
@@ -4351,7 +4377,7 @@ class ObjectiveFunction(object):
             P += Xd[key].shape[1]
             Kf += Xd[key].shape[2]
         else:
-            self.naming_for_printing(betas, 1, dispersion,
+            self.naming_for_printing(betas, 1, dispersion, model_nature=model_nature)
             N, P, Kf = Xd.shape[0], Xd.shape[1], Xd.shape[2]
         betas = np.array(betas)
         Bf = betas[0:Kf]  # Fixed betas
@@ -4381,7 +4407,7 @@ class ObjectiveFunction(object):
         llf_main = self.loglik_obs(
             y, eVd, dispersion, main_disper, lindley_disp, betas)
 
-
+        llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
 
         loglik = llf_main.sum()
 
@@ -4420,7 +4446,7 @@ class ObjectiveFunction(object):
         # Kf =0
         betas = np.array(betas)
         betas = dev.to_gpu(betas)  # TODO fix mepotnetially problem
-        self.naming_for_printing(betas, 0, dispersion,
+        self.naming_for_printing(betas, 0, dispersion, model_nature=model_nature)
         y = dev.to_gpu(y)
         if draws is not None and draws_grouped is not None:
             draws = np.concatenate((draws_grouped, draws), axis=1)
@@ -4509,11 +4535,11 @@ class ObjectiveFunction(object):
                 # brstd), draws_)  # Get random coefficients, old method
                 Br = self._transform_rand_betas(br,
                                                 brstd, draws_)  # Get random coefficients
-                self.naming_for_printing(betas, dispersion=dispersion,
+                self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
                 self.Br = Br.copy()
 
             else:
-                self.naming_for_printing(betas, dispersion=dispersion,
+                self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
                 chol_mat = self._chol_mat(
                     len(self.rdm_cor_fit), br, brstd, self.rdm_cor_fit)
                 self.chol_mat = chol_mat.copy()
@@ -4938,12 +4964,16 @@ class ObjectiveFunction(object):
         return H
 
     def _minimize(self, loglik_fn, x, args, method, tol, options, bounds=None, hess_calc=None):
-
+        #method = 'BFGS'
         if method == "BFGS":
 
             try:
+                argbs = list(args)
 
-
+                argbs[7] = True
+                argsb = tuple(argbs)
+                a = self._bfgs(loglik_fn, x, args=argsb, tol=tol, **options)
+                return self._bfgs(loglik_fn, x, args=args, tol=tol, **options)
 
             except:
                 return minimize(loglik_fn, x, args=args, jac=args[6], method='BFGS', tol=tol, options=options)
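The `argbs = list(args); argbs[7] = True; argsb = tuple(argbs)` dance is needed because `args` arrives as a tuple, which cannot be mutated in place; in this call signature index 7 is the Hessian flag (`hess_est` in the `_minimize` calls later in this diff, just as index 6 is the gradient flag used by `jac=args[6]`). A generic illustration of the idiom:

```python
# Tuples are immutable, so flipping one positional flag means
# list() -> mutate -> tuple() before re-passing the arguments.
args = ('X', 'y', None, None, None, None, True, False)  # toy stand-in
argbs = list(args)
argbs[7] = True          # e.g. request the Hessian on this call
argsb = tuple(argbs)
```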
@@ -5249,9 +5279,9 @@ class ObjectiveFunction(object):
         betas_est - array. Coefficients which maximize the negative log-liklihood.
         """
         # Set defualt method
-
-
-
+        #TODO, the inital fit worked but it throws
+
+
 
         sol = Solution()
         log_ll = 10.0 ** 9
@@ -5266,10 +5296,7 @@ class ObjectiveFunction(object):
         if self.hess_yes == False:
             method2 = 'BFGS_2'
             method2 = self.method_ll
-            # method2 = 'BFGS_2'
 
-            # method2 = 'BFGS_2'
-            # method2 = 'dogleg'
         bic = None
         pvalue_alt = None
         zvalues = None
@@ -5287,7 +5314,7 @@ class ObjectiveFunction(object):
 
         dispersion_param_num = self.is_dispersion(dispersion)
 
-        paramNum = self.get_param_num(dispersion)
+        #paramNum = self.get_param_num(dispersion)
         self.no_random_paramaters = 0
         if 'XG' in mod:
             XX = np.concatenate((mod.get('X'), mod.get('XG'), mod.get('Xr'), mod.get('XH')), axis=2)
@@ -5313,7 +5340,7 @@ class ObjectiveFunction(object):
             XX_test = mod.get('Xr_test')
 
         bb = np.random.uniform(
-            -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num
+            -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num)
 
         if method == 'L-BFGS-B':
             if dispersion == 0:
@@ -5347,10 +5374,12 @@ class ObjectiveFunction(object):
         hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
         initial_beta = self._minimize(self._loglik_gradient, bb,
                                       args=(XX, y, None, None, None, None, calc_gradient, hess_est,
-                                            dispersion, 0, False, 0, None,
+                                            dispersion, 0, False, 0, None, None, None, None, None,
                                             mod),
                                       method=method2, tol=1e-5, options={'gtol': tol['gtol']},
                                       bounds=bounds)
+        print(1)
+
 
         if method2 == 'L-BFGS-B':
             if hasattr(initial_beta.hess_inv, 'todense'):
@@ -5363,7 +5392,7 @@ class ObjectiveFunction(object):
         if initial_beta is not None and np.isnan(initial_beta['fun']):
             initial_beta = self._minimize(self._loglik_gradient, bb,
                                           args=(XX, y, None, None, None, None, True, True, dispersion,
-                                                0, False, 0, None,
+                                                0, False, 0, None, None, None, None, None, mod),
                                           method=method2, tol=tol['ftol'], options={'gtol': tol['gtol']})
 
         if initial_beta is not None and not np.isnan(initial_beta['fun']):
@@ -5387,24 +5416,24 @@ class ObjectiveFunction(object):
                 loglik=log_ll_fixed, num_parm=paramNum, GOF=other_measures)
 
             self.naming_for_printing(
-                initial_beta['x'], 1, dispersion,
+                initial_beta['x'], 1, dispersion, model_nature=mod)
 
             if self.is_multi:
                 in_sample_mae = self.validation(
                     initial_beta['x'], mod.get('y'), mod.get('X'), dispersion=dispersion,
-                    rdm_cor_fit=self.rdm_cor_fit,
+                    rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
                     testing=0)
 
                 sol.add_objective(TRAIN=in_sample_mae)
                 MAE_out = self.validation(
                     initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
-                    rdm_cor_fit=self.rdm_cor_fit,
+                    rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0)
                 sol.add_objective(TEST=MAE_out)
 
                 if self.val_percentage >0:
                     MAE_VAL = self.validation(
                         initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
-                        rdm_cor_fit=self.rdm_cor_fit,
+                        rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
                         validation=1)
                     sol.add_objective(VAL=MAE_VAL)
             if sol[self._obj_1] <= self.best_obj_1:
@@ -5509,9 +5538,6 @@ class ObjectiveFunction(object):
 
                     bounds.append((np.random.uniform(0.05, .15), bob2[count] + 7))
                     count += 1
-
-
-
                 elif ii < jj:
                     if bob2[count] > 0:
 
@@ -5584,14 +5610,14 @@ class ObjectiveFunction(object):
             mod['dispersion_penalty'] = np.abs(b[-1])
             grad_args = (
                 X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0, self.rdm_cor_fit,
-                None,
+                None, None, draws_grouped, XG, mod)
             # self.gradients_est_yes = (1, 1)
 
             if draws is None and draws_hetro is not None:
                 print('hold')
             betas_est = self._minimize(self._loglik_gradient, b, args=(
                 X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
-                self.rdm_cor_fit, None,
+                self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
                 method=method2, tol=tol['ftol'],
                 options={'gtol': tol['gtol']}, bounds=bounds,
                 hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
@@ -5610,7 +5636,7 @@ class ObjectiveFunction(object):
             betas_est = self._minimize(self._loglik_gradient, b, args=(
                 X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0,
                 self.rdm_cor_fit,
-                None,
+                None, None, draws_grouped, XG, mod),
                 method=method2, tol=tol['ftol'],
                 options={'gtol': tol['gtol']})
 
@@ -5646,7 +5672,7 @@ class ObjectiveFunction(object):
 
         paramNum = len(betas_est['x'])
         self.naming_for_printing(
-            betas_est['x'], 0, dispersion,
+            betas_est['x'], 0, dispersion, model_nature=mod)
 
         sol.add_objective(bic=bic, aic=aic,
                           loglik=log_ll, num_parm=paramNum, GOF=other_measures)
@@ -5656,19 +5682,19 @@ class ObjectiveFunction(object):
             try:
 
                 in_sample_mae = self.validation(betas_est['x'], y, X, Xr, dispersion=dispersion,
-                                                rdm_cor_fit=self.rdm_cor_fit,
+                                                rdm_cor_fit=self.rdm_cor_fit,
                                                 model_nature=mod, testing=0)
                 sol.add_objective(TRAIN=in_sample_mae)
                 y_test, X_test, Xr_test = mod.get('y_test'), mod.get('X_test'), mod.get('Xr_test')
                 Xr_grouped_test = mod.get('Xrtest')
                 MAE_test = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
-                                           rdm_cor_fit=self.rdm_cor_fit,
+                                           rdm_cor_fit=self.rdm_cor_fit,
                                            model_nature=mod)
 
                 sol.add_objective(TEST=MAE_test)
-                if self.val_percentage >0:
+                if self.val_percentage > 0:
                     MAE_val = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
-                                              rdm_cor_fit=self.rdm_cor_fit,
+                                              rdm_cor_fit=self.rdm_cor_fit,
                                               model_nature=mod, validation=1)
                     sol.add_objective(VAL=MAE_val)
 
|
|
6105
6131
|
if self.is_quanitifiable_num(obj_1[self._obj_1]) and pvalues is not None:
|
6106
6132
|
self.bic = obj_1['bic']
|
6107
6133
|
self.pvalues = pvalues
|
6108
|
-
if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c",
|
6134
|
+
if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c", 'grp', 'xh']):
|
6109
6135
|
# todo: probably delete
|
6110
6136
|
self.naming_for_printing(
|
6111
|
-
pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
|
6137
|
+
pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
|
6112
6138
|
obj_1, model_nature)
|
6113
6139
|
else:
|
6114
6140
|
if is_delete == 0:
|
6115
6141
|
# todo: probably delete
|
6116
6142
|
self.naming_for_printing(
|
6117
6143
|
pvalues, 1, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
|
6118
|
-
|
6144
|
+
obj_1, model_nature)
|
6119
6145
|
self.coeff_ = betas
|
6120
6146
|
self.stderr = stderr
|
6121
6147
|
self.zvalues = zvalues
|
{metacountregressor-0.1.78.dist-info → metacountregressor-0.1.88.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: metacountregressor
-Version: 0.1.78
+Version: 0.1.88
 Summary: Extensions for a Python package for estimation of count models.
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
@@ -11,12 +11,18 @@ Description-Content-Type: text/markdown
 License-File: LICENSE.txt
 Requires-Dist: numpy >=1.13.1
 Requires-Dist: scipy >=1.0.0
+Requires-Dist: requests
 
 <div style="display: flex; align-items: center;">
-<img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width:
+<img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 100px; margin-right: 20px;">
 <p><span style="font-size: 60px;"><strong>MetaCountRegressor</strong></span></p>
 </div>
 
+# Tutorial also available as a jupyter notebook
+[Download Example Notebook](https://github.com/zahern/CountDataEstimation/blob/main/Tutorial.ipynb)
+
+The tutorial provides more extensive examples on how to run the code and perform experiments. Further documentation is currently in development.
+
 ##### Quick Setup
 The Below code demonstrates how to set up automatic optimization assisted by the harmony search algorithm. References to the Differential Evolution and Simulated Annealing has been mentioned (change accordingly)
@@ -35,8 +41,15 @@ from metacountregressor.solution import ObjectiveFunction
 from metacountregressor.metaheuristics import (harmony_search,
                                                differential_evolution,
                                                simulated_annealing)
+
+
 ```
 
+loaded standard packages
+loaded helper
+testing
+
+
 #### Basic setup.
 The initial setup involves reading in the data and selecting an optimization algorithm. As the runtime progresses, new solutions will be continually evaluated. Finally, at the end of the runtime, the best solution will be identified and printed out. In the case of multiple objectives all of the best solutions will be printed out that belong to the Pareto frontier.
 
@@ -53,7 +66,7 @@ X = df.drop(columns=['FREQ', 'ID', 'AADT'])
 
 #some example argument, these are defualt so the following line is just for claritity. See the later agruments section for detials.
 arguments = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number':1,
-    'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "
+    'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "_max_time": 6}
 # Fit the model with metacountregressor
 obj_fun = ObjectiveFunction(X, y, **arguments)
 #replace with other metaheuristics if desired
@@ -71,7 +84,7 @@ Note: Please Consider the main arguments to change.
 - `val_percentage`: This parameter represents the percentage of data used to validate the model. The value 0.15 corresponds to 15% of the data.
 - `test_complexity`: This parameter defines the complexity level for testing. The value 6 tests all complexities. Alternatively, you can provide a list of numbers to consider different complexities. The complexities are further explained later in this document.
 - `instance_number`: This parameter is used to give a name to the outputs.
-- `
+- `_obj_1`: This parameter has multiple choices for obj_1, such as 'bic', 'aic', and 'hqic'. Only one choice should be defined as a string value.
 - `_obj_2`: This parameter has multiple choices for objective 2, such as 'RMSE_TEST', 'MSE_TEST', and 'MAE_TEST'.
 - `_max_time`: This parameter specifies the maximum number of seconds for the total estimation before stopping.
 - `distribution`: This parameter is a list of distributions to consider. Please select all of the available options and put them into a list of valid options if you want to to consider the distribution type for use when modellign with random parameters. The valid options include: 'Normal', 'LnNormal', 'Triangular', and 'Uniform'.
@@ -80,7 +93,7 @@ Note: Please Consider the main arguments to change.
 
 
 
-###
+### Example of changing the arguments:
 Modify the arguments according to your preferences using the commented code as a guide.
 
 
@@ -108,16 +121,18 @@ Listed below is an example of how to specify an initial solution within the framework.
 
 
 ```python
-#Model Decisions, Specify for
+#Model Decisions, Specify for initial solution that will be optimised.
 manual_fit_spec = {
     'fixed_terms': ['SINGLE', 'LENGTH'],
     'rdm_terms': ['AADT:normal'],
-    'rdm_cor_terms': ['GRADEBR:
+    'rdm_cor_terms': ['GRADEBR:normal', 'CURVES:normal'],
     'grouped_terms': [],
     'hetro_in_means': ['ACCESS:normal', 'MINRAD:normal'],
     'transformations': ['no', 'no', 'log', 'no', 'no', 'no', 'no'],
-    'dispersion':
+    'dispersion': 0
 }
+
+
 #Search Arguments
 arguments = {
     'algorithm': 'hs',
@@ -129,7 +144,47 @@ arguments = {
 obj_fun = ObjectiveFunction(X, y, **arguments)
 ```
 
-
+Setup Complete...
+Benchmaking test with Seed 42
+--------------------------------------------------------------------------------
+Log-Likelihood: -1339.1862434675106
+--------------------------------------------------------------------------------
+bic: 2732.31
+--------------------------------------------------------------------------------
+MSE: 650856.32
++--------------------------+--------+-------+----------+----------+------------+
+| Effect                   | $\tau$ | Coeff | Std. Err | z-values | Prob |z|>Z |
++==========================+========+=======+==========+==========+============+
+| LENGTH                   | no     | -0.15 | 0.01     | -12.98   | 0.00***    |
++--------------------------+--------+-------+----------+----------+------------+
+| SINGLE                   | no     | -2.46 | 0.04     | -50.00   | 0.00***    |
++--------------------------+--------+-------+----------+----------+------------+
+| GRADEBR                  | log    | 4.23  | 0.10     | 42.17    | 0.00***    |
++--------------------------+--------+-------+----------+----------+------------+
+| CURVES                   | no     | 0.51  | 0.01     | 34.78    | 0.00***    |
++--------------------------+--------+-------+----------+----------+------------+
+| Chol: GRADEBR (Std.      |        | 2.21  | 0.00     | 50.00    | 0.00***    |
+| Dev. normal) )           |        |       |          |          |            |
++--------------------------+--------+-------+----------+----------+------------+
+| Chol: CURVES (Std. Dev.  |        | -0.51 | 0.00     | -50.00   | 0.00***    |
+| normal) )                |        |       |          |          |            |
++--------------------------+--------+-------+----------+----------+------------+
+| Chol: CURVES (Std. Dev.  | no     | 0.55  | 0.00     | 50.00    | 0.00***    |
+| normal) . GRADEBR (Std.  |        |       |          |          |            |
+| Dev. normal )            |        |       |          |          |            |
++--------------------------+--------+-------+----------+----------+------------+
+| main: MINRAD: hetro      | no     | -0.00 | 0.00     | -44.36   | 0.00***    |
+| group 0                  |        |       |          |          |            |
++--------------------------+--------+-------+----------+----------+------------+
+| ACCESS: hetro group 0    |        | 0.68  | 0.09     | 7.68     | 0.00***    |
++--------------------------+--------+-------+----------+----------+------------+
+| main: MINRAD: hetro      |        | -0.00 | 0.00     | -44.86   | 0.00***    |
+| group 0:normal:sd hetro  |        |       |          |          |            |
+| group 0                  |        |       |          |          |            |
++--------------------------+--------+-------+----------+----------+------------+
+
+
+Simarly to return the results feed the objective function into a metaheuristic solution algorithm. An example of this is provided below:
 
 
 ```python
@@ -137,7 +192,7 @@ results = harmony_search(obj_fun)
 print(results)
 ```
 
-
+# Notes:
 ### Capabilities of the software include:
 * Handling of Panel Data
 * Support for Data Transformations
@@ -155,11 +210,11 @@ Capability to handle heterogeneity in the means of the random parameters
 * Customization of Hyper-parameters to solve problems tailored to your dataset
 * Out-of-the-box optimization capability using default metaheuristics
 
-###
+### Intepreting the output of the model:
 A regression table is produced. The following text elements are explained:
 - Std. Dev.: This column appears for effects that are related to random paramters and displays the assument distributional assumption next to it
 - Chol: This term refers to Cholesky decomposition element, to show the correlation between two random paramaters. The combination of the cholesky element on iyself is equivalent to a normal random parameter.
-- hetro group
+- hetro group: This term represents the heterogeneity group number, which refers all of the contributing factors that share hetrogentiy in the means to each other under the same numbered value.
 - $\tau$: This column, displays the type of transformation that was applied to the specific contributing factor in the data.
 
 
@@ -211,10 +266,10 @@ The following list describes the arguments available in this function. By default
 
 8. **`_max_time`**: This argument is used to add a termination time in the algorithm. It takes values as seconds. Note the time is only dependenant on the time after intial population of solutions are generated.
 
-
+## Example: Assistance by Harmony Search
 
-Let's
+Let's begin by fitting very simple models and use the structure of these models to define our objectives. Then, we can conduct a more extensive search on the variables that are more frequently identified. For instance, in the case below, the complexity is level 3, indicating that we will consider, at most randomly correlated parameters. This approach is useful for initially identifying a suitable set of contributing factors for our search.
 
 
 
@@ -241,27 +296,30 @@ arguments = {
     '_max_time': 10000
 }
 obj_fun = ObjectiveFunction(X, y, **arguments)
-
 results = harmony_search(obj_fun)
 print(results)
 ```
 
+## Paper
+
+The following tutorial is in conjunction with our latest paper. A link the current paper can be found here [MetaCountRegressor](https://www.overleaf.com/read/mszwpwzcxsng#c5eb0c)
+
 ## Contact
 If you have any questions, ideas to improve MetaCountRegressor, or want to report a bug, just open a new issue in [GitHub repository](https://github.com/zahern/CountDataEstimation).
 
 ## Citing MetaCountRegressor
 Please cite MetaCountRegressor as follows:
 
-Ahern, Z., Corry P., Paz A. (
+Ahern, Z., Corry P., Paz A. (2024). MetaCountRegressor [Computer software]. [https://pypi.org/project/metacounregressor/](https://pypi.org/project/metacounregressor/)
 
 Or using BibTex as follows:
 
 ```bibtex
-@misc{
-   author = {Zeke Ahern
+@misc{Ahern2024Meta,
+   author = {Zeke Ahern, Paul Corry and Alexander Paz},
    journal = {PyPi},
    title = {metacountregressor · PyPI},
-   url = {https://pypi.org/project/metacountregressor/0.1.
-   year = {
+   url = {https://pypi.org/project/metacountregressor/0.1.80/},
+   year = {2024},
 }
{metacountregressor-0.1.78.dist-info → metacountregressor-0.1.88.dist-info}/RECORD
CHANGED
@@ -3,17 +3,17 @@ metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0
 metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
 metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
 metacountregressor/helperprocess.py,sha256=Sc5gJ7ffFlkya5B5KQwE33xxXuIQyF6OaYtSikLa3pQ,12968
-metacountregressor/main.py,sha256=
+metacountregressor/main.py,sha256=RKddYRv3UKkszbWD-d2-u8yqcYeniCB5vL3vmj7am5I,16700
 metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
 metacountregressor/metaheuristics.py,sha256=2MW3qlgs7BFbe_w64snLSKc4Y0-e_9sa3s_96rUm_SE,105887
 metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiLur0k,23096
 metacountregressor/pareto_logger__plot.py,sha256=mEU2QN4wmsM7t39GJ_XhJ_jjsdl09JOmG0U2jICrAkI,30037
 metacountregressor/setup.py,sha256=8w6IqX0tJsbYrOI1BJLIJCIvOnunKli5I9fsF5PhHv4,919
 metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
-metacountregressor/solution.py,sha256=
+metacountregressor/solution.py,sha256=6UFri1O62X5GGEmrhMTpi2PQdtbtbJoc02uKixfYXGo,266195
 metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
-metacountregressor-0.1.
-metacountregressor-0.1.
-metacountregressor-0.1.
-metacountregressor-0.1.
-metacountregressor-0.1.
+metacountregressor-0.1.88.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+metacountregressor-0.1.88.dist-info/METADATA,sha256=BLyeZoC1G7i0pMCkJBmsop3EFSg_QFYKH0nWPjWFkHE,18165
+metacountregressor-0.1.88.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+metacountregressor-0.1.88.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
+metacountregressor-0.1.88.dist-info/RECORD,,
{metacountregressor-0.1.78.dist-info → metacountregressor-0.1.88.dist-info}/LICENSE.txt
File without changes
{metacountregressor-0.1.78.dist-info → metacountregressor-0.1.88.dist-info}/top_level.txt
File without changes