metacountregressor 0.1.78__py3-none-any.whl → 0.1.93__py3-none-any.whl

metacountregressor/main.py

@@ -29,6 +29,64 @@ def convert_df_columns_to_binary_and_wide(df):
 
 
 def main(args, **kwargs):
+    '''METACOUNT REGRESSOR TESTING ENVIRONMENT'''
+    import statsmodels.api as sm
+
+    data = sm.datasets.sunspots.load_pandas().data
+    # print(data.exog)
+    data_exog = data['YEAR']
+    data_exog = sm.add_constant(data_exog)
+    data_endog = data['SUNACTIVITY']
+
+    # Instantiate a gamma family model with the default link function.
+    import numpy as np
+
+    gamma_model = sm.NegativeBinomial(data_endog, data_exog)
+    gamma_results = gamma_model.fit()
+
+    print(gamma_results.summary())
+
+    # NOW LET'S COMPARE THIS TO METACOUNT REGRESSOR
+    import metacountregressor
+    from importlib.metadata import version
+    print(version('metacountregressor'))
+    import pandas as pd
+    import numpy as np
+    from metacountregressor.solution import ObjectiveFunction
+    from metacountregressor.metaheuristics import (harmony_search,
+                                                   differential_evolution,
+                                                   simulated_annealing)
+
+    # Model decisions
+    manual_fit_spec = {
+        'fixed_terms': ['const', 'YEAR'],
+        'rdm_terms': [],
+        'rdm_cor_terms': [],
+        'grouped_terms': [],
+        'hetro_in_means': [],
+        'transformations': ['no', 'no'],
+        'dispersion': 1  # Negative Binomial
+    }
+
+    # Arguments
+    arguments = {
+        'algorithm': 'hs',
+        'test_percentage': 0,
+        'test_complexity': 6,
+        'instance_number': 'name',
+        'Manual_Fit': manual_fit_spec
+    }
+    obj_fun = ObjectiveFunction(data_exog, data_endog, **arguments)
+    # exit()
+
+
     print('the args is:', args)
     print('the kwargs is', kwargs)
 
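For readers who want to try the benchmark this hunk wires into `main()`, a standalone sketch follows. The statsmodels portion uses standard API; the `ObjectiveFunction` keys simply mirror the hunk above and are assumptions about the package's expected inputs, not independently verified here.

```python
# Standalone sketch of the statsmodels-vs-metacountregressor benchmark above.
import statsmodels.api as sm
from metacountregressor.solution import ObjectiveFunction

data = sm.datasets.sunspots.load_pandas().data
exog = sm.add_constant(data['YEAR'])
endog = data['SUNACTIVITY']

# Reference fit: plain negative binomial via statsmodels.
print(sm.NegativeBinomial(endog, exog).fit().summary())

# Same fixed-effects NB specification handed to metacountregressor
# (keys mirror the diff above).
manual_fit_spec = {
    'fixed_terms': ['const', 'YEAR'],
    'rdm_terms': [],
    'rdm_cor_terms': [],
    'grouped_terms': [],
    'hetro_in_means': [],
    'transformations': ['no', 'no'],
    'dispersion': 1,  # negative binomial
}
obj_fun = ObjectiveFunction(exog, endog, algorithm='hs',
                            test_percentage=0, Manual_Fit=manual_fit_spec)
```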
@@ -109,6 +167,16 @@ def main(args, **kwargs):
         'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
         'dispersion': 1
     }
+    print('overriding this delete, just want to test the NB')
+    manual_fit_spec = {
+        'fixed_terms': ['const'],
+        'rdm_terms': [],
+        'rdm_cor_terms': [],
+        'grouped_terms': [],
+        'hetro_in_means': [],
+        'transformations': ['no'],
+        'dispersion': 1
+    }
 
     df = pd.read_csv('./data/Ex-16-3.csv')  # read in the data
     y_df = df[['FREQ']].copy()  # only consider crashes
@@ -118,7 +186,7 @@ def main(args, **kwargs):
     x_df['Offset'] = np.log(1 + x_df['AADT'] * x_df['LENGTH'] * 365 / 100000000)
     x_df = x_df.drop(columns=['AADT', 'LENGTH'])
 
-    if args['separate_out_factors']:
+    if args.get('seperate_out_factors', 0):
 
         x_df = helperprocess.as_wide_factor(x_df, keep_original=0,
                                             exclude=['INTECHAG', 'CURVES', 'MIMEDSH', 'MXMEDSH', 'SPEED'])
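The lookup change above is defensive: a missing flag now falls back to 0 instead of raising `KeyError`. Note that the new key is spelled `seperate_out_factors`, so callers must match that exact spelling. A minimal illustration (hypothetical `args` dicts):

```python
# Minimal illustration of the dict-lookup change.
args = {}
# old behaviour: args['separate_out_factors'] raises KeyError when absent
# new behaviour: a missing key falls back to the default and the branch is skipped
assert args.get('seperate_out_factors', 0) == 0

args = {'seperate_out_factors': 1}
if args.get('seperate_out_factors', 0):
    print('wide-factor preprocessing would run here')
```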
@@ -173,8 +241,8 @@ def main(args, **kwargs):
             x_df = x_df.drop(columns=['zonal_ID', 'ln_AADT', 'ln_seg'])
             x_df['rumble_install_year'] = x_df['rumble_install_year'].astype('category').cat.codes
             x_df.rename(columns={"rumble_install_year": "has_rumble"}, inplace=True)
-
-        except:
+        except Exception as e:
+            print(e)
             x_df = df.drop(columns=['Y'])  # was dropped postcode
 
         group_grab = x_df['county']
@@ -215,7 +283,6 @@ def main(args, **kwargs):
     else:
         print('fitting manually')
         args['Manual_Fit'] = manual_fit_spec
-
     if args['problem_number'] == str(8) or args['problem_number'] == 8:
         print('Maine County Dataset.')
         args['group'] = 'county'
@@ -346,10 +413,8 @@ if __name__ == '__main__':
     override = True
     if override:
         print('todo turn off, in testing phase')
-        parser.add_argument('-problem_number', default='8')
+        parser.add_argument('-problem_number', default='4')
     print('did it make it')
-
-
     if 'algorithm' not in args:
         parser.add_argument('-algorithm', type=str, default='hs',
                             help='optimization algorithm')
@@ -370,7 +435,7 @@ if __name__ == '__main__':
     parser.print_help()
     args = vars(parser.parse_args())
     print(type(args))
-    # TODO add in chi 2 and df in estimation and compare degrees of freedom
+    # TODO add in chi 2 and df in estimation and compare degrees of freedom this needs to be done in solution
 
     # Print the args.
     profiler = cProfile.Profile()
metacountregressor/solution.py

@@ -47,7 +47,7 @@ np.seterr(divide='ignore', invalid='ignore')
 warnings.simplefilter("ignore")
 
 # define the computation boundary limits
-min_comp_val = 1e-20
+min_comp_val = 1e-160
 max_comp_val = 1e+200
 log_lik_min = -1e+200
 log_lik_max = 1e+200
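Lowering `min_comp_val` from 1e-20 to 1e-160 loosens the probability floor applied before taking logs (see the `np.clip(proba_r, min_comp_val, max_comp_val)` change later in this diff). A quick numeric check of what the floor does:

```python
# Why the floor matters: probabilities are clipped before np.log, so
# min_comp_val bounds the worst-case per-observation log-likelihood.
import numpy as np

min_comp_val = 1e-160
proba_r = np.array([0.0, 1e-300, 0.2])   # a literal zero would give -inf
clipped = np.clip(proba_r, min_comp_val, None)
print(np.log(clipped))                    # first two terms ~ -368.4, not -inf
```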
@@ -122,8 +122,9 @@ class ObjectiveFunction(object):
 
     def __init__(self, x_data, y_data, **kwargs):
 
-        self.reg_penalty = 1
+        self.reg_penalty = 0
         self.power_up_ll = False
+
         self.bic = None
         self.other_bic = False
         self.test_flag = 1
@@ -131,8 +132,8 @@ class ObjectiveFunction(object):
         print('change this to false latter ')
 
         # initialize values
-        self.constant_value = -5.5
-        self.negative_binomial_value = 0.05
+        self.constant_value = 0
+        self.negative_binomial_value = 1
 
         self.verbose_safe = True
         self.please_print = kwargs.get('please_print', 0)
@@ -169,7 +170,7 @@ class ObjectiveFunction(object):
         self._par = 0.3
         self._mpai = 1
         self._max_imp = 100000
-        self._WIC = 1000  # Number of Iterations without Multiobjective Improvement
+        self._WIC = 1000  # Number of Iterations without Multiobjective Improvement  # todo: chuck into solution
         self._panels = None
         self.is_multi = True
         self.method_ll = 'Nelder-Mead-BFGS'
@@ -389,6 +390,8 @@ class ObjectiveFunction(object):
         self.initial_sig = 1  # pass the test of a single model
         self.pvalue_sig_value = .1
         self.observations = self._x_data.shape[0]
+        self.minimize_scaler = 1 / self.observations  # scale the minimization function to the observations
+
         self.batch_size = None
         # open the file in the write mode
         self.grab_transforms = 0
@@ -841,8 +844,12 @@ class ObjectiveFunction(object):
 
         return ([self._model_type_codes[dispersion]])
 
-    def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None,
-                            zi_fit=None, obj_1=None, model_nature=None):
+    def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
+        r'''
+        Set up the naming for the model summary.
+        '''
+
+
         self.name_deleter = []
         group_rpm = None
         group_dist = []
@@ -1015,7 +1022,7 @@ class ObjectiveFunction(object):
         signif_list = self.pvalue_asterix_add(self.pvalues)
         if model == 1:
 
-            self.coeff_[-1] = np.abs(self.coeff_[-1])
+            self.coeff_[-1] = 1 / np.exp(self.coeff_[-1])
             if self.coeff_[-1] < 0.25:
                 print(self.coeff_[-1], 'Warning Check Dispersion')
                 print(np.exp(self.coeff_[-1]))
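The summary change above back-transforms the last coefficient with `1 / np.exp(...)` rather than `np.abs(...)`, consistent with the dispersion now being estimated on the log scale (`alpha = np.exp(gamma)` in later hunks of this diff). A sketch of that reading, assuming the last coefficient is `theta = log(alpha)`:

```python
# Sketch of the reporting transform; the log-scale assumption follows the
# np.exp(gamma) changes elsewhere in this diff and is not confirmed beyond it.
import numpy as np

theta = np.log(2.0)            # estimated parameter on the log scale
reported = 1 / np.exp(theta)   # -> 0.5, the value shown in the summary
print(reported, reported < 0.25)  # the dispersion warning fires only below 0.25
```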
@@ -2683,7 +2690,7 @@ class ObjectiveFunction(object):
         grad_n = np.concatenate((grad_n_sub, grad_n_sub1), axis=1)
         return gradient, grad_n
 
-    def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False):
+    def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False, alpha=None):
         """
         Negative Binomial model score (gradient) vector of the log-likelihood
         Parameters
@@ -2703,9 +2710,47 @@ class ObjectiveFunction(object):
 
 
         """
 
-        try:
-            alpha = params[-1]
+        # Calculate common terms
+        '''
+        n = len(y)
+        n, p, d = X.shape  # n: observations, p: panels (1 in your case), d: explanatory variables
+
+        # Flatten the data since there's only one panel, simplifying the operations
+        X_flat = X.reshape(n * p, d)
+        y_flat = y.flatten()
+        mu_flat = mu.flatten()
+
+        # Prepare score array
+        score = np.zeros(d + 1)  # +1 for alpha
+
+        # Compute the gradient for regression coefficients
+        for j in range(d):  # Exclude the last parameter (alpha)
+            score[j] = np.dot(X_flat[:, j], (y_flat - mu_flat))
+
+        # Compute the gradient for the dispersion parameter
+        if obs_specific:
+            # Adjust the calculation if observation-specific effects are considered
+            sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+                y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+            score[-1] = np.sum(sum_terms)
+        else:
+            # Standard calculation
+            sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+                y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+            score[-1] = np.sum(sum_terms)
+        return score
+        '''
+        # return score
+
+        try:
+            if alpha is None:
+                alpha = np.exp(params[-1])
+            else:
+                alpha = np.exp(params[-1])
             a1 = 1 / alpha * mu ** Q
             prob = a1 / (a1 + mu)
             exog = X
@@ -2747,7 +2792,8 @@ class ObjectiveFunction(object):
             return np.concatenate((dparams, dalpha),
                                   axis=1)
         except Exception as e:
-            print('in ki nb probkemng')
+            print(e)
+            print('NB score exception problem..')
             exc_type, exc_obj, exc_tb = sys.exc_info()
             fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
             print(exc_type, fname, exc_tb.tb_lineno)
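With the dispersion now read as `alpha = np.exp(params[-1])`, the score with respect to the raw parameter picks up a factor of `alpha` by the chain rule. A quick finite-difference check of that identity (toy scalar objective, not the package's likelihood):

```python
# Chain-rule check for a log-scale dispersion parameter: if alpha = exp(theta),
# then d ll/d theta = alpha * d ll/d alpha.
import numpy as np

def ll(alpha):
    return -(alpha - 2.0) ** 2   # stand-in objective in alpha

theta, eps = 0.3, 1e-6
alpha = np.exp(theta)
d_alpha = (ll(alpha + eps) - ll(alpha - eps)) / (2 * eps)
d_theta = (ll(np.exp(theta + eps)) - ll(np.exp(theta - eps))) / (2 * eps)
assert np.isclose(d_theta, alpha * d_alpha, rtol=1e-4)
```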
@@ -3404,24 +3450,44 @@ class ObjectiveFunction(object):
         # if gamma <= 0.01:  # min defined value for stable nb
         #     gamma = 0.01
 
+
         endog = y
         mu = lam
-        alpha = gamma
-        size = 1.0 / alpha * mu ** Q
+        alpha = np.exp(gamma)
+        # size = 1.0 / alpha * mu ** Q
         alpha_size = alpha * mu ** Q
         # prob = size/(size+mu)
         prob = alpha / (alpha + mu)
         # prob = 1/(1+mu*alpha)
+
+        '''test'''
+
         try:
             # print(np.shape(y), np.shape(size), np.shape(prob))
-            # gg2 = self.negbinom_pmf(alpha_size, prob, y)
+            # gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
+            # Commented-out timing scaffolding for comparing the custom
+            # negbinom_pmf against the closed form below:
+            # import time
+            # start_time = time.time()
+            # for _ in range(10000):
+            #     gg = self.negbinom_pmf(alpha_size, prob, y)
+            # end_time = time.time()
+            # print("Custom function time:", end_time - start_time)
             gg = np.exp(
                 gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
                     y + alpha) * np.log(mu + alpha))
-
-            # gg1 = self.negbinom_pmf(alpha_size, prob, y)
-            # gg = nbinom.pmf(y, alpha, prob)
+            gg[np.isnan(gg)] = 1
+            # gg = nbinom.pmf(y, alpha, prob)
 
         except Exception as e:
             print(e)
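The `gammaln` expression added above is the closed-form negative binomial pmf with size `alpha` and success probability `alpha / (alpha + mu)`, since `alpha**alpha * mu**y / (mu + alpha)**(y + alpha)` factors into `(alpha/(alpha+mu))**alpha * (mu/(alpha+mu))**y`. It can be verified against scipy:

```python
# Verify the closed form used above against scipy's negative binomial pmf.
import numpy as np
from scipy.special import gammaln
from scipy.stats import nbinom

y = np.arange(6)
mu, alpha = 3.0, 1.7
gg = np.exp(gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha)
            + y * np.log(mu) + alpha * np.log(alpha)
            - (y + alpha) * np.log(mu + alpha))
assert np.allclose(gg, nbinom.pmf(y, alpha, alpha / (alpha + mu)))
```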
@@ -3492,7 +3558,7 @@ class ObjectiveFunction(object):
 
         endog = y
         mu = lam
-        alpha = gamma
+        alpha = np.exp(gamma)
         alpha = alpha * mu ** Q
         size = 1 / alpha * mu ** Q  # also r
         # self.rate_param = size
@@ -3572,21 +3638,8 @@ class ObjectiveFunction(object):
 
         if dispersion == 1 or dispersion == 4:  # nb
             # if model_nature is not None and 'dispersion_penalty' in model_nature:
-            #     if b_gam < 0.8 * model_nature['dispersion_penalty']:
-            #         penalty += model_nature['dispersion_penalty'] - b_gam
-
-            # if abs(b_gam) < 0.01:
-            #     penalty += 1 / np.abs(b_gam)
 
-            if b_gam >= 4.5:
-                penalty += b_gam
-                b_gam = 4.61
-                # b_gam = 7.9
-                # penalty += model_nature['dispersion_penalty'] - b_gam
-                # penalty += 1 / np.max((0.01, abs(b_gam)))
-                # b_gam = model_nature['dispersion_penalty']
 
-            """
             if b_gam <= 0:
                 # penalty += 100
                 # penalty += abs(b_gam)
@@ -3594,21 +3647,21 @@ class ObjectiveFunction(object):
                 # b_gam = 1
 
                 # if b_gam < 0.03:
-                penalty += 10
+                penalty += min(1, np.abs(b_gam))
 
-                b_gam = 0.03
+                b_gam = 0.001
                 #
 
-            if b_gam >= 10:
-                penalty += b_gam
+            # if b_gam >= 10:
+            #     penalty += b_gam
 
-            if b_gam == 0:
-                b_gam = min_comp_val
+            # if b_gam == 0:
+            #     b_gam = min_comp_val
             # b_gam = 0.03
 
-            b_gam = abs(b_gam)
+            # b_gam = abs(b_gam)
 
-            """
+
 
         elif dispersion == 2:
             if b_gam >= 1:
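Read as a pure function, the surviving branch above replaces the old fixed penalty (`+= 10` with a 0.03 floor) by a bounded penalty and a smaller floor. A hedged sketch of that reading:

```python
# The new dispersion guard as a pure function (a reading of the hunk above,
# not the package's own helper).
import numpy as np

def guard_dispersion(b_gam, penalty=0.0):
    if b_gam <= 0:
        penalty += min(1, np.abs(b_gam))  # bounded penalty instead of a flat 10
        b_gam = 0.001                     # keep the NB dispersion strictly positive
    return b_gam, penalty

print(guard_dispersion(-0.4))  # -> (0.001, 0.4)
print(guard_dispersion(0.7))   # -> (0.7, 0.0)
```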
@@ -3761,7 +3814,8 @@ class ObjectiveFunction(object):
         elif dispersion == 1:
 
             proba_r = self._nonlog_nbin(y, eVd, b_gam)
-            # print(1)
+
+
             # proba_d = self.dnegbimonli(y, eVd, b_gam)
             # print('fuck if this actually works')
 
@@ -3793,7 +3847,7 @@ class ObjectiveFunction(object):
             proba_p = self._prob_product_across_panels(
                 proba_r, self.panel_info)
             proba_r = proba_p
-        proba_r = np.clip(proba_r, min_comp_val, None)
+        proba_r = np.clip(proba_r, min_comp_val, max_comp_val)
         loglik = np.log(proba_r)
         return loglik
 
@@ -4095,9 +4149,9 @@ class ObjectiveFunction(object):
 
         elif dispersion == 1:
 
-            der = -self.NB_Score(betas, y, eVd, Xd, 0, obs_specific)
+            der = self.NB_Score(betas, y, eVd, Xd, 0, obs_specific)
             if both:
-                grad_n = -self.NB_Score(betas, y, eVd, Xd, 0, True)
+                grad_n = self.NB_Score(betas, y, eVd, Xd, 0, True)
             return np.nan_to_num(der, nan=200, posinf=200, neginf=-200), np.nan_to_num(grad_n, nan=140, posinf=140,
                                                                                        neginf=-140)
 
@@ -4351,7 +4405,7 @@ class ObjectiveFunction(object):
                 P += Xd[key].shape[1]
                 Kf += Xd[key].shape[2]
         else:
-            self.naming_for_printing(betas, 1, dispersion, zi_fit=zi_list, model_nature=model_nature)
+            self.naming_for_printing(betas, 1, dispersion, model_nature=model_nature)
             N, P, Kf = Xd.shape[0], Xd.shape[1], Xd.shape[2]
         betas = np.array(betas)
         Bf = betas[0:Kf]  # Fixed betas
@@ -4381,7 +4435,7 @@ class ObjectiveFunction(object):
             llf_main = self.loglik_obs(
                 y, eVd, dispersion, main_disper, lindley_disp, betas)
 
-            # llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
+            llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
 
             loglik = llf_main.sum()
 
@@ -4402,14 +4456,19 @@ class ObjectiveFunction(object):
                 if return_gradient_n:
                     der, grad_n = self.simple_score_grad(
                         betas, y, eVd, Xd, dispersion, both=True)
-                    return (-loglik + penalty, -der, grad_n)
+                    # return (-loglik + penalty, -der, grad_n) * self.minimize_scaler
+                    scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel(), grad_n))
+                    return scaled_tuple
                 else:
                     der = self.simple_score_grad(
                         betas, y, eVd, Xd, dispersion, both=False)
-
-                    return (-loglik + penalty, -der.ravel())
+                    scaled_tuple = tuple(
+                        x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel()))
+                    return scaled_tuple
+                    # return (-loglik + penalty, -der.ravel()) * self.minimize_scaler
             else:
-                return -loglik + penalty
+
+                return (-loglik + penalty) * self.minimize_scaler
         # Else, we have draws
         self.n_obs = len(y) * self.Ndraws  # todo is this problematic
         penalty += self._penalty_betas(
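Scaling the objective and its gradient by `minimize_scaler = 1/observations` leaves the minimiser unchanged while keeping function values O(1); the raw log-likelihood is recovered later by dividing `optim_res['fun']` by the scaler (see the hunk at `@@ -5190`). A toy check of both facts with scipy:

```python
# Toy check: scaling (objective, gradient) by 1/n_obs does not move the argmin,
# and the unscaled objective is recovered by dividing by the scaler.
import numpy as np
from scipy.optimize import minimize

rng = np.random.default_rng(0)
x = rng.normal(loc=1.5, size=500)
n_obs = x.size

def negloglik(beta):   # Gaussian NLL up to a constant, with its gradient
    return 0.5 * np.sum((x - beta) ** 2), np.array([-np.sum(x - beta)])

def scaled(beta):      # both outputs scaled by 1/n_obs, as in the hunk above
    f, g = negloglik(beta)
    return f / n_obs, g / n_obs

raw = minimize(negloglik, x0=np.zeros(1), jac=True)
scl = minimize(scaled, x0=np.zeros(1), jac=True)
assert np.allclose(raw.x, scl.x, atol=1e-5)        # same minimiser
assert np.isclose(scl.fun * n_obs, raw.fun, rtol=1e-5)  # unscale recovers fun
```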
@@ -4420,7 +4479,7 @@ class ObjectiveFunction(object):
         # Kf = 0
         betas = np.array(betas)
         betas = dev.to_gpu(betas)  # TODO: fix, potentially a problem
-        self.naming_for_printing(betas, 0, dispersion, zi_fit=zi_list, model_nature=model_nature)
+        self.naming_for_printing(betas, 0, dispersion, model_nature=model_nature)
         y = dev.to_gpu(y)
         if draws is not None and draws_grouped is not None:
             draws = np.concatenate((draws_grouped, draws), axis=1)
@@ -4509,11 +4568,11 @@ class ObjectiveFunction(object):
                 #     brstd), draws_)  # Get random coefficients, old method
                 Br = self._transform_rand_betas(br,
                                                 brstd, draws_)  # Get random coefficients
-                self.naming_for_printing(betas, dispersion=dispersion, zi_fit=zi_list, model_nature=model_nature)
+                self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
                 self.Br = Br.copy()
 
             else:
-                self.naming_for_printing(betas, dispersion=dispersion, zi_fit=zi_list, model_nature=model_nature)
+                self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
                 chol_mat = self._chol_mat(
                     len(self.rdm_cor_fit), br, brstd, self.rdm_cor_fit)
                 self.chol_mat = chol_mat.copy()
@@ -4633,34 +4692,18 @@ class ObjectiveFunction(object):
             # lik = np.nan_to_num(lik, )
             loglik = np.log(lik)
             llf_main = loglik
-            if 'exog_infl' in model_nature:
-                params_infl = betas[Kf:Kf + len(model_nature.get('exog_infl'))]
-                params_main = Bf
-                exog_infl = model_nature.get('exog_inflX')
-                llf_main = llf_main.ravel()  # TODO test this
-                w = self.predict_logit_part(params_infl, exog_infl)
-
-                w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-
-                zero_idx = np.nonzero(y == 0)[0]
-                nonzero_idx = np.nonzero(y)[0]  # FIXME should shape be unravelled
-
-                llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1)  # TODO test this i added ravel to this code
-                llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
-                llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-                loglik = llf.sum()
-            else:
 
-                loglik = loglik.sum()
+
+            loglik = loglik.sum()
 
             loglik = np.clip(loglik, log_lik_min, log_lik_max)
             if self.power_up_ll:
                 penalty += self.regularise_l2(betas)
-                loglik = 2 * loglik
+
                 penalty += self.regularise_l2(betas)
             if not return_gradient:
 
-                output = (-loglik + penalty,)
+                output = ((-loglik + penalty) * self.minimize_scaler,)
                 if verbose > 1:
                     print(
                         f"Evaluation {self.total_fun_eval} Log-Lik.={-loglik:.2f}")
@@ -4690,19 +4733,24 @@ class ObjectiveFunction(object):
                     # Hinv = np.linalg.inv(H)
                     # except Exception:
                     #     Hinv = np.linalg.pinv(H)
-                    output = (-loglik + penalty, -grad, grad_n)
+                    scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad, grad_n))
+                    return scaled_tuple
+                    # output = (-loglik + penalty, -grad, grad_n) * self.minimize_scaler
 
-                    return output
+                    # return output
                 else:
+                    scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad))
+                    return scaled_tuple
+                    # output = (-loglik + penalty, -grad) * self.minimize_scaler
 
-                    output = (-loglik + penalty, -grad)
-
-                    return output
+                    # return output
         except Exception as e:
             traceback.print_exc()
             print(e)
 
-
+    def minimize_function(self, loglike):
+        r'''Takes the log-likelihood and transforms it into a better-conditioned minimization objective.'''
+        return loglike / self.n_obs
+
     def print_chol_mat(self, betas):
         print(self.chol_mat)
         self.get_br_and_bstd(betas)
@@ -4938,12 +4986,16 @@ class ObjectiveFunction(object):
         return H
 
     def _minimize(self, loglik_fn, x, args, method, tol, options, bounds=None, hess_calc=None):
-
+        # method = 'BFGS'
        if method == "BFGS":
 
             try:
+                argbs = list(args)
 
-                return self._bfgs(loglik_fn, x, args=args, tol=tol, **options)  # @IgnoreException
+                argbs[7] = True
+                argsb = tuple(argbs)
+                a = self._bfgs(loglik_fn, x, args=argsb, tol=tol, **options)
+                return self._bfgs(loglik_fn, x, args=args, tol=tol, **options)
 
             except:
                 return minimize(loglik_fn, x, args=args, jac=args[6], method='BFGS', tol=tol, options=options)
@@ -5190,7 +5242,7 @@ class ObjectiveFunction(object):
         if self.power_up_ll:
             loglikelihood = -optim_res['fun'] / 2 - penalty
         else:
-            loglikelihood = -optim_res['fun'] - penalty
+            loglikelihood = -optim_res['fun'] / self.minimize_scaler - penalty
 
         # self.coeff_names = coeff_names
         # self.total_iter = optim_res['nit']
@@ -5249,9 +5301,9 @@ class ObjectiveFunction(object):
         betas_est - array. Coefficients which maximize the negative log-likelihood.
         """
         # Set default method
-        sub_zi = None
-        exog_infl = None if 'exog_infl' not in mod else mod['exog_infl']
-        inf_betas = 0 if exog_infl is None else len(exog_infl)
+        # TODO: the initial fit worked but it throws
+
+
 
         sol = Solution()
         log_ll = 10.0 ** 9
@@ -5266,10 +5318,7 @@ class ObjectiveFunction(object):
         if self.hess_yes == False:
             method2 = 'BFGS_2'
             method2 = self.method_ll
-            # method2 = 'BFGS_2'
 
-        # method2 = 'BFGS_2'
-        # method2 = 'dogleg'
         bic = None
         pvalue_alt = None
         zvalues = None
@@ -5287,7 +5336,7 @@ class ObjectiveFunction(object):
 
         dispersion_param_num = self.is_dispersion(dispersion)
 
-        paramNum = self.get_param_num(dispersion)
+        # paramNum = self.get_param_num(dispersion)
         self.no_random_paramaters = 0
         if 'XG' in mod:
             XX = np.concatenate((mod.get('X'), mod.get('XG'), mod.get('Xr'), mod.get('XH')), axis=2)
@@ -5313,7 +5362,7 @@ class ObjectiveFunction(object):
             XX_test = mod.get('Xr_test')
 
         bb = np.random.uniform(
-            -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num + inf_betas)
+            -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num)
 
         if method == 'L-BFGS-B':
             if dispersion == 0:
@@ -5347,11 +5396,13 @@ class ObjectiveFunction(object):
         hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
         initial_beta = self._minimize(self._loglik_gradient, bb,
                                       args=(XX, y, None, None, None, None, calc_gradient, hess_est,
-                                            dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None,
+                                            dispersion, 0, False, 0, None, None, None, None, None,
                                             mod),
                                       method=method2, tol=1e-5, options={'gtol': tol['gtol']},
                                       bounds=bounds)
 
+
+
         if method2 == 'L-BFGS-B':
             if hasattr(initial_beta.hess_inv, 'todense'):
                 initial_beta['hess_inv'] = initial_beta.hess_inv.todense() if hasattr(initial_beta.hess_inv,
@@ -5363,7 +5414,7 @@ class ObjectiveFunction(object):
         if initial_beta is not None and np.isnan(initial_beta['fun']):
             initial_beta = self._minimize(self._loglik_gradient, bb,
                                           args=(XX, y, None, None, None, None, True, True, dispersion,
-                                                0, False, 0, None, sub_zi, exog_infl, None, None, mod),
+                                                0, False, 0, None, None, None, None, None, mod),
                                           method=method2, tol=tol['ftol'], options={'gtol': tol['gtol']})
 
         if initial_beta is not None and not np.isnan(initial_beta['fun']):
@@ -5387,24 +5438,24 @@ class ObjectiveFunction(object):
                               loglik=log_ll_fixed, num_parm=paramNum, GOF=other_measures)
 
             self.naming_for_printing(
-                initial_beta['x'], 1, dispersion, zi_fit=sub_zi, model_nature=mod)
+                initial_beta['x'], 1, dispersion, model_nature=mod)
 
             if self.is_multi:
                 in_sample_mae = self.validation(
                     initial_beta['x'], mod.get('y'), mod.get('X'), dispersion=dispersion,
-                    rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None, model_nature=mod, halton=0,
+                    rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
                     testing=0)
 
                 sol.add_objective(TRAIN=in_sample_mae)
                 MAE_out = self.validation(
                     initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
-                    rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None, model_nature=mod, halton=0)
+                    rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0)
                 sol.add_objective(TEST=MAE_out)
 
                 if self.val_percentage > 0:
                     MAE_VAL = self.validation(
                         initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
-                        rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None, model_nature=mod, halton=0,
+                        rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
                         validation=1)
                     sol.add_objective(VAL=MAE_VAL)
             if sol[self._obj_1] <= self.best_obj_1:
@@ -5509,9 +5560,6 @@ class ObjectiveFunction(object):
 
                     bounds.append((np.random.uniform(0.05, .15), bob2[count] + 7))
                     count += 1
-
-
-
                 elif ii < jj:
                     if bob2[count] > 0:
 
@@ -5584,14 +5632,14 @@ class ObjectiveFunction(object):
         mod['dispersion_penalty'] = np.abs(b[-1])
         grad_args = (
             X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0, self.rdm_cor_fit,
-            None, exog_infl, draws_grouped, XG, mod)
+            None, None, draws_grouped, XG, mod)
         # self.gradients_est_yes = (1, 1)
 
         if draws is None and draws_hetro is not None:
             print('hold')
         betas_est = self._minimize(self._loglik_gradient, b, args=(
             X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
-            self.rdm_cor_fit, None, exog_infl, draws_grouped, XG, mod),
+            self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
                                    method=method2, tol=tol['ftol'],
                                    options={'gtol': tol['gtol']}, bounds=bounds,
                                    hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
@@ -5610,7 +5658,7 @@ class ObjectiveFunction(object):
             betas_est = self._minimize(self._loglik_gradient, b, args=(
                 X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0,
                 self.rdm_cor_fit,
-                None, exog_infl, draws_grouped, XG, mod),
+                None, None, draws_grouped, XG, mod),
                                        method=method2, tol=tol['ftol'],
                                        options={'gtol': tol['gtol']})
 
@@ -5646,7 +5694,7 @@ class ObjectiveFunction(object):
 
         paramNum = len(betas_est['x'])
         self.naming_for_printing(
-            betas_est['x'], 0, dispersion, zi_fit=sub_zi, model_nature=mod)
+            betas_est['x'], 0, dispersion, model_nature=mod)
 
         sol.add_objective(bic=bic, aic=aic,
                           loglik=log_ll, num_parm=paramNum, GOF=other_measures)
@@ -5656,19 +5704,19 @@ class ObjectiveFunction(object):
         try:
 
             in_sample_mae = self.validation(betas_est['x'], y, X, Xr, dispersion=dispersion,
-                                            rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None,
+                                            rdm_cor_fit=self.rdm_cor_fit,
                                             model_nature=mod, testing=0)
             sol.add_objective(TRAIN=in_sample_mae)
             y_test, X_test, Xr_test = mod.get('y_test'), mod.get('X_test'), mod.get('Xr_test')
             Xr_grouped_test = mod.get('Xrtest')
             MAE_test = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
-                                       rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None,
+                                       rdm_cor_fit=self.rdm_cor_fit,
                                        model_nature=mod)
 
             sol.add_objective(TEST=MAE_test)
-            if self.val_percentage >0:
+            if self.val_percentage > 0:
                 MAE_val = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
-                                          rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None,
+                                          rdm_cor_fit=self.rdm_cor_fit,
                                           model_nature=mod, validation=1)
                 sol.add_objective(VAL=MAE_val)
 
@@ -6105,17 +6153,17 @@ class ObjectiveFunction(object):
         if self.is_quanitifiable_num(obj_1[self._obj_1]) and pvalues is not None:
             self.bic = obj_1['bic']
             self.pvalues = pvalues
-            if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c", "zi", 'grp', 'xh']):
+            if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c", 'grp', 'xh']):
                 # todo: probably delete
                 self.naming_for_printing(
-                    pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'], obj_1['zi_fit'],
+                    pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
                     obj_1, model_nature)
             else:
                 if is_delete == 0:
                     # todo: probably delete
                     self.naming_for_printing(
                         pvalues, 1, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
-                        obj_1['zi_fit'], obj_1, model_nature)
+                        obj_1, model_nature)
             self.coeff_ = betas
             self.stderr = stderr
             self.zvalues = zvalues
metacountregressor-0.1.93.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: metacountregressor
-Version: 0.1.78
+Version: 0.1.93
 Summary: Extensions for a Python package for estimation of count models.
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
@@ -11,12 +11,18 @@ Description-Content-Type: text/markdown
 License-File: LICENSE.txt
 Requires-Dist: numpy >=1.13.1
 Requires-Dist: scipy >=1.0.0
+Requires-Dist: requests
 
 <div style="display: flex; align-items: center;">
-    <img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 200px; margin-right: 20px;">
+    <img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 100px; margin-right: 20px;">
     <p><span style="font-size: 60px;"><strong>MetaCountRegressor</strong></span></p>
 </div>
 
+# Tutorial also available as a Jupyter notebook
+[Download Example Notebook](https://github.com/zahern/CountDataEstimation/blob/main/Tutorial.ipynb)
+
+The tutorial provides more extensive examples of how to run the code and perform experiments. Further documentation is currently in development.
+
 ##### Quick Setup
 The code below demonstrates how to set up automatic optimization assisted by the harmony search algorithm. Differential Evolution and Simulated Annealing are referenced later (change accordingly).
 
@@ -35,8 +41,15 @@ from metacountregressor.solution import ObjectiveFunction
 from metacountregressor.metaheuristics import (harmony_search,
                                                differential_evolution,
                                                simulated_annealing)
+
+
 ```
 
+    loaded standard packages
+    loaded helper
+    testing
+
+
 #### Basic setup.
 The initial setup involves reading in the data and selecting an optimization algorithm. As the runtime progresses, new solutions will be continually evaluated. Finally, at the end of the runtime, the best solution will be identified and printed out. In the case of multiple objectives, all of the best solutions that belong to the Pareto frontier will be printed out.
 
@@ -53,7 +66,7 @@ X = df.drop(columns=['FREQ', 'ID', 'AADT'])
 
 # some example arguments; these are the defaults, so the following line is just for clarity. See the later arguments section for details.
 arguments = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number': 1,
-             'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 6}
+             'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "_max_time": 6}
 # Fit the model with metacountregressor
 obj_fun = ObjectiveFunction(X, y, **arguments)
 # replace with other metaheuristics if desired
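As the comment in the snippet above suggests, the fitted `ObjectiveFunction` can be handed to any of the three metaheuristics imported in the Quick Setup block; a sketch of the swap:

```python
# Sketch: swapping the metaheuristic, per the comment above. All three solvers
# are imported in the Quick Setup block and take the objective function.
from metacountregressor.metaheuristics import (harmony_search,
                                               differential_evolution,
                                               simulated_annealing)

results = harmony_search(obj_fun)            # default shown in this README
# results = differential_evolution(obj_fun)  # alternative metaheuristics
# results = simulated_annealing(obj_fun)
print(results)
```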
@@ -71,7 +84,7 @@ Note: Please consider the main arguments to change.
 - `val_percentage`: This parameter represents the percentage of data used to validate the model. The value 0.15 corresponds to 15% of the data.
 - `test_complexity`: This parameter defines the complexity level for testing. The value 6 tests all complexities. Alternatively, you can provide a list of numbers to consider different complexities. The complexities are further explained later in this document.
 - `instance_number`: This parameter is used to give a name to the outputs.
-- `obj_1`: This parameter has multiple choices for obj_1, such as 'bic', 'aic', and 'hqic'. Only one choice should be defined as a string value.
+- `_obj_1`: This parameter has multiple choices for objective 1, such as 'bic', 'aic', and 'hqic'. Only one choice should be defined as a string value.
 - `_obj_2`: This parameter has multiple choices for objective 2, such as 'RMSE_TEST', 'MSE_TEST', and 'MAE_TEST'.
 - `_max_time`: This parameter specifies the maximum number of seconds for the total estimation before stopping.
 - `distribution`: This parameter is a list of distributions to consider. Please select all of the available options and put them into a list of valid options if you want to consider the distribution type when modelling with random parameters. The valid options include: 'Normal', 'LnNormal', 'Triangular', and 'Uniform'.
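Pulling the arguments above into one dictionary may be convenient; a sketch follows. The key spellings track the examples in this README, which mix `obj_1` and `_obj_1`, so treat the exact names as assumptions to be verified against the package:

```python
# Sketch: the documented arguments gathered into one dictionary.
arguments = {
    'algorithm': 'hs',             # harmony search
    'test_percentage': 0.15,       # 15% of the data held out for testing
    'val_percentage': 0.15,        # 15% held out for validation
    'test_complexity': 6,          # 6 tests all complexities
    'instance_number': 1,          # names the output files
    '_obj_1': 'bic',               # primary objective: 'bic', 'aic' or 'hqic'
    '_obj_2': 'RMSE_TEST',         # secondary objective
    '_max_time': 600,              # stop after 600 seconds
    'distribution': ['Normal', 'LnNormal', 'Triangular', 'Uniform'],
}
obj_fun = ObjectiveFunction(X, y, **arguments)
```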
@@ -80,7 +93,7 @@ Note: Please consider the main arguments to change.
 
 
 
-### An Example of changing the arguments.
+### Example of changing the arguments:
 Modify the arguments according to your preferences using the commented code as a guide.
 
 
@@ -108,16 +121,18 @@ Listed below is an example of how to specify an initial solution within the framework.
 
 
 ```python
-# Model Decisions, Specify for Initial Optimization
+# Model decisions: specify an initial solution that will be optimised.
 manual_fit_spec = {
     'fixed_terms': ['SINGLE', 'LENGTH'],
     'rdm_terms': ['AADT:normal'],
-    'rdm_cor_terms': ['GRADEBR:uniform', 'CURVES:triangular'],
+    'rdm_cor_terms': ['GRADEBR:normal', 'CURVES:normal'],
     'grouped_terms': [],
     'hetro_in_means': ['ACCESS:normal', 'MINRAD:normal'],
     'transformations': ['no', 'no', 'log', 'no', 'no', 'no', 'no'],
-    'dispersion': 1
+    'dispersion': 0
 }
+
+
 # Search Arguments
 arguments = {
     'algorithm': 'hs',
@@ -129,7 +144,47 @@ arguments = {
 obj_fun = ObjectiveFunction(X, y, **arguments)
 ```
 
-simarly to return the results feed the objective function into a metaheuristic solution algorithm. An example of this is provided below:
+    Setup Complete...
+    Benchmarking test with Seed 42
+    --------------------------------------------------------------------------------
+    Log-Likelihood: -1339.1862434675106
+    --------------------------------------------------------------------------------
+    bic: 2732.31
+    --------------------------------------------------------------------------------
+    MSE: 650856.32
+    +--------------------------+--------+-------+----------+----------+------------+
+    | Effect                   | $\tau$ | Coeff | Std. Err | z-values | Prob |z|>Z |
+    +==========================+========+=======+==========+==========+============+
+    | LENGTH                   | no     | -0.15 | 0.01     | -12.98   | 0.00***    |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | SINGLE                   | no     | -2.46 | 0.04     | -50.00   | 0.00***    |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | GRADEBR                  | log    | 4.23  | 0.10     | 42.17    | 0.00***    |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | CURVES                   | no     | 0.51  | 0.01     | 34.78    | 0.00***    |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | Chol: GRADEBR (Std.      |        | 2.21  | 0.00     | 50.00    | 0.00***    |
+    | Dev. normal) )           |        |       |          |          |            |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | Chol: CURVES (Std. Dev.  |        | -0.51 | 0.00     | -50.00   | 0.00***    |
+    | normal) )                |        |       |          |          |            |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | Chol: CURVES (Std. Dev.  | no     | 0.55  | 0.00     | 50.00    | 0.00***    |
+    | normal) . GRADEBR (Std.  |        |       |          |          |            |
+    | Dev. normal )            |        |       |          |          |            |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | main: MINRAD: hetro      | no     | -0.00 | 0.00     | -44.36   | 0.00***    |
+    | group 0                  |        |       |          |          |            |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | ACCESS: hetro group 0    |        | 0.68  | 0.09     | 7.68     | 0.00***    |
+    +--------------------------+--------+-------+----------+----------+------------+
+    | main: MINRAD: hetro      |        | -0.00 | 0.00     | -44.86   | 0.00***    |
+    | group 0:normal:sd hetro  |        |       |          |          |            |
+    | group 0                  |        |       |          |          |            |
+    +--------------------------+--------+-------+----------+----------+------------+
+
+
+Similarly, to return the results, feed the objective function into a metaheuristic solution algorithm. An example of this is provided below:
@@ -137,7 +192,7 @@ results = harmony_search(obj_fun)
 print(results)
 ```
 
-## Notes:
+# Notes:
 ### Capabilities of the software include:
 * Handling of Panel Data
 * Support for Data Transformations
@@ -155,11 +210,11 @@ Capability to handle heterogeneity in the means of the random parameters
 * Customization of hyper-parameters to solve problems tailored to your dataset
 * Out-of-the-box optimization capability using default metaheuristics
 
-### Intreting the output of the model:
+### Interpreting the output of the model:
 A regression table is produced. The following text elements are explained:
 - Std. Dev.: This column appears for effects that are related to random parameters and displays the assumed distributional assumption next to it.
 - Chol: This term refers to a Cholesky decomposition element, showing the correlation between two random parameters. The combination of a Cholesky element with itself is equivalent to a normal random parameter.
-- hetro group #: This term represents the heterogeneity group number, which refers all of the contributing factors that share hetrogentiy in the means to each other under the same numbered value.
+- hetro group: This term represents the heterogeneity group number, which groups all of the contributing factors that share heterogeneity in the means under the same numbered value.
 - $\tau$: This column displays the type of transformation that was applied to the specific contributing factor in the data.
 
 
@@ -211,10 +266,10 @@ The following list describes the arguments available in this function. By defaul
 
 8. **`_max_time`**: This argument is used to add a termination time to the algorithm. It takes values in seconds. Note that the clock only starts after the initial population of solutions is generated.
 
-# Example
+## Example: Assistance by Harmony Search
 
 
-Let's start by fitting very simple models, use those model sto help and define the objectives, then perform more of an extensive search on the variables that are identified more commonly
+Let's begin by fitting very simple models and use the structure of these models to define our objectives. Then, we can conduct a more extensive search on the variables that are more frequently identified. For instance, in the case below, the complexity is level 3, indicating that we will consider, at most, randomly correlated parameters. This approach is useful for initially identifying a suitable set of contributing factors for our search.
 
 
 
@@ -241,27 +296,30 @@ arguments = {
     '_max_time': 10000
 }
 obj_fun = ObjectiveFunction(X, y, **arguments)
-
 results = harmony_search(obj_fun)
 print(results)
 ```
 
+## Paper
+
+The following tutorial is in conjunction with our latest paper. A link to the current paper can be found here: [MetaCountRegressor](https://www.overleaf.com/read/mszwpwzcxsng#c5eb0c)
+
 ## Contact
 If you have any questions, ideas to improve MetaCountRegressor, or want to report a bug, just open a new issue in the [GitHub repository](https://github.com/zahern/CountDataEstimation).
 
 ## Citing MetaCountRegressor
 Please cite MetaCountRegressor as follows:
 
-Ahern, Z., Corry P., Paz A. (2023). MetaCountRegressor [Computer software]. [https://pypi.org/project/metacounregressor/](https://pypi.org/project/metacounregressor/)
+Ahern, Z., Corry P., Paz A. (2024). MetaCountRegressor [Computer software]. [https://pypi.org/project/metacounregressor/](https://pypi.org/project/metacounregressor/)
 
 Or using BibTeX as follows:
 
 ```bibtex
-@misc{Ahern2023,
-  author = {Zeke Ahern and Paul Corry and Alexander Paz},
+@misc{Ahern2024Meta,
+  author = {Zeke Ahern, Paul Corry and Alexander Paz},
   journal = {PyPi},
   title = {metacountregressor · PyPI},
-  url = {https://pypi.org/project/metacountregressor/0.1.47/},
-  year = {2023},
+  url = {https://pypi.org/project/metacountregressor/0.1.80/},
+  year = {2024},
 }
 ```
metacountregressor-0.1.93.dist-info/RECORD

@@ -3,17 +3,17 @@ metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0
 metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
 metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
 metacountregressor/helperprocess.py,sha256=Sc5gJ7ffFlkya5B5KQwE33xxXuIQyF6OaYtSikLa3pQ,12968
-metacountregressor/main.py,sha256=asQL1Gey2izglX5FOQFZOaEqzzVacRf88EuSJnCVPKs,16289
+metacountregressor/main.py,sha256=7ln6YvX2Nmesw1ose7T-2BQdLfDz0XmiLnP991AgQHw,18273
 metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
 metacountregressor/metaheuristics.py,sha256=2MW3qlgs7BFbe_w64snLSKc4Y0-e_9sa3s_96rUm_SE,105887
 metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiLur0k,23096
 metacountregressor/pareto_logger__plot.py,sha256=mEU2QN4wmsM7t39GJ_XhJ_jjsdl09JOmG0U2jICrAkI,30037
 metacountregressor/setup.py,sha256=8w6IqX0tJsbYrOI1BJLIJCIvOnunKli5I9fsF5PhHv4,919
 metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
-metacountregressor/solution.py,sha256=OPwMkT1seW06zBYjs3N6vil79k1CE3of7Ua1-SajG0M,265586
+metacountregressor/solution.py,sha256=wigjQ4tJrMS0EvbzmRMb2JRT7s0guvPdpCXRwEWUGQg,266891
 metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
-metacountregressor-0.1.78.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-metacountregressor-0.1.78.dist-info/METADATA,sha256=htRwRhVDIqwgapI4uen7XPxoX5EtabdmlXWpkNGK62E,14341
-metacountregressor-0.1.78.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
-metacountregressor-0.1.78.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
-metacountregressor-0.1.78.dist-info/RECORD,,
+metacountregressor-0.1.93.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+metacountregressor-0.1.93.dist-info/METADATA,sha256=lxko7pOT-xFIpuqN3cUYr9hF3SIWszwVmGcfASHysOY,18165
+metacountregressor-0.1.93.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+metacountregressor-0.1.93.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
+metacountregressor-0.1.93.dist-info/RECORD,,
metacountregressor-0.1.93.dist-info/WHEEL

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (70.3.0)
+Generator: setuptools (72.1.0)
 Root-Is-Purelib: true
 Tag: py3-none-any