metacountregressor 0.1.78__py3-none-any.whl → 0.1.88__py3-none-any.whl

--- metacountregressor/main.py (0.1.78)
+++ metacountregressor/main.py (0.1.88)
@@ -109,6 +109,16 @@ def main(args, **kwargs):
  'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
  'dispersion': 1
  }
+ print('overriding this delete, just want to test the NB')
+ manual_fit_spec = {
+ 'fixed_terms': ['const'],
+ 'rdm_terms': [],
+ 'rdm_cor_terms': [],
+ 'grouped_terms': [],
+ 'hetro_in_means': [],
+ 'transformations': ['no'],
+ 'dispersion': 1
+ }

  df = pd.read_csv('./data/Ex-16-3.csv') # read in the data
  y_df = df[['FREQ']].copy() # only consider crashes
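The block added above hard-codes a constant-only negative binomial specification for testing. As a minimal sketch (not the package's test harness), this is how such a spec is assembled and handed off via `args['Manual_Fit']`, the same hand-off that appears in a later hunk of this diff:

```python
# Sketch: the constant-only NB test spec added above, plus the hand-off
# that main() performs further down (the 'fitting manually' branch).
manual_fit_spec = {
    'fixed_terms': ['const'],   # intercept-only mean function
    'rdm_terms': [],            # no random parameters
    'rdm_cor_terms': [],        # no correlated random parameters
    'grouped_terms': [],
    'hetro_in_means': [],
    'transformations': ['no'],  # one entry per term above
    'dispersion': 1             # 1 selects the negative binomial
}
args = {'problem_number': '4'}          # hypothetical args dict for the demo
args['Manual_Fit'] = manual_fit_spec    # consumed when fitting manually
```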
@@ -118,7 +128,7 @@ def main(args, **kwargs):
  x_df['Offset'] = np.log(1 + x_df['AADT'] * x_df['LENGTH'] * 365 / 100000000)
  x_df = x_df.drop(columns=['AADT', 'LENGTH'])

- if args['separate_out_factors']:
+ if args.get('seperate_out_factors', 0):
  x_df = helperprocess.as_wide_factor(x_df, keep_original=0,
  exclude=['INTECHAG', 'CURVES', 'MIMEDSH', 'MXMEDSH', 'SPEED'])
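The guarded branch widens factor columns through `helperprocess.as_wide_factor`. Assuming it behaves like a one-hot expansion with an exclusion list (an assumption; the package's actual implementation may differ), the effect can be sketched with pandas:

```python
import pandas as pd

def as_wide_factor_sketch(x_df, keep_original=0, exclude=()):
    # Assumed behaviour: one-hot expand low-cardinality columns,
    # leaving the excluded columns untouched.
    cols = [c for c in x_df.columns
            if c not in exclude and x_df[c].nunique() <= 5]
    out = pd.get_dummies(x_df, columns=cols, drop_first=True)
    if keep_original:
        out = pd.concat([x_df[cols], out], axis=1)
    return out

demo = pd.DataFrame({'SPEED': [50, 60], 'LANES': [2, 4]})
print(as_wide_factor_sketch(demo, exclude=['SPEED']))
```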
@@ -173,8 +183,8 @@ def main(args, **kwargs):
  x_df = x_df.drop(columns = [ 'zonal_ID', 'ln_AADT', 'ln_seg'])
  x_df['rumble_install_year'] = x_df['rumble_install_year'].astype('category').cat.codes
  x_df.rename(columns={"rumble_install_year": "has_rumble"}, inplace=True)
-
- except:
+ except Exception as e:
+ print(e)
  x_df = df.drop(columns=['Y']) # was dropped postcode

  group_grab = x_df['county']
@@ -215,7 +225,6 @@ def main(args, **kwargs):
  else:
  print('fitting manually')
  args['Manual_Fit'] = manual_fit_spec
-
  if args['problem_number'] == str(8) or args['problem_number'] == 8:
  print('Maine County Dataset.')
  args['group'] = 'county'
@@ -346,10 +355,8 @@ if __name__ == '__main__':
  override = True
  if override:
  print('todo turn off, in testing phase')
- parser.add_argument('-problem_number', default='8')
+ parser.add_argument('-problem_number', default='4')
  print('did it make it')
-
-
  if 'algorithm' not in args:
  parser.add_argument('-algorithm', type=str, default='hs',
  help='optimization algorithm')
@@ -370,7 +377,7 @@ if __name__ == '__main__':
  parser.print_help()
  args = vars(parser.parse_args())
  print(type(args))
- # TODO add in chi 2 and df in estimation and compare degrees of freedom
+ # TODO add in chi 2 and df in estimation and compare degrees of freedom this needs to be done in solution

  # Print the args.
  profiler = cProfile.Profile()
--- metacountregressor/solution.py (0.1.78)
+++ metacountregressor/solution.py (0.1.88)
@@ -47,7 +47,7 @@ np.seterr(divide='ignore', invalid='ignore')
  warnings.simplefilter("ignore")

  # define the computation boundary limits
- min_comp_val = 1e-20
+ min_comp_val = 1e-160
  max_comp_val = 1e+200
  log_lik_min = -1e+200
  log_lik_max = 1e+200
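Lowering `min_comp_val` from 1e-20 to 1e-160 loosens the floor applied to probabilities before they are logged; combined with the `np.clip` change in a later hunk, the guard pattern looks like this (a sketch of the boundary usage, not the package's exact code path):

```python
import numpy as np

min_comp_val = 1e-160   # floor applied before taking logs
max_comp_val = 1e+200   # ceiling that keeps products finite

proba_r = np.array([0.0, 1e-300, 0.2])                  # raw probabilities
proba_r = np.clip(proba_r, min_comp_val, max_comp_val)  # bound away from 0/inf
loglik = np.log(proba_r)  # finite everywhere: log(1e-160) ~ -368.4
print(loglik)
```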
@@ -131,8 +131,8 @@ class ObjectiveFunction(object):
  print('change this to false latter ')

  # initialize values
- self.constant_value = -5.5
- self.negative_binomial_value = 0.05
+ self.constant_value = 0
+ self.negative_binomial_value = 1

  self.verbose_safe = True
  self.please_print = kwargs.get('please_print', 0)
@@ -169,7 +169,7 @@ class ObjectiveFunction(object):
  self._par = 0.3
  self._mpai = 1
  self._max_imp = 100000
- self._WIC = 1000 # Number of Iterations without Multiobjective Improvement
+ self._WIC = 1000 # Number of Iterations without Multiobjective Improvement #tod chuck into solution
  self._panels = None
  self.is_multi = True
  self.method_ll = 'Nelder-Mead-BFGS'
@@ -841,8 +841,7 @@ class ObjectiveFunction(object):

  return ([self._model_type_codes[dispersion]])

- def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None,
- zi_fit=None, obj_1=None, model_nature=None):
+ def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
  self.name_deleter = []
  group_rpm = None
  group_dist = []
@@ -2683,7 +2682,7 @@ class ObjectiveFunction(object):
  grad_n = np.concatenate((grad_n_sub, grad_n_sub1), axis=1)
  return gradient, grad_n

- def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False):
+ def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False, alpha = None):
  """
  Negative Binomial model score (gradient) vector of the log-likelihood
  Parameters
@@ -2702,10 +2701,48 @@ class ObjectiveFunction(object):


  """
+ #print('delete this later')
+ if alpha is None:
+ alpha = params[-1]
+ # Calculate common terms
+ '''
+ n = len(y)
+ n, p, d = X.shape # n: observations, p: panels (1 in your case), d: explanatory variables

- try:
+ # Flatten the data since there's only one panel, simplifying the operations
+ X_flat = X.reshape(n * p, d)
+ y_flat = y.flatten()
+ mu_flat = mu.flatten()

- alpha = params[-1]
+ # Prepare score array
+ score = np.zeros(d + 1) # +1 for alpha
+
+ # Compute the gradient for regression coefficients
+ for j in range(d): # Exclude the last parameter (alpha)
+ score[j] = np.dot(X_flat[:, j], (y_flat - mu_flat))
+
+ # Compute the gradient for the dispersion parameter
+ if obs_specific:
+ # Adjust the calculation if observation-specific effects are considered
+ sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+ y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+ score[-1] = np.sum(sum_terms)
+ else:
+ # Standard calculation
+ sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+ y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+ score[-1] = np.sum(sum_terms)
+ return score
+ '''
+ #return score
+
+
+
+
+
+ try:
+ if alpha is None:
+ alpha = params[-1]
  a1 = 1 / alpha * mu ** Q
  prob = a1 / (a1 + mu)
  exog = X
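For reference, the `try` block above mirrors the statsmodels NB-2 parameterisation (`a1 = 1/alpha * mu**Q` with `Q = 0`). A self-contained sketch of that score, assuming `mu = exp(X @ beta)` and one observation per row (the package's panel handling and `obs_specific` per-row output are omitted):

```python
import numpy as np
from scipy.special import digamma

def nb2_score(params, y, mu, X):
    # Summed NB-2 score: d loglik / d beta (per column of X) and d loglik / d alpha.
    alpha = params[-1]
    a1 = 1.0 / alpha                       # NB-2 shape (Q = 0)
    prob = a1 / (a1 + mu)                  # success probability
    dgpart = digamma(y + a1) - digamma(a1)
    dparams = X * (a1 * (y - mu) / (a1 + mu))[:, None]   # per-obs beta gradient
    dalpha = (dgpart + np.log(prob) - (y - mu) / (a1 + mu)) * (-a1**2)
    return np.concatenate((dparams.sum(axis=0), [dalpha.sum()]))

# toy usage with simulated counts (placeholder data, not the package's dataset)
rng = np.random.default_rng(0)
X = np.column_stack((np.ones(50), rng.normal(size=50)))
beta, alpha = np.array([0.2, 0.4]), 0.8
mu = np.exp(X @ beta)
y = rng.poisson(mu)
print(nb2_score(np.r_[beta, alpha], y, mu, X))
```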
@@ -2747,7 +2784,8 @@ class ObjectiveFunction(object):
  return np.concatenate((dparams, dalpha),
  axis=1)
  except Exception as e:
- print('in ki nb probkemng')
+ print(e)
+ print('NB score exception problem..')
  exc_type, exc_obj, exc_tb = sys.exc_info()
  fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
  print(exc_type, fname, exc_tb.tb_lineno)
@@ -3414,7 +3452,7 @@ class ObjectiveFunction(object):
  # prob = 1/(1+mu*alpha)
  try:
  # print(np.shape(y),np.shape(size), np.shape(prob))
- # gg2 = self.negbinom_pmf(alpha_size, prob, y)
+ gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)

  gg = np.exp(
  gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
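The reactivated `gg2` line passes `size/(size+mu)` as the success probability. Assuming `negbinom_pmf` follows the standard (size, prob) parameterisation, it should agree with the `gammaln` expression beside it; the hunk cuts that expression short, so the completion below is the standard NB-2 pmf, checked against scipy:

```python
import numpy as np
from scipy.special import gammaln
from scipy.stats import nbinom

y, mu, alpha = 3, 2.5, 1.7   # arbitrary test point; alpha plays the 'size' role

# gammaln form of the NB-2 pmf (standard completion of the truncated expression)
gg = np.exp(gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha)
            + y * np.log(mu) + alpha * np.log(alpha)
            - (y + alpha) * np.log(mu + alpha))

# (size, prob) form with prob = size / (size + mu), as in the activated line
gg2 = nbinom.pmf(y, alpha, alpha / (alpha + mu))

assert np.isclose(gg, gg2)   # the two parameterisations coincide
```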
@@ -3572,21 +3610,8 @@ class ObjectiveFunction(object):

  if dispersion == 1 or dispersion == 4: # nb
  # if model_nature is not None and 'dispersion_penalty' in model_nature:
- # if b_gam < 0.8*model_nature['dispersion_penalty']:
- # penalty += model_nature['dispersion_penalty'] -b_gam
-
- # if abs(b_gam) < 0.01:
- # penalty += 1/np.abs(b_gam)

- if b_gam >= 4.5:
- penalty += b_gam
- b_gam = 4.61
- # b_gam = 7.9
- # penalty += model_nature['dispersion_penalty'] -b_gam
- # penalty += 1/np.max((0.01,abs(b_gam)))
- # b_gam = model_nature['dispersion_penalty']

- """
  if b_gam <= 0:
  #penalty += 100
  #penalty += abs(b_gam)
@@ -3594,21 +3619,21 @@ class ObjectiveFunction(object):
  #b_gam = 1

  # if b_gam < 0.03:
- penalty += 10
+ penalty += min(1, np.abs(b_gam))

- b_gam = 0.03
+ b_gam = 0.001
  #

- if b_gam >= 10:
- penalty+= b_gam
+ #if b_gam >= 10:
+ # penalty+= b_gam

- if b_gam == 0:
- b_gam = min_comp_val
+ # if b_gam == 0:
+ #b_gam = min_comp_val
  #b_gam = 0.03

- b_gam = abs(b_gam)
+ # b_gam = abs(b_gam)

- """
+

  elif dispersion == 2:
  if b_gam >= 1:
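In isolation, the guard that survives the rewrite above penalises a non-positive dispersion estimate in proportion to its magnitude (capped at 1) and floors it at 0.001, replacing the old flat `penalty += 10` and 0.03 floor. A minimal sketch:

```python
import numpy as np

def guard_dispersion(b_gam, penalty=0.0):
    # Sketch of the surviving NB guard: non-positive dispersion estimates
    # incur a magnitude-proportional penalty (capped at 1) and are floored
    # at 0.001 so the likelihood remains defined.
    if b_gam <= 0:
        penalty += min(1, np.abs(b_gam))
        b_gam = 0.001
    return b_gam, penalty

print(guard_dispersion(-0.4))   # (0.001, 0.4)
print(guard_dispersion(1.2))    # unchanged: (1.2, 0.0)
```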
@@ -3761,7 +3786,8 @@ class ObjectiveFunction(object):
  elif dispersion == 1:

  proba_r = self._nonlog_nbin(y, eVd, b_gam)
- # print(1)
+
+
  # proba_d = self.dnegbimonli(y, eVd, b_gam )
  # print('fuck if this actually works')

@@ -3793,7 +3819,7 @@ class ObjectiveFunction(object):
  proba_p = self._prob_product_across_panels(
  proba_r, self.panel_info)
  proba_r = proba_p
- proba_r = np.clip(proba_r, min_comp_val, None)
+ proba_r = np.clip(proba_r, min_comp_val, max_comp_val)
  loglik = np.log(proba_r)
  return loglik

@@ -4095,9 +4121,9 @@ class ObjectiveFunction(object):

  elif dispersion == 1:

- der = -self.NB_Score(betas, y, eVd, Xd, 0, obs_specific)
+ der = self.NB_Score(betas, y, eVd, Xd, 0, obs_specific)
  if both:
- grad_n = -self.NB_Score(betas, y, eVd, Xd, 0, True)
+ grad_n = self.NB_Score(betas, y, eVd, Xd, 0, True)
  return np.nan_to_num(der, nan=200, posinf=200, neginf=-200), np.nan_to_num(grad_n, nan=140, posinf=140,
  neginf=-140)

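Dropping the leading minus on `NB_Score` flips the sign of the derivative the optimiser receives, which is only correct if the objective's sign convention changed in step. A finite-difference check (hypothetical helper, not part of the package) is a cheap way to confirm the pairing:

```python
import numpy as np

def gradient_sign_check(objective, gradient, x, eps=1e-6):
    # Compare one finite-difference component of `objective` against the
    # analytic `gradient`; returns False if the sign convention is wrong.
    e0 = np.zeros_like(x)
    e0[0] = eps
    fd = (objective(x + e0) - objective(x - e0)) / (2 * eps)
    return bool(np.isclose(fd, gradient(x)[0], rtol=1e-4))

# toy usage: minimising f(x) = sum(x^2) with the correctly signed gradient
print(gradient_sign_check(lambda b: np.sum(b**2), lambda b: 2*b, np.ones(3)))  # True
```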
@@ -4351,7 +4377,7 @@ class ObjectiveFunction(object):
  P += Xd[key].shape[1]
  Kf += Xd[key].shape[2]
  else:
- self.naming_for_printing(betas, 1, dispersion, zi_fit=zi_list, model_nature=model_nature)
+ self.naming_for_printing(betas, 1, dispersion, model_nature=model_nature)
  N, P, Kf = Xd.shape[0], Xd.shape[1], Xd.shape[2]
  betas = np.array(betas)
  Bf = betas[0:Kf] # Fixed betas
@@ -4381,7 +4407,7 @@ class ObjectiveFunction(object):
  llf_main = self.loglik_obs(
  y, eVd, dispersion, main_disper, lindley_disp, betas)

- # llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
+ llf_main = np.clip(llf_main, log_lik_min, log_lik_max)

  loglik = llf_main.sum()

@@ -4420,7 +4446,7 @@ class ObjectiveFunction(object):
  # Kf =0
  betas = np.array(betas)
  betas = dev.to_gpu(betas) # TODO fix mepotnetially problem
- self.naming_for_printing(betas, 0, dispersion, zi_fit=zi_list, model_nature=model_nature)
+ self.naming_for_printing(betas, 0, dispersion, model_nature=model_nature)
  y = dev.to_gpu(y)
  if draws is not None and draws_grouped is not None:
  draws = np.concatenate((draws_grouped, draws), axis=1)
@@ -4509,11 +4535,11 @@ class ObjectiveFunction(object):
  # brstd), draws_) # Get random coefficients, old method
  Br = self._transform_rand_betas(br,
  brstd, draws_) # Get random coefficients
- self.naming_for_printing(betas, dispersion=dispersion, zi_fit=zi_list, model_nature=model_nature)
+ self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
  self.Br = Br.copy()

  else:
- self.naming_for_printing(betas, dispersion=dispersion, zi_fit=zi_list, model_nature=model_nature)
+ self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
  chol_mat = self._chol_mat(
  len(self.rdm_cor_fit), br, brstd, self.rdm_cor_fit)
  self.chol_mat = chol_mat.copy()
@@ -4938,12 +4964,16 @@ class ObjectiveFunction(object):
  return H

  def _minimize(self, loglik_fn, x, args, method, tol, options, bounds=None, hess_calc=None):
-
+ #method = 'BFGS'
  if method == "BFGS":

  try:
+ argbs = list(args)

- return self._bfgs(loglik_fn, x, args=args, tol=tol, **options) # @IgnoreException
+ argbs[7] = True
+ argsb = tuple(argbs)
+ a = self._bfgs(loglik_fn, x, args=argsb, tol=tol, **options)
+ return self._bfgs(loglik_fn, x, args=args, tol=tol, **options)

  except:
  return minimize(loglik_fn, x, args=args, jac=args[6], method='BFGS', tol=tol, options=options)
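The new branch also runs `_bfgs` once with `argbs[7]` forced to `True` and discards the result `a`, apparently a leftover diagnostic. The retained pattern, an in-house optimiser first with scipy's BFGS as the fallback, can be sketched with hypothetical stand-ins:

```python
import numpy as np
from scipy.optimize import minimize

def minimize_with_fallback(fn, x0, args=(), jac=None, tol=1e-5,
                           options=None, primary=None):
    # Hypothetical wrapper mirroring the pattern above: try the in-house
    # optimiser, fall back to scipy's BFGS if it raises. In the diff,
    # args[6] is reused as the `jac` flag for the fallback call.
    try:
        if primary is None:
            raise RuntimeError('no in-house optimiser supplied')
        return primary(fn, x0, args=args, tol=tol, **(options or {}))
    except Exception:
        return minimize(fn, x0, args=args, jac=jac, method='BFGS',
                        tol=tol, options=options)

# usage: falls straight through to scipy on a toy quadratic
res = minimize_with_fallback(lambda b: np.sum((b - 2.0) ** 2), np.zeros(3))
print(np.round(res.x, 3))   # ~ [2. 2. 2.]
```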
@@ -5249,9 +5279,9 @@ class ObjectiveFunction(object):
  betas_est - array. Coefficients which maximize the negative log-liklihood.
  """
  # Set defualt method
- sub_zi = None
- exog_infl = None if 'exog_infl' not in mod else mod['exog_infl']
- inf_betas = 0 if exog_infl is None else len(exog_infl)
+ #TODO, the inital fit worked but it throws
+
+

  sol = Solution()
  log_ll = 10.0 ** 9
@@ -5266,10 +5296,7 @@ class ObjectiveFunction(object):
  if self.hess_yes == False:
  method2 = 'BFGS_2'
  method2 = self.method_ll
- # method2 = 'BFGS_2'

- # method2 = 'BFGS_2'
- # method2 = 'dogleg'
  bic = None
  pvalue_alt = None
  zvalues = None
@@ -5287,7 +5314,7 @@ class ObjectiveFunction(object):

  dispersion_param_num = self.is_dispersion(dispersion)

- paramNum = self.get_param_num(dispersion)
+ #paramNum = self.get_param_num(dispersion)
  self.no_random_paramaters = 0
  if 'XG' in mod:
  XX = np.concatenate((mod.get('X'), mod.get('XG'), mod.get('Xr'), mod.get('XH')), axis=2)
@@ -5313,7 +5340,7 @@ class ObjectiveFunction(object):
  XX_test = mod.get('Xr_test')

  bb = np.random.uniform(
- -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num + inf_betas)
+ -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num)

  if method == 'L-BFGS-B':
  if dispersion == 0:
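With the zero-inflation count (`inf_betas`) gone, the start vector's length is just the number of estimated terms plus the dispersion parameter. A sketch with hypothetical dimensions matching the names above:

```python
import numpy as np

# Hypothetical dimensions mirroring the names above: fixed (k), random (kr),
# grouped (kg) and heterogeneity-in-means (kh) terms plus one dispersion term.
k, kr, kg, kh, dispersion_param_num = 3, 2, 0, 0, 1
bb = np.random.uniform(-0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num)
print(bb.shape)   # (6,) -- no inf_betas padding any more
```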
@@ -5347,10 +5374,12 @@ class ObjectiveFunction(object):
  hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
  initial_beta = self._minimize(self._loglik_gradient, bb,
  args=(XX, y, None, None, None, None, calc_gradient, hess_est,
- dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None,
+ dispersion, 0, False, 0, None, None, None, None, None,
  mod),
  method=method2, tol=1e-5, options={'gtol': tol['gtol']},
  bounds=bounds)
+ print(1)
+

  if method2 == 'L-BFGS-B':
  if hasattr(initial_beta.hess_inv, 'todense'):
@@ -5363,7 +5392,7 @@ class ObjectiveFunction(object):
  if initial_beta is not None and np.isnan(initial_beta['fun']):
  initial_beta = self._minimize(self._loglik_gradient, bb,
  args=(XX, y, None, None, None, None, True, True, dispersion,
- 0, False, 0, None, sub_zi, exog_infl, None, None, mod),
+ 0, False, 0, None, None, None, None, None, mod),
  method=method2, tol=tol['ftol'], options={'gtol': tol['gtol']})

  if initial_beta is not None and not np.isnan(initial_beta['fun']):
@@ -5387,24 +5416,24 @@ class ObjectiveFunction(object):
  loglik=log_ll_fixed, num_parm=paramNum, GOF=other_measures)

  self.naming_for_printing(
- initial_beta['x'], 1, dispersion, zi_fit=sub_zi, model_nature=mod)
+ initial_beta['x'], 1, dispersion, model_nature=mod)

  if self.is_multi:
  in_sample_mae = self.validation(
  initial_beta['x'], mod.get('y'), mod.get('X'), dispersion=dispersion,
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None, model_nature=mod, halton=0,
+ rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
  testing=0)

  sol.add_objective(TRAIN=in_sample_mae)
  MAE_out = self.validation(
  initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None, model_nature=mod, halton=0)
+ rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0)
  sol.add_objective(TEST=MAE_out)

  if self.val_percentage >0:
  MAE_VAL = self.validation(
  initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None, model_nature=mod, halton=0,
+ rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
  validation=1)
  sol.add_objective(VAL=MAE_VAL)
  if sol[self._obj_1] <= self.best_obj_1:
@@ -5509,9 +5538,6 @@ class ObjectiveFunction(object):

  bounds.append((np.random.uniform(0.05, .15), bob2[count] + 7))
  count += 1
-
-
-
  elif ii < jj:
  if bob2[count] > 0:

@@ -5584,14 +5610,14 @@ class ObjectiveFunction(object):
  mod['dispersion_penalty'] = np.abs(b[-1])
  grad_args = (
  X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0, self.rdm_cor_fit,
- None, exog_infl, draws_grouped, XG, mod)
+ None, None, draws_grouped, XG, mod)
  # self.gradients_est_yes = (1, 1)

  if draws is None and draws_hetro is not None:
  print('hold')
  betas_est = self._minimize(self._loglik_gradient, b, args=(
  X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
- self.rdm_cor_fit, None, exog_infl, draws_grouped, XG, mod),
+ self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
  method=method2, tol=tol['ftol'],
  options={'gtol': tol['gtol']}, bounds=bounds,
  hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
@@ -5610,7 +5636,7 @@ class ObjectiveFunction(object):
  betas_est = self._minimize(self._loglik_gradient, b, args=(
  X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0,
  self.rdm_cor_fit,
- None, exog_infl, draws_grouped, XG, mod),
+ None, None, draws_grouped, XG, mod),
  method=method2, tol=tol['ftol'],
  options={'gtol': tol['gtol']})

@@ -5646,7 +5672,7 @@ class ObjectiveFunction(object):

  paramNum = len(betas_est['x'])
  self.naming_for_printing(
- betas_est['x'], 0, dispersion, zi_fit=sub_zi, model_nature=mod)
+ betas_est['x'], 0, dispersion, model_nature=mod)

  sol.add_objective(bic=bic, aic=aic,
  loglik=log_ll, num_parm=paramNum, GOF=other_measures)
@@ -5656,19 +5682,19 @@ class ObjectiveFunction(object):
  try:

  in_sample_mae = self.validation(betas_est['x'], y, X, Xr, dispersion=dispersion,
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None,
+ rdm_cor_fit=self.rdm_cor_fit,
  model_nature=mod, testing=0)
  sol.add_objective(TRAIN=in_sample_mae)
  y_test, X_test, Xr_test = mod.get('y_test'), mod.get('X_test'), mod.get('Xr_test')
  Xr_grouped_test = mod.get('Xrtest')
  MAE_test = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None,
+ rdm_cor_fit=self.rdm_cor_fit,
  model_nature=mod)

  sol.add_objective(TEST=MAE_test)
- if self.val_percentage >0:
+ if self.val_percentage > 0:
  MAE_val = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None,
+ rdm_cor_fit=self.rdm_cor_fit,
  model_nature=mod, validation=1)
  sol.add_objective(VAL=MAE_val)

@@ -6105,17 +6131,17 @@ class ObjectiveFunction(object):
  if self.is_quanitifiable_num(obj_1[self._obj_1]) and pvalues is not None:
  self.bic = obj_1['bic']
  self.pvalues = pvalues
- if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c", "zi", 'grp', 'xh']):
+ if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c", 'grp', 'xh']):
  # todo: probably delete
  self.naming_for_printing(
- pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'], obj_1['zi_fit'],
+ pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
  obj_1, model_nature)
  else:
  if is_delete == 0:
  # todo: probably delete
  self.naming_for_printing(
  pvalues, 1, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
- obj_1['zi_fit'], obj_1, model_nature)
+ obj_1, model_nature)
  self.coeff_ = betas
  self.stderr = stderr
  self.zvalues = zvalues
--- metacountregressor-0.1.78.dist-info/METADATA
+++ metacountregressor-0.1.88.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: metacountregressor
- Version: 0.1.78
+ Version: 0.1.88
  Summary: Extensions for a Python package for estimation of count models.
  Home-page: https://github.com/zahern/CountDataEstimation
  Author: Zeke Ahern
@@ -11,12 +11,18 @@ Description-Content-Type: text/markdown
  License-File: LICENSE.txt
  Requires-Dist: numpy >=1.13.1
  Requires-Dist: scipy >=1.0.0
+ Requires-Dist: requests

  <div style="display: flex; align-items: center;">
- <img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 200px; margin-right: 20px;">
+ <img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 100px; margin-right: 20px;">
  <p><span style="font-size: 60px;"><strong>MetaCountRegressor</strong></span></p>
  </div>

+ # Tutorial also available as a Jupyter notebook
+ [Download Example Notebook](https://github.com/zahern/CountDataEstimation/blob/main/Tutorial.ipynb)
+
+ The tutorial provides more extensive examples of how to run the code and perform experiments. Further documentation is currently in development.
+
  ##### Quick Setup
  The below code demonstrates how to set up automatic optimization assisted by the harmony search algorithm. Differential evolution and simulated annealing are also referenced (change accordingly).
@@ -35,8 +41,15 @@ from metacountregressor.solution import ObjectiveFunction
  from metacountregressor.metaheuristics import (harmony_search,
  differential_evolution,
  simulated_annealing)
+
+
  ```

+ loaded standard packages
+ loaded helper
+ testing
+
+
  #### Basic setup.
  The initial setup involves reading in the data and selecting an optimization algorithm. As the runtime progresses, new solutions will be continually evaluated. Finally, at the end of the runtime, the best solution will be identified and printed out. In the case of multiple objectives, all of the best solutions that belong to the Pareto frontier will be printed out.

@@ -53,7 +66,7 @@ X = df.drop(columns=['FREQ', 'ID', 'AADT'])

  # some example arguments; these are the defaults, so the following line is just for clarity. See the later arguments section for details.
  arguments = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number':1,
- 'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 6}
+ 'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "_max_time": 6}
  # Fit the model with metacountregressor
  obj_fun = ObjectiveFunction(X, y, **arguments)
  #replace with other metaheuristics if desired
@@ -71,7 +84,7 @@ Note: Please Consider the main arguments to change.
  - `val_percentage`: This parameter represents the percentage of data used to validate the model. The value 0.15 corresponds to 15% of the data.
  - `test_complexity`: This parameter defines the complexity level for testing. The value 6 tests all complexities. Alternatively, you can provide a list of numbers to consider different complexities. The complexities are further explained later in this document.
  - `instance_number`: This parameter is used to give a name to the outputs.
- - `obj_1`: This parameter has multiple choices for obj_1, such as 'bic', 'aic', and 'hqic'. Only one choice should be defined as a string value.
+ - `_obj_1`: This parameter has multiple choices for objective 1, such as 'bic', 'aic', and 'hqic'. Only one choice should be defined as a string value.
  - `_obj_2`: This parameter has multiple choices for objective 2, such as 'RMSE_TEST', 'MSE_TEST', and 'MAE_TEST'.
  - `_max_time`: This parameter specifies the maximum number of seconds for the total estimation before stopping.
  - `distribution`: This parameter is a list of distributions to consider. Put the valid options into a list if you want to consider the distribution type when modelling with random parameters. The valid options include: 'Normal', 'LnNormal', 'Triangular', and 'Uniform'. (A combined sketch of these arguments follows this list.)
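A combined sketch of the arguments described above; the values are illustrative rather than recommendations, and the key names follow the list:

```python
arguments = {
    'algorithm': 'hs',          # or 'de' / 'sa'
    'test_percentage': 0.15,    # 15% of the data held out for testing
    'val_percentage': 0.15,     # 15% held out for validation
    'test_complexity': 6,       # consider all complexity levels
    'instance_number': 1,       # names the outputs
    '_obj_1': 'bic',            # primary objective
    '_obj_2': 'RMSE_TEST',      # secondary objective
    '_max_time': 600,           # stop after 600 seconds
    'distribution': ['Normal', 'LnNormal', 'Triangular', 'Uniform'],
}
```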
@@ -80,7 +93,7 @@ Note: Please Consider the main arguments to change.



- ### An Example of changing the arguments.
+ ### Example of changing the arguments:
  Modify the arguments according to your preferences using the commented code as a guide.


@@ -108,16 +121,18 @@ Listed below is an example of how to specify an initial solution within the fram


  ```python
- #Model Decisions, Specify for Intial Optimization
+ #Model Decisions, Specify for initial solution that will be optimised.
  manual_fit_spec = {
  'fixed_terms': ['SINGLE', 'LENGTH'],
  'rdm_terms': ['AADT:normal'],
- 'rdm_cor_terms': ['GRADEBR:uniform', 'CURVES:triangular'],
+ 'rdm_cor_terms': ['GRADEBR:normal', 'CURVES:normal'],
  'grouped_terms': [],
  'hetro_in_means': ['ACCESS:normal', 'MINRAD:normal'],
  'transformations': ['no', 'no', 'log', 'no', 'no', 'no', 'no'],
- 'dispersion': 1
+ 'dispersion': 0
  }
+
+
  #Search Arguments
  arguments = {
  'algorithm': 'hs',
@@ -129,7 +144,47 @@ arguments = {

  obj_fun = ObjectiveFunction(X, y, **arguments)
  ```

- simarly to return the results feed the objective function into a metaheuristic solution algorithm. An example of this is provided below:
+ Setup Complete...
+ Benchmarking test with Seed 42
+ --------------------------------------------------------------------------------
+ Log-Likelihood: -1339.1862434675106
+ --------------------------------------------------------------------------------
+ bic: 2732.31
+ --------------------------------------------------------------------------------
+ MSE: 650856.32
+ +--------------------------+--------+-------+----------+----------+------------+
+ | Effect                   | $\tau$ | Coeff | Std. Err | z-values | Prob |z|>Z |
+ +==========================+========+=======+==========+==========+============+
+ | LENGTH                   | no     | -0.15 | 0.01     | -12.98   | 0.00***    |
+ +--------------------------+--------+-------+----------+----------+------------+
+ | SINGLE                   | no     | -2.46 | 0.04     | -50.00   | 0.00***    |
+ +--------------------------+--------+-------+----------+----------+------------+
+ | GRADEBR                  | log    | 4.23  | 0.10     | 42.17    | 0.00***    |
+ +--------------------------+--------+-------+----------+----------+------------+
+ | CURVES                   | no     | 0.51  | 0.01     | 34.78    | 0.00***    |
+ +--------------------------+--------+-------+----------+----------+------------+
+ | Chol: GRADEBR (Std.      |        | 2.21  | 0.00     | 50.00    | 0.00***    |
+ | Dev. normal) )           |        |       |          |          |            |
+ +--------------------------+--------+-------+----------+----------+------------+
+ | Chol: CURVES (Std. Dev.  |        | -0.51 | 0.00     | -50.00   | 0.00***    |
+ | normal) )                |        |       |          |          |            |
+ +--------------------------+--------+-------+----------+----------+------------+
+ | Chol: CURVES (Std. Dev.  | no     | 0.55  | 0.00     | 50.00    | 0.00***    |
+ | normal) . GRADEBR (Std.  |        |       |          |          |            |
+ | Dev. normal )            |        |       |          |          |            |
+ +--------------------------+--------+-------+----------+----------+------------+
+ | main: MINRAD: hetro      | no     | -0.00 | 0.00     | -44.36   | 0.00***    |
+ | group 0                  |        |       |          |          |            |
+ +--------------------------+--------+-------+----------+----------+------------+
+ | ACCESS: hetro group 0    |        | 0.68  | 0.09     | 7.68     | 0.00***    |
+ +--------------------------+--------+-------+----------+----------+------------+
+ | main: MINRAD: hetro      |        | -0.00 | 0.00     | -44.86   | 0.00***    |
+ | group 0:normal:sd hetro  |        |       |          |          |            |
+ | group 0                  |        |       |          |          |            |
+ +--------------------------+--------+-------+----------+----------+------------+
+
+
+ Similarly, to return the results, feed the objective function into a metaheuristic solution algorithm. An example of this is provided below:


  ```python
@@ -137,7 +192,7 @@ results = harmony_search(obj_fun)
  print(results)
  ```

- ## Notes:
+ # Notes:
  ### Capabilities of the software include:
  * Handling of Panel Data
  * Support for Data Transformations
@@ -155,11 +210,11 @@ Capability to handle heterogeneity in the means of the random parameters
  * Customization of Hyper-parameters to solve problems tailored to your dataset
  * Out-of-the-box optimization capability using default metaheuristics

- ### Intreting the output of the model:
+ ### Interpreting the output of the model:
  A regression table is produced. The following text elements are explained:
  - Std. Dev.: This column appears for effects that are related to random parameters and displays the assumed distributional assumption next to it.
  - Chol: This term refers to a Cholesky decomposition element, showing the correlation between two random parameters. The combination of the Cholesky element with itself is equivalent to a normal random parameter.
- - hetro group #: This term represents the heterogeneity group number, which refers all of the contributing factors that share hetrogentiy in the means to each other under the same numbered value.
+ - hetro group: This term represents the heterogeneity group number, which groups all of the contributing factors that share heterogeneity in the means under the same numbered value.
  - $\tau$: This column displays the type of transformation that was applied to the specific contributing factor in the data.


@@ -211,10 +266,10 @@ The following list describes the arguments available in this function. By defaul

  8. **`_max_time`**: This argument is used to add a termination time to the algorithm, in seconds. Note that the clock only starts after the initial population of solutions is generated.

- # Example
+ ## Example: Assistance by Harmony Search


- Let's start by fitting very simple models, use those model sto help and define the objectives, then perform more of an extensive search on the variables that are identified more commonly
+ Let's begin by fitting very simple models and use the structure of these models to define our objectives. Then, we can conduct a more extensive search on the variables that are more frequently identified. For instance, in the case below, the complexity is level 3, indicating that we will consider, at most, randomly correlated parameters. This approach is useful for initially identifying a suitable set of contributing factors for our search.



@@ -241,27 +296,30 @@ arguments = {
  '_max_time': 10000
  }
  obj_fun = ObjectiveFunction(X, y, **arguments)
-
  results = harmony_search(obj_fun)
  print(results)
  ```

+ ## Paper
+
+ The following tutorial is in conjunction with our latest paper. A link to the current paper can be found here: [MetaCountRegressor](https://www.overleaf.com/read/mszwpwzcxsng#c5eb0c)
+
  ## Contact
  If you have any questions, ideas to improve MetaCountRegressor, or want to report a bug, just open a new issue in the [GitHub repository](https://github.com/zahern/CountDataEstimation).

  ## Citing MetaCountRegressor
  Please cite MetaCountRegressor as follows:

- Ahern, Z., Corry P., Paz A. (2023). MetaCountRegressor [Computer software]. [https://pypi.org/project/metacounregressor/](https://pypi.org/project/metacounregressor/)
+ Ahern, Z., Corry, P., & Paz, A. (2024). MetaCountRegressor [Computer software]. [https://pypi.org/project/metacounregressor/](https://pypi.org/project/metacounregressor/)

  Or using BibTeX as follows:

  ```bibtex
- @misc{Ahern2023,
- author = {Zeke Ahern and Paul Corry and Alexander Paz},
+ @misc{Ahern2024Meta,
+ author = {Zeke Ahern and Paul Corry and Alexander Paz},
  journal = {PyPi},
  title = {metacountregressor · PyPI},
- url = {https://pypi.org/project/metacountregressor/0.1.47/},
- year = {2023},
+ url = {https://pypi.org/project/metacountregressor/0.1.80/},
+ year = {2024},
  }

--- metacountregressor-0.1.78.dist-info/RECORD
+++ metacountregressor-0.1.88.dist-info/RECORD
@@ -3,17 +3,17 @@ metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0
  metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
  metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
  metacountregressor/helperprocess.py,sha256=Sc5gJ7ffFlkya5B5KQwE33xxXuIQyF6OaYtSikLa3pQ,12968
- metacountregressor/main.py,sha256=asQL1Gey2izglX5FOQFZOaEqzzVacRf88EuSJnCVPKs,16289
+ metacountregressor/main.py,sha256=RKddYRv3UKkszbWD-d2-u8yqcYeniCB5vL3vmj7am5I,16700
  metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
  metacountregressor/metaheuristics.py,sha256=2MW3qlgs7BFbe_w64snLSKc4Y0-e_9sa3s_96rUm_SE,105887
  metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiLur0k,23096
  metacountregressor/pareto_logger__plot.py,sha256=mEU2QN4wmsM7t39GJ_XhJ_jjsdl09JOmG0U2jICrAkI,30037
  metacountregressor/setup.py,sha256=8w6IqX0tJsbYrOI1BJLIJCIvOnunKli5I9fsF5PhHv4,919
  metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
- metacountregressor/solution.py,sha256=OPwMkT1seW06zBYjs3N6vil79k1CE3of7Ua1-SajG0M,265586
+ metacountregressor/solution.py,sha256=6UFri1O62X5GGEmrhMTpi2PQdtbtbJoc02uKixfYXGo,266195
  metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
- metacountregressor-0.1.78.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
- metacountregressor-0.1.78.dist-info/METADATA,sha256=htRwRhVDIqwgapI4uen7XPxoX5EtabdmlXWpkNGK62E,14341
- metacountregressor-0.1.78.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
- metacountregressor-0.1.78.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
- metacountregressor-0.1.78.dist-info/RECORD,,
+ metacountregressor-0.1.88.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+ metacountregressor-0.1.88.dist-info/METADATA,sha256=BLyeZoC1G7i0pMCkJBmsop3EFSg_QFYKH0nWPjWFkHE,18165
+ metacountregressor-0.1.88.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+ metacountregressor-0.1.88.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
+ metacountregressor-0.1.88.dist-info/RECORD,,
--- metacountregressor-0.1.78.dist-info/WHEEL
+++ metacountregressor-0.1.88.dist-info/WHEEL
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (70.3.0)
+ Generator: setuptools (72.1.0)
  Root-Is-Purelib: true
  Tag: py3-none-any