metacountregressor 0.1.237__tar.gz → 0.1.241__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {metacountregressor-0.1.237/metacountregressor.egg-info → metacountregressor-0.1.241}/PKG-INFO +1 -1
  2. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/metaheuristics.py +3 -3
  3. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/solution.py +261 -42
  4. {metacountregressor-0.1.237 → metacountregressor-0.1.241/metacountregressor.egg-info}/PKG-INFO +1 -1
  5. metacountregressor-0.1.241/version.txt +1 -0
  6. metacountregressor-0.1.237/version.txt +0 -1
  7. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/LICENSE.txt +0 -0
  8. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/MANIFEST.in +0 -0
  9. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/README.md +0 -0
  10. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/README.rst +0 -0
  11. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/__init__.py +0 -0
  12. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/_device_cust.py +0 -0
  13. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/app_main.py +0 -0
  14. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/data_split_helper.py +0 -0
  15. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/halton.py +0 -0
  16. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/helperprocess.py +0 -0
  17. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/main.py +0 -0
  18. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/main_old.py +0 -0
  19. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/pareto_file.py +0 -0
  20. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/pareto_logger__plot.py +0 -0
  21. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/setup.py +0 -0
  22. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/single_objective_finder.py +0 -0
  23. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor/test_generated_paper2.py +0 -0
  24. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor.egg-info/SOURCES.txt +0 -0
  25. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor.egg-info/dependency_links.txt +0 -0
  26. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor.egg-info/not-zip-safe +0 -0
  27. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor.egg-info/requires.txt +0 -0
  28. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/metacountregressor.egg-info/top_level.txt +0 -0
  29. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/setup.cfg +0 -0
  30. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/setup.py +0 -0
  31. {metacountregressor-0.1.237 → metacountregressor-0.1.241}/tests/test.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: metacountregressor
- Version: 0.1.237
+ Version: 0.1.241
  Summary: Extensive Testing for Estimation of Data Count Models
  Home-page: https://github.com/zahern/CountDataEstimation
  Author: Zeke Ahern
@@ -236,15 +236,15 @@ def differential_evolution(objective_function, initial_slns=None, **kwargs):
  else:
  de = DifferentialEvolution(objective_function, **kwargs)

- iterations, solutions, best_solutions, best_fitness, best_struct, average_best = de.differential_evolution_run(
+ iterations, solutions, best_solutions, best_fitness, best_struct = de.differential_evolution_run(
  initial_slns=initial_slns, mod_init=man)
-
+ AVERAGE_BEST = st.mean(best_solutions)
  end = datetime.now()
  elapsed_time = end - start
  return DifferentialEvolutionResults(elapsed_time=elapsed_time, iteration=iterations,
  iter_solution=solutions, best_solutions=best_solutions,
  best_fitness=best_fitness,
- best_struct=best_struct, average_best=average_best)
+ best_struct=best_struct, average_best=AVERAGE_BEST)


  def simulated_annealing(objective_function, initial_slns=None, **kwargs):
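The run method no longer returns the average best fitness; metaheuristics.py now derives it from the returned `best_solutions` list via `st.mean` (the module's `statistics` import). A minimal sketch of the same calculation, assuming `best_solutions` is a flat list of per-iteration best fitness values:

```python
import statistics as st

# Per-iteration best fitness values, as returned by differential_evolution_run
best_solutions = [412.7, 401.3, 398.9, 398.9, 397.2]

# The average best is now computed outside the run method
AVERAGE_BEST = st.mean(best_solutions)  # 401.8
```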
@@ -33,6 +33,7 @@ from statsmodels.tools.numdiff import approx_fprime, approx_hess
  from sklearn.preprocessing import StandardScaler, MinMaxScaler
  from texttable import Texttable
  import time
+
  try:
  from ._device_cust import device as dev
  from .pareto_file import Pareto, Solution
@@ -122,14 +123,15 @@ class ObjectiveFunction(object):

  def __init__(self, x_data, y_data, **kwargs):
  self.gbl_best = 1000000.0
+ self.run_bootstrap = kwargs.get('run_bootstrap', False)
  self.linear_regression = kwargs.get('linear_model', False)
- self.reg_penalty = 0
+ self.reg_penalty = 1
  self.power_up_ll = False
  self.nb_parma = 1
  self.bic = None
  self.other_bic = False
  self.test_flag = 1
- self.no_extra_param =1 #if true, fix dispersion. w
+ self.no_extra_param =0 #if true, fix dispersion. w
  if self.other_bic:
  print('change this to false latter ')

@@ -151,7 +153,7 @@ class ObjectiveFunction(object):
  self.rdm_fit = None
  self.rdm_cor_fit = None
  self.dist_fit = None
- self.rounding_point = kwargs.get('decimals_in_coeff', 2)
+ self.rounding_point = kwargs.get('decimals_in_coeff', 4)
  self.MAE = None
  self.best_obj_1 = 1000000.0
  self._obj_1 = kwargs.get('_obj_1', 'bic')
@@ -426,7 +428,7 @@ class ObjectiveFunction(object):



- self.Ndraws = kwargs.get('Ndraws', 200)
+ self.Ndraws = kwargs.get('Ndraws', 100)
  self.draws1 = None
  self.initial_sig = 1 # pass the test of a single model
  self.pvalue_sig_value = .1
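Several of the keyword options touched in this version are read straight from `**kwargs` in the constructor shown above. A hypothetical constructor call with toy data, using only keyword names visible in this diff (whether additional arguments are needed in practice is not shown here):

```python
import numpy as np
import pandas as pd
from metacountregressor.solution import ObjectiveFunction

# Toy data purely for illustration
X = pd.DataFrame({'x1': np.random.rand(100), 'x2': np.random.rand(100)})
y = pd.DataFrame({'Y': np.random.poisson(2, 100)})

obj = ObjectiveFunction(
    X, y,
    linear_model=True,    # route estimation through the linear-regression path
    run_bootstrap=True,   # new flag: bootstrap standard errors after optimization
    decimals_in_coeff=4,  # default raised from 2 to 4 in this version
    Ndraws=100,           # default lowered from 200 to 100
)
```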
@@ -449,7 +451,7 @@ class ObjectiveFunction(object):
  # define the variables


- self._transformations = kwargs.get('_transformations', ["no", "log", "sqrt", "arcsinh"])
+ self._transformations = kwargs.get('_transformations', ["no", "log", "sqrt", "arcsinh", "nil"])
  # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']

  self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])
@@ -485,8 +487,8 @@ class ObjectiveFunction(object):
  model_types = [[0, 1]] # add 2 for Generalized Poisson
  #model_types = [[0]]

- if self:
- model_types = [[0]]
+ if self.linear_regression:
+ model_types = [[1]]
  self.grad_yes = False

  print(f'Linear Model Selected: turning off gradient calculation')
@@ -494,6 +496,11 @@ class ObjectiveFunction(object):

  model_t_dict = {'Poisson':0,
  "NB":1}
+ if self.linear_regression:
+ # Rename key "NB" to "sigma" if it exists in the dictionary
+ if "NB" in model_t_dict:
+ model_t_dict["sigma"] = model_t_dict.pop("NB")
+
  # Retrieve the keys (model names) corresponding to the values in model_types
  model_keys = [key for key, value in model_t_dict.items() if value in model_types[0]]
  # Print the formatted result
@@ -503,6 +510,7 @@ class ObjectiveFunction(object):

  self._model_type_codes = ['p', 'nb',
  'gp', "pl", ["nb-theta", 'nb-dis']]
+ self.update_model_type_codes()
  self._variable = [True] * len(self._discrete_values)
  self._lower_bounds = [None] * \
  len(self._discrete_values) # TODO have continus
@@ -522,7 +530,18 @@ class ObjectiveFunction(object):

  self.solution_analyst = None

+ def update_model_type_codes(self):
+ if self.linear_regression:
+ # Recursively update all occurrences of 'nb' to 'sigma'
+ def replace_nb_with_sigma(item):
+ if isinstance(item, list):
+ return [replace_nb_with_sigma(sub_item) for sub_item in item]
+ elif item == 'nb':
+ return 'sigma'
+ return item

+ # Update the _model_type_codes list
+ self._model_type_codes = replace_nb_with_sigma(self._model_type_codes)



  def over_ride_self(self, **kwargs):
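The rename is exact-match only, so composite codes such as 'nb-theta' are left untouched; only bare 'nb' entries become 'sigma'. A standalone sketch of the same recursion applied to the `_model_type_codes` list from this diff:

```python
def replace_nb_with_sigma(item):
    # Walk nested lists, renaming exact 'nb' entries to 'sigma'
    if isinstance(item, list):
        return [replace_nb_with_sigma(sub_item) for sub_item in item]
    elif item == 'nb':
        return 'sigma'
    return item

codes = ['p', 'nb', 'gp', 'pl', ['nb-theta', 'nb-dis']]
print(replace_nb_with_sigma(codes))
# ['p', 'sigma', 'gp', 'pl', ['nb-theta', 'nb-dis']]
```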
@@ -584,6 +603,7 @@ class ObjectiveFunction(object):
  self.set_defined_seed(42) # Set a specific seed

  modified_fit = self.modify_initial_fit(manual_fit) # Modify the initial fit based on manual_fit
+ self.significant = 1
  self.makeRegression(modified_fit) # Perform regression with the modified fit


@@ -820,6 +840,10 @@ class ObjectiveFunction(object):

  if dispersion == 0:
  return None
+ if dispersion == 1:
+ return np.clip(np.exp(betas[-1]),None, 2)
+
+
  elif dispersion == 2 or dispersion == 1:
  if self.no_extra_param:
  return self.nb_parma
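For `dispersion == 1` the raw estimate in `betas[-1]` is now mapped through an exponential and capped, so the returned dispersion is always positive and at most 2. A one-line illustration of the transform (standalone, with a made-up raw value):

```python
import numpy as np

raw = 1.5                              # hypothetical raw dispersion parameter
value = np.clip(np.exp(raw), None, 2)  # exp(1.5) ≈ 4.48, capped to 2.0
```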
@@ -1126,7 +1150,8 @@ class ObjectiveFunction(object):
  print("-" * 80)

  if solution is not None:
- print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")
+ if self.is_multi:
+ print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")

  self.pvalues = [self.round_with_padding(
  x, 2) for x in self.pvalues]
@@ -1140,13 +1165,15 @@ class ObjectiveFunction(object):
  self.zvalues = np.append(self.zvalues, 50)

  elif self.coeff_[-1] < 0.25:
- print(self.coeff_[-1], 'Warning Check Dispersion')
- print(np.exp(self.coeff_[-1]))
+ #print(self.coeff_[-1], 'Warning Check Dispersion')
+ print(f'dispession is para,aters {np.exp(self.coeff_[-1])}')
  #self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
+


+
+ self.coeff_ = self.convert_coefficients(self.coeff_, model)
  self.coeff_ = [self.round_with_padding(x, self.rounding_point) for x in self.coeff_]
-
  self.stderr = [self.round_with_padding(x, 2) for x in self.stderr]
  self.zvalues = [self.round_with_padding(
  x, 2) for x in self.zvalues]
@@ -1549,7 +1576,12 @@ class ObjectiveFunction(object):
  fixed_vars, random_vars, random_var_cor) # TODO handle distrubution

  distributions = alpha_rdm.copy()
- transformations = ['no'] * len(alpha) # todo add transformations
+ if self.linear_regression:
+
+ transformations = ['nil'] * len(alpha) # todo add transformations
+ else:
+ transformations = ['no'] * len(alpha) # todo add transformations
+
  cnt = 0
  joined_alpha = np.add(alpha_rdm, alpha_rdm_cor)
  for i, x in enumerate(joined_alpha):
@@ -1961,7 +1993,7 @@ class ObjectiveFunction(object):
  subpvalues = pvalues.copy()
  else:
  slice_this_amount = self.num_dispersion_params(dispersion)
- slice_this_amount = 0 # TODO handle this
+
  if pvalues[-1] > sig_value:
  vio_counts += 1
  subpvalues = pvalues[:-slice_this_amount].copy()
@@ -3253,6 +3285,35 @@ class ObjectiveFunction(object):
  print('output', out)
  return out

+ def custom_betas_to_penalise(self, params, dispersion):
+ num_params = self.get_num_params()
+ skip_count = sum(num_params[:2])
+ betas_start = params[:skip_count]
+ if dispersion:
+ betas_end = params[-dispersion:]
+ betas_ = np.concatenate((betas_start,betas_end))
+ return betas_
+ else: return betas_start
+
+
+ def convert_coefficients(self, params, dispersion):
+ num_params = self.get_num_params()
+ skip_count = sum(num_params[:2])
+ remain_params = num_params[2:]
+ params[skip_count:skip_count+remain_params[1]] = np.abs(params[skip_count:skip_count+remain_params[1]])
+ return params
+
+
+
+ def custom_penalty(self, params, penalty):
+ num_params = self.get_num_params()
+ skip_count = sum(num_params[:2])
+
+ for i in params[skip_count:-1]:
+ if i < 0.25:
+ penalty += self.reg_penalty*np.maximum(0, 2.25 -i)**2
+ return penalty
+
  # p is the paramaterisation GP1 is at 0
  def general_poisson(self, mu, y, nu, p=0): # TODO laxywhere??

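`custom_penalty` pushes the penalised slice of coefficients away from zero: any value below 0.25 adds `reg_penalty * max(0, 2.25 - beta)**2` to the objective. A small worked example (standalone, with an assumed `skip_count` of 2 and `reg_penalty` of 1):

```python
import numpy as np

params = np.array([0.8, -1.2, 0.10, 0.60, 0.05])  # last entry is the dispersion slot
skip_count, reg_penalty = 2, 1.0

penalty = 0.0
for beta in params[skip_count:-1]:   # only the middle slice is checked
    if beta < 0.25:
        penalty += reg_penalty * np.maximum(0, 2.25 - beta) ** 2

print(penalty)  # (2.25 - 0.10)**2 = 4.6225; 0.60 is above the 0.25 threshold
```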
@@ -3915,8 +3976,10 @@ class ObjectiveFunction(object):


  if dispersion:
+ sigma = dispersion
  eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])

+ epsilon = np.random.normal(loc=0, scale=sigma, size=eta.shape)
  #eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])+dispersion
  #print('check if this holds size')
  else:
@@ -3925,7 +3988,7 @@ class ObjectiveFunction(object):



- eta = eta.astype('float')
+ eta = eta.astype('float') +epsilon.astype('float')
  return eta

@@ -4591,6 +4654,41 @@ class ObjectiveFunction(object):
  pass
  """
  pass
+ def _linear_logliklihood(self, y, eta, sigma):
+ """
+ Calculate the log-likelihood for a linear regression model with random parameters.
+
+ Parameters:
+ y (np.ndarray): Observed responses (n_samples,).
+ eta (np.ndarray): Predicted values (linear predictor) (n_samples, 1, n_draws).
+ sigma (float): Standard deviation of the error term.
+
+ Returns:
+ float: The log-likelihood value aggregated across all draws.
+ """
+ n_samples, _, n_draws = eta.shape # Number of observations and draws
+
+ # Repeat y to match the shape of eta
+ y_repeated = np.repeat(y, n_draws, axis=2) # Shape (n_samples, 1, n_draws)
+
+ # Calculate residuals for each draw
+ residuals = y_repeated - eta # Shape (n_samples, 1, n_draws)
+
+ # Calculate the residual sum of squares (RSS) for each draw
+ rss = np.sum(residuals ** 2, axis=(0, 1)) # Shape (n_draws,)
+
+ # Log-likelihood for each draw
+ log_likelihood_per_draw = (
+ -0.5 * n_samples * np.log(2 * np.pi) # Constant term
+ - 0.5 * n_samples * np.log(sigma**2) # Variance term
+ - 0.5 * rss / sigma**2 # Residual term
+ ) # Shape (n_draws,)
+
+ # Aggregate across draws (e.g., take the mean log-likelihood)
+ log_likelihood_value = np.mean(log_likelihood_per_draw)
+
+ return log_likelihood_value
+
  def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
  return_gradient_n=False, dispersion=0, test_set=0, return_EV=False, verbose=0, corr_list=None,
  zi_list=None, exog_infl=None, draws_grouped=None, Xgroup=None, model_nature=None, kwarg=None,
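Per draw this is the standard Gaussian log-likelihood, ℓ = -(n/2)·log(2π) - (n/2)·log(σ²) - RSS/(2σ²). A quick standalone check (not package code) that the closed form matches `scipy.stats.norm` on simulated data:

```python
import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(42)
n, sigma = 50, 1.3
eta = rng.normal(size=n)                   # one draw of the linear predictor
y = eta + rng.normal(scale=sigma, size=n)  # simulated responses

rss = np.sum((y - eta) ** 2)
closed_form = (-0.5 * n * np.log(2 * np.pi)
               - 0.5 * n * np.log(sigma ** 2)
               - 0.5 * rss / sigma ** 2)

# Identical to summing per-observation normal log-densities
assert np.isclose(closed_form, norm.logpdf(y, loc=eta, scale=sigma).sum())
```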
@@ -4654,8 +4752,9 @@ class ObjectiveFunction(object):

  if self.linear_regression:
  # LINEAR MODEL PROCESS
- mse = np.mean((y - eVd) ** 2)
- return mse
+ mse = self._linear_logliklihood(y, eVd, main_disper)
+ #mse = np.mean((y - eVd) ** 2)
+ return (-mse + penalty)*self.minimize_scaler

  ### GLM PROCESS ########
  llf_main = self.loglik_obs(
@@ -4671,7 +4770,10 @@ class ObjectiveFunction(object):

  loglik += 2*loglik
  print('am i powering up')
- penalty = self.regularise_l2(betas)
+
+ b_pen = self.custom_betas_to_penalise(betas, dispersion)
+ penalty = self.regularise_l2(betas) + self.regularise_l1(betas)
+ penalty = self.custom_penalty(betas, penalty)

  if not np.isreal(loglik):
  loglik = - 10000000.0
@@ -4888,12 +4990,24 @@ class ObjectiveFunction(object):
  betas_hetro_sd = None

  Vdr = dev.cust_einsum("njk,nkr -> njr", Xdr, Br) # (N,P,R)
- if self:
+ if self.linear_regression:
  ### LINEAR MODEL WAY #######
  eVd = np.clip(
  Vdf[:, :, None] + Vdr + Vdh + dev.np.array(offset), None, None)
- mse = np.mean((y - eVd) ** 2)
- return mse
+ main_disper = self.get_dispersion_paramaters(betas, dispersion)
+ penalty, main_disper = self._penalty_dispersion(
+ dispersion, main_disper, eVd, y, penalty, model_nature)
+ error_term = np.random.normal(loc=0, scale=main_disper, size=eVd.shape)
+ b_pen = self.custom_betas_to_penalise(betas, dispersion)
+ penalty += self.regularise_l2(b_pen) + self.regularise_l1(b_pen)
+ #penalty = 0
+ penalty = self.custom_penalty(betas, penalty)
+ # LINEAR MODEL PROCESS
+ mse = self._linear_logliklihood(y, eVd, main_disper)
+ #mse = np.mean((y - eVd) ** 2)
+
+ return -mse + penalty
+

  ##### GLM WAY #####
  eVd = dev.np.exp(np.clip(
@@ -4959,7 +5073,7 @@ class ObjectiveFunction(object):
  if self.power_up_ll:
  penalty += self.regularise_l2(betas)

- penalty += self.regularise_l2(betas)
+ penalty += self.regularise_l2(betas) + self.regularise_l1(betas)
  if not return_gradient:

  output = ((-loglik + penalty)*self.minimize_scaler,)
@@ -5022,6 +5136,11 @@ class ObjectiveFunction(object):
  else:
  return -self.reg_penalty*sum(np.square(betas.copy()))

+ def regularise_l1(self, betas, backwards = False):
+ if backwards == False:
+ return self.reg_penalty*sum(np.square(betas.copy()))
+ else:
+ return -self.reg_penalty*sum(np.abs(betas.copy()))

  def _concat_gradients(self, gr_f):
  gr = np.concatenate((gr_f), axis=1)
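Note that the forward branch of the new `regularise_l1` squares the coefficients, which duplicates the L2 (ridge) term rather than applying an L1 (lasso) penalty; only the `backwards` branch uses absolute values. For reference, a textbook pair of penalties looks like this (standalone sketch, hypothetical helper names):

```python
import numpy as np

def l1_penalty(betas, weight=1.0):
    # Lasso: weight * sum(|beta|), encourages exact zeros
    return weight * np.sum(np.abs(betas))

def l2_penalty(betas, weight=1.0):
    # Ridge: weight * sum(beta**2), shrinks coefficients smoothly
    return weight * np.sum(np.square(betas))

betas = np.array([0.5, -2.0, 0.0])
print(l1_penalty(betas), l2_penalty(betas))  # 2.5, 4.25
```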
@@ -5480,9 +5599,7 @@ class ObjectiveFunction(object):
  convergence = optim_res['success']
  coeff_ = optim_res['x']
  penalty = 0
- for i in coeff_: # pvalue penalty should handle this
- if abs(i) > 120:
- penalty += abs(i)
+
  if 'hess_inv' in optim_res:
  covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
  if robust else optim_res['hess_inv']
@@ -5496,16 +5613,7 @@ class ObjectiveFunction(object):
  # stderr = [np.min(np.abs(optim_res['x'][i]/random.uniform(1.8, 3)), stderr[i]) if i > len(self.none_handler(self.fixed_fit)) and np.abs(optim_res['x'][i] > 0.2) else stderr[i] for i in range(len(optim_res['x']))]
  if is_dispersion:
  stderr[-1] = random.uniform(0.001, 0.005)
- if simple_fit == False:
- # gets the number of parmas before the correlations
- pre_cor_pams = sum(self.get_num_params()[:3])
- # gets the number of correlated rpm
- post_cor_pams = sum(self.get_num_params()[:5])
-
-
- # this calculation takes into account the correlated rpms distinct values
- for i in range(pre_cor_pams, post_cor_pams):
- stderr[i] = stderr[i] / np.sqrt(sample_size)
+

  if np.isnan(stderr).any():
  raise ValueError("Error: Matrix contains NaN values")
@@ -5518,6 +5626,9 @@ class ObjectiveFunction(object):
  optim_res['fun'] = 10.0 ** 10
  if self.power_up_ll:
  loglikelihood =-optim_res['fun']/2 - penalty
+ elif self.linear_regression:
+ loglikelihood= -optim_res['fun']
+
  else:
  loglikelihood = -optim_res['fun']/self.minimize_scaler - penalty

@@ -5817,11 +5928,15 @@ class ObjectiveFunction(object):
  draws_grouped = self._prepare_grouped_draws(XG, mod) if XG is not None else None

  # Optimization method and options
- method = self.method_ll
+ method = self.method_ll if bounds is None else 'L-BFGS-B'
  print('updataing methods')
- method = 'Nelder-Mead-BFGS'
- options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 2000}

+ #method = 'Nelder-Mead-BFGS'
+ options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 4000}
+ args=(
+ X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
+ self.rdm_cor_fit, None, None, draws_grouped, XG, mod
+ )
  # Run optimization
  optimization_result = self._minimize(
  self._loglik_gradient,
@@ -5835,9 +5950,94 @@ class ObjectiveFunction(object):
  tol=tol.get('ftol', 1e-8), # Use 'ftol' as the default tolerance
  options=options
  )
+
+
+
+ # Run the bootstrap to calculate standard errors
+ if self.run_bootstrap:
+
+ std_errors = self.bootstrap_std_dev(
+ initial_params=optimization_result.x,
+ XX=XX,
+ y=y,
+ dispersion=dispersion,
+ bounds=bounds,
+ tol=tol,
+ mod=mod,
+ n_bootstraps=100
+ )
+ self.stderr = std_errors
+
+
+
+
  return optimization_result

+

+
+ def bootstrap_std_dev(self, initial_params, XX, y, dispersion, bounds, tol, mod, n_bootstraps=100):
+ """
+ Perform bootstrap resampling to estimate the standard deviations of the parameters.
+
+ Parameters:
+ self: Reference to the class instance.
+ initial_params: Initial parameter estimates from the optimization.
+ XX: Design matrix.
+ y: Observed outcomes.
+ dispersion: Dispersion parameter (0=Poisson, 1=NB, 2=GP).
+ bounds: List of bounds for each parameter.
+ tol: Tolerance for the optimization process (dictionary with ftol and gtol).
+ mod: Dictionary containing additional data.
+ n_bootstraps: Number of bootstrap resamples (default=100).
+
+ Returns:
+ std_devs: Standard deviations of the parameter estimates (from bootstrap resampling).
+ """
+ # List to store parameter estimates from each bootstrap iteration
+ bootstrap_estimates = []
+
+ # Extract design matrices and additional components from `mod`
+ X, Xr, XG = mod.get('X'), mod.get('Xr'), mod.get('XG')
+ distribution = mod.get('dist_fit')
+
+ # Prepare draws
+ draws = self._prepare_draws(Xr, distribution)
+ draws_grouped = self._prepare_grouped_draws(XG, mod) if XG is not None else None
+
+ # Perform bootstrap iterations
+ for _ in range(n_bootstraps):
+ # Resample data with replacement
+ indices = np.random.choice(len(y), size=len(y), replace=True)
+ X_resampled = X[indices]
+ y_resampled = y[indices]
+
+ # Refit the model with resampled data
+ bootstrap_result = self._minimize(
+ self._loglik_gradient,
+ initial_params,
+ args=(
+ X_resampled, y_resampled, draws, X_resampled, Xr, self.batch_size, self.grad_yes,
+ self.hess_yes, dispersion, 0, False, 0, self.rdm_cor_fit, None, None,
+ draws_grouped, XG, mod
+ ),
+ method=self.method_ll,
+ bounds=bounds,
+ tol=tol.get('ftol', 1e-8), # Use 'ftol' as the default tolerance
+ options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 2000}
+ )
+
+ # Store the parameter estimates from this bootstrap iteration
+ bootstrap_estimates.append(bootstrap_result.x)
+
+ # Convert bootstrap parameter estimates to a NumPy array
+ bootstrap_estimates = np.array(bootstrap_estimates)
+
+ # Compute the standard deviations of the parameter estimates
+ std_devs = np.std(bootstrap_estimates, axis=0)
+
+ return std_devs
+
  def _initialize_params_and_bounds(self, XX, dispersion):
  """Initialize parameters and set bounds for optimization."""
  num_params = XX.shape[2] # Number of features
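The new method is the classic nonparametric bootstrap: resample rows with replacement, refit, and take the standard deviation of the parameter estimates across refits. The same idea on a plain OLS estimator (standalone illustration, not the package's API):

```python
import numpy as np

rng = np.random.default_rng(0)
n = 200
X = np.column_stack([np.ones(n), rng.normal(size=n)])
y = X @ np.array([1.0, 2.0]) + rng.normal(size=n)

estimates = []
for _ in range(100):
    idx = rng.choice(n, size=n, replace=True)   # resample rows with replacement
    beta, *_ = np.linalg.lstsq(X[idx], y[idx], rcond=None)  # refit on the resample
    estimates.append(beta)

std_devs = np.std(np.array(estimates), axis=0)  # bootstrap standard errors
print(std_devs)
```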
@@ -5963,7 +6163,13 @@ class ObjectiveFunction(object):
  if dispersion == 0:
  return [(-30, 30) for _ in initial_params]
  elif dispersion == 1:
- return [(-30, 30) for _ in initial_params[:-1]] + [(-1, 5)]
+ num_params = self.get_num_params()
+ skip_count = sum(num_params[:2])
+
+
+ bounds = [(-3, 3) for _ in initial_params[:-1]] + [(-1, 1)]
+ bounds[skip_count: -1] = [(0.02, None) for _ in bounds[skip_count: -1]]
+ return bounds
  elif dispersion == 2:
  return [(-5, 5) for _ in initial_params[:-1]] + [(0.1, 0.99)]
  else:
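The resulting bound list keeps (-3, 3) for the leading mean coefficients, forces the middle slice (the random-parameter spreads) to be at least 0.02, and restricts the trailing dispersion slot to (-1, 1). The layout for a hypothetical six-parameter model with an assumed `skip_count` of 3:

```python
initial_params = [0.0] * 6
skip_count = 3  # assumed count of fixed + random mean coefficients

bounds = [(-3, 3) for _ in initial_params[:-1]] + [(-1, 1)]
bounds[skip_count:-1] = [(0.02, None) for _ in bounds[skip_count:-1]]
print(bounds)
# [(-3, 3), (-3, 3), (-3, 3), (0.02, None), (0.02, None), (-1, 1)]
```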
@@ -6024,11 +6230,16 @@ class ObjectiveFunction(object):
  Initial parameter array.
  """
  # Generate random initial coefficients
- initial_params = np.random.uniform(-0.05, 0.05, size=num_coefficients)
+ initial_params = np.random.uniform(-.1, 0.1, size=num_coefficients)
+ parma_sum = sum(self.get_num_params()[:2])
+
+
+ initial_params[parma_sum:-dispersion] =0.5

  # Add dispersion parameter if applicable
  if dispersion > 0:
- initial_params = np.insert(initial_params, -1, 0.)
+ initial_params[-1] = 0.0
+ #initial_params[0] =3

  return initial_params

@@ -6047,8 +6258,9 @@ class ObjectiveFunction(object):
  obj_1, log_lik, betas, stderr, pvalues, zvalues, is_halton, is_delete
  """
  try:
+ dispersion = mod.get('dispersion', dispersion)
  # Preprocessing
- tol = {'ftol': 1e-8, 'gtol': 1e-6}
+ tol = {'ftol': 1e-6, 'gtol': 1e-6, 'xtol': 1e-6}
  y, X, Xr, XG, XH = mod.get('y'), mod.get('X'), mod.get('Xr'), mod.get('XG'), mod.get('XH')

  # Validate input data
@@ -7369,7 +7581,7 @@ class ObjectiveFunction(object):
  sequence.append(n_th_number)
  return sequence

- def _generate_halton_draws(self, sample_size, n_draws, n_vars, shuffled=False, drop=100, primes=None,
+ def _generate_halton_draws(self, sample_size, n_draws, n_vars, shuffled=False, drop=10, primes=None,
  long=False) -> np.ndarray:
  """Generate Halton draws for multiple random variables using different primes as base"""
  if primes is None:
@@ -7398,6 +7610,7 @@ class ObjectiveFunction(object):
  i += 1
  t += 1
  seq = seq[drop:length + drop]
+ seq = np.clip(seq, 1e-4, 1-1e-4)
  if shuffled:
  np.random.shuffle(seq)
  return seq
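Clipping the Halton uniforms into [1e-4, 1 - 1e-4] guards the downstream inverse-CDF transforms: a draw of exactly 0 or 1 would map to ±inf under, for example, the normal ppf. A minimal standalone Halton generator with the same safeguard (a sketch, not the package's implementation):

```python
import numpy as np
from scipy.stats import norm

def halton(length, base=2, drop=10):
    """Radical-inverse Halton sequence in a prime base, burn-in dropped."""
    seq = np.empty(length + drop)
    for i in range(length + drop):
        n, f, h = i + 1, 1.0, 0.0
        while n > 0:           # radical inverse of n in the given base
            f /= base
            h += f * (n % base)
            n //= base
        seq[i] = h
    seq = seq[drop:]
    return np.clip(seq, 1e-4, 1 - 1e-4)  # keep strictly inside (0, 1)

draws = halton(100, base=3)
normals = norm.ppf(draws)  # finite, because the uniforms were clipped
```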
@@ -7451,6 +7664,12 @@ class ObjectiveFunction(object):
  (1 - x) * np.random.gamma(2, scale=theta, size=n)
  return b

+
+
+
+
+
+
  def _compute_derivatives(self, betas, draws, betas_std=None, distribution=None):
  # N, N_draws, K = len(draws)/self.Ndraws, self.Ndraws, len(self._distribution)
  # N, D = draws.shape[0], draws.shape[1]
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: metacountregressor
- Version: 0.1.237
+ Version: 0.1.241
  Summary: Extensive Testing for Estimation of Data Count Models
  Home-page: https://github.com/zahern/CountDataEstimation
  Author: Zeke Ahern
@@ -0,0 +1 @@
+ 0.1.241
@@ -1 +0,0 @@
- 0.1.237