metacountregressor 0.1.237__tar.gz → 0.1.239__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {metacountregressor-0.1.237/metacountregressor.egg-info → metacountregressor-0.1.239}/PKG-INFO +1 -1
  2. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor/metaheuristics.py +3 -3
  3. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor/solution.py +254 -40
  4. {metacountregressor-0.1.237 → metacountregressor-0.1.239/metacountregressor.egg-info}/PKG-INFO +1 -1
  5. metacountregressor-0.1.239/version.txt +1 -0
  6. metacountregressor-0.1.237/version.txt +0 -1
  7. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/LICENSE.txt +0 -0
  8. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/MANIFEST.in +0 -0
  9. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/README.md +0 -0
  10. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/README.rst +0 -0
  11. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor/__init__.py +0 -0
  12. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor/_device_cust.py +0 -0
  13. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor/app_main.py +0 -0
  14. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor/data_split_helper.py +0 -0
  15. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor/halton.py +0 -0
  16. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor/helperprocess.py +0 -0
  17. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor/main.py +0 -0
  18. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor/main_old.py +0 -0
  19. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor/pareto_file.py +0 -0
  20. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor/pareto_logger__plot.py +0 -0
  21. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor/setup.py +0 -0
  22. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor/single_objective_finder.py +0 -0
  23. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor/test_generated_paper2.py +0 -0
  24. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor.egg-info/SOURCES.txt +0 -0
  25. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor.egg-info/dependency_links.txt +0 -0
  26. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor.egg-info/not-zip-safe +0 -0
  27. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor.egg-info/requires.txt +0 -0
  28. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/metacountregressor.egg-info/top_level.txt +0 -0
  29. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/setup.cfg +0 -0
  30. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/setup.py +0 -0
  31. {metacountregressor-0.1.237 → metacountregressor-0.1.239}/tests/test.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: metacountregressor
- Version: 0.1.237
+ Version: 0.1.239
  Summary: Extensive Testing for Estimation of Data Count Models
  Home-page: https://github.com/zahern/CountDataEstimation
  Author: Zeke Ahern
metacountregressor/metaheuristics.py
@@ -236,15 +236,15 @@ def differential_evolution(objective_function, initial_slns=None, **kwargs):
      else:
          de = DifferentialEvolution(objective_function, **kwargs)

-     iterations, solutions, best_solutions, best_fitness, best_struct, average_best = de.differential_evolution_run(
+     iterations, solutions, best_solutions, best_fitness, best_struct = de.differential_evolution_run(
          initial_slns=initial_slns, mod_init=man)
-
+     AVERAGE_BEST = st.mean(best_solutions)
      end = datetime.now()
      elapsed_time = end - start
      return DifferentialEvolutionResults(elapsed_time=elapsed_time, iteration=iterations,
                                          iter_solution=solutions, best_solutions=best_solutions,
                                          best_fitness=best_fitness,
-                                         best_struct=best_struct, average_best=average_best)
+                                         best_struct=best_struct, average_best=AVERAGE_BEST)


  def simulated_annealing(objective_function, initial_slns=None, **kwargs):
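The hunk above drops `average_best` from the tuple returned by `differential_evolution_run` and has the wrapper compute it instead. A minimal sketch of the new pattern, assuming `st` aliases the standard `statistics` module (the import is outside this hunk):

    import statistics as st

    # best_solutions as returned by differential_evolution_run:
    # one best-fitness value per iteration (hypothetical numbers)
    best_solutions = [412.7, 398.2, 390.5, 389.9]

    # The wrapper now derives the average itself rather than
    # receiving it as a sixth return value
    AVERAGE_BEST = st.mean(best_solutions)
    print(AVERAGE_BEST)  # 397.825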
metacountregressor/solution.py
@@ -33,6 +33,7 @@ from statsmodels.tools.numdiff import approx_fprime, approx_hess
  from sklearn.preprocessing import StandardScaler, MinMaxScaler
  from texttable import Texttable
  import time
+
  try:
      from ._device_cust import device as dev
      from .pareto_file import Pareto, Solution
@@ -122,14 +123,15 @@ class ObjectiveFunction(object):

  def __init__(self, x_data, y_data, **kwargs):
      self.gbl_best = 1000000.0
+     self.run_bootstrap = kwargs.get('run_bootstrap', False)
      self.linear_regression = kwargs.get('linear_model', False)
-     self.reg_penalty = 0
+     self.reg_penalty = 1
      self.power_up_ll = False
      self.nb_parma = 1
      self.bic = None
      self.other_bic = False
      self.test_flag = 1
-     self.no_extra_param =1 #if true, fix dispersion. w
+     self.no_extra_param =0 #if true, fix dispersion. w
      if self.other_bic:
          print('change this to false latter ')

@@ -151,7 +153,7 @@ class ObjectiveFunction(object):
  self.rdm_fit = None
  self.rdm_cor_fit = None
  self.dist_fit = None
- self.rounding_point = kwargs.get('decimals_in_coeff', 2)
+ self.rounding_point = kwargs.get('decimals_in_coeff', 4)
  self.MAE = None
  self.best_obj_1 = 1000000.0
  self._obj_1 = kwargs.get('_obj_1', 'bic')
@@ -426,7 +428,7 @@ class ObjectiveFunction(object):



- self.Ndraws = kwargs.get('Ndraws', 200)
+ self.Ndraws = kwargs.get('Ndraws', 100)
  self.draws1 = None
  self.initial_sig = 1 # pass the test of a single model
  self.pvalue_sig_value = .1
@@ -485,8 +487,8 @@ class ObjectiveFunction(object):
  model_types = [[0, 1]] # add 2 for Generalized Poisson
  #model_types = [[0]]

- if self:
-     model_types = [[0]]
+ if self.linear_regression:
+     model_types = [[1]]
      self.grad_yes = False

      print(f'Linear Model Selected: turning off gradient calculation')
@@ -494,6 +496,11 @@ class ObjectiveFunction(object):

  model_t_dict = {'Poisson':0,
                  "NB":1}
+ if self.linear_regression:
+     # Rename key "NB" to "sigma" if it exists in the dictionary
+     if "NB" in model_t_dict:
+         model_t_dict["sigma"] = model_t_dict.pop("NB")
+
  # Retrieve the keys (model names) corresponding to the values in model_types
  model_keys = [key for key, value in model_t_dict.items() if value in model_types[0]]
  # Print the formatted result
@@ -503,6 +510,7 @@ class ObjectiveFunction(object):

  self._model_type_codes = ['p', 'nb',
                            'gp', "pl", ["nb-theta", 'nb-dis']]
+ self.update_model_type_codes()
  self._variable = [True] * len(self._discrete_values)
  self._lower_bounds = [None] * \
      len(self._discrete_values) # TODO have continus
@@ -522,7 +530,18 @@ class ObjectiveFunction(object):

  self.solution_analyst = None

+ def update_model_type_codes(self):
+     if self.linear_regression:
+         # Recursively update all occurrences of 'nb' to 'sigma'
+         def replace_nb_with_sigma(item):
+             if isinstance(item, list):
+                 return [replace_nb_with_sigma(sub_item) for sub_item in item]
+             elif item == 'nb':
+                 return 'sigma'
+             return item

+         # Update the _model_type_codes list
+         self._model_type_codes = replace_nb_with_sigma(self._model_type_codes)


  def over_ride_self(self, **kwargs):
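For reference, the recursive rename walks nested lists, so only exact 'nb' entries are replaced; compound codes such as 'nb-theta' and 'nb-dis' are left untouched. A standalone illustration (not part of the diff):

    codes = ['p', 'nb', 'gp', "pl", ["nb-theta", 'nb-dis']]

    def replace_nb_with_sigma(item):
        if isinstance(item, list):
            return [replace_nb_with_sigma(sub_item) for sub_item in item]
        elif item == 'nb':
            return 'sigma'
        return item

    print(replace_nb_with_sigma(codes))
    # ['p', 'sigma', 'gp', 'pl', ['nb-theta', 'nb-dis']]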
@@ -584,6 +603,7 @@ class ObjectiveFunction(object):
  self.set_defined_seed(42) # Set a specific seed

  modified_fit = self.modify_initial_fit(manual_fit) # Modify the initial fit based on manual_fit
+ self.significant = 1
  self.makeRegression(modified_fit) # Perform regression with the modified fit

@@ -820,6 +840,10 @@ class ObjectiveFunction(object):

  if dispersion == 0:
      return None
+ if dispersion == 1:
+     return np.clip(np.exp(betas[-1]),None, 2)
+
+
  elif dispersion == 2 or dispersion == 1:
      if self.no_extra_param:
          return self.nb_parma
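The new `dispersion == 1` branch exponentiates the final beta and caps the result at 2, bounding the estimated negative-binomial dispersion; because it returns immediately, the `elif` that follows now only fires for `dispersion == 2`. A minimal illustration of the clip:

    import numpy as np

    # np.clip(np.exp(b), None, 2): no lower bound, upper bound of 2
    for b in (-2.0, 0.0, 1.5):
        print(np.clip(np.exp(b), None, 2))
    # 0.1353..., 1.0, 2.0 (exp(1.5) ~ 4.48 is capped)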
@@ -1126,7 +1150,8 @@ class ObjectiveFunction(object):
  print("-" * 80)

  if solution is not None:
-     print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")
+     if self.is_multi:
+         print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")

  self.pvalues = [self.round_with_padding(
      x, 2) for x in self.pvalues]
@@ -1140,13 +1165,15 @@ class ObjectiveFunction(object):
  self.zvalues = np.append(self.zvalues, 50)

  elif self.coeff_[-1] < 0.25:
-     print(self.coeff_[-1], 'Warning Check Dispersion')
-     print(np.exp(self.coeff_[-1]))
+     #print(self.coeff_[-1], 'Warning Check Dispersion')
+     print(f'dispession is para,aters {np.exp(self.coeff_[-1])}')
      #self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
+


+
+ self.coeff_ = self.convert_coefficients(self.coeff_, model)
  self.coeff_ = [self.round_with_padding(x, self.rounding_point) for x in self.coeff_]
-
  self.stderr = [self.round_with_padding(x, 2) for x in self.stderr]
  self.zvalues = [self.round_with_padding(
      x, 2) for x in self.zvalues]
@@ -1961,7 +1988,7 @@ class ObjectiveFunction(object):
      subpvalues = pvalues.copy()
  else:
      slice_this_amount = self.num_dispersion_params(dispersion)
-     slice_this_amount = 0 # TODO handle this
+
      if pvalues[-1] > sig_value:
          vio_counts += 1
      subpvalues = pvalues[:-slice_this_amount].copy()
@@ -3253,6 +3280,35 @@ class ObjectiveFunction(object):
  print('output', out)
  return out

+ def custom_betas_to_penalise(self, params, dispersion):
+     num_params = self.get_num_params()
+     skip_count = sum(num_params[:2])
+     betas_start = params[:skip_count]
+     if dispersion:
+         betas_end = params[-dispersion:]
+         betas_ = np.concatenate((betas_start,betas_end))
+         return betas_
+     else: return betas_start
+
+
+ def convert_coefficients(self, params, dispersion):
+     num_params = self.get_num_params()
+     skip_count = sum(num_params[:2])
+     remain_params = num_params[2:]
+     params[skip_count:skip_count+remain_params[1]] = np.abs(params[skip_count:skip_count+remain_params[1]])
+     return params
+
+
+
+ def custom_penalty(self, params, penalty):
+     num_params = self.get_num_params()
+     skip_count = sum(num_params[:2])
+
+     for i in params[skip_count:-1]:
+         if i < 0.25:
+             penalty += self.reg_penalty*np.maximum(0, 2.25 -i)**2
+     return penalty
+
  # p is the paramaterisation GP1 is at 0
  def general_poisson(self, mu, y, nu, p=0): # TODO laxywhere??

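The added `custom_penalty` applies a quadratic hinge to every random-parameter coefficient below 0.25, steering the optimizer away from small or negative values: for each flagged beta it adds reg_penalty * max(0, 2.25 - beta)**2. A worked instance, assuming reg_penalty = 1 as set in `__init__` above:

    import numpy as np

    reg_penalty = 1
    penalty = 0.0
    for b in (0.1, 0.5, -0.3):  # candidate coefficients; only b < 0.25 is hit
        if b < 0.25:
            penalty += reg_penalty * np.maximum(0, 2.25 - b) ** 2
    print(penalty)  # (2.25-0.1)**2 + (2.25+0.3)**2 = 4.6225 + 6.5025 = 11.125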
@@ -3915,8 +3971,10 @@ class ObjectiveFunction(object):


  if dispersion:
+     sigma = dispersion
      eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])

+     epsilon = np.random.normal(loc=0, scale=sigma, size=eta.shape)
      #eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])+dispersion
      #print('check if this holds size')
  else:
@@ -3925,7 +3983,7 @@ class ObjectiveFunction(object):


  if linear:
-     eta = eta.astype('float')
+     eta = eta.astype('float') +epsilon.astype('float')
  return eta

@@ -4591,6 +4649,41 @@ class ObjectiveFunction(object):
      pass
  """
  pass
+ def _linear_logliklihood(self, y, eta, sigma):
+     """
+     Calculate the log-likelihood for a linear regression model with random parameters.
+
+     Parameters:
+     y (np.ndarray): Observed responses (n_samples,).
+     eta (np.ndarray): Predicted values (linear predictor) (n_samples, 1, n_draws).
+     sigma (float): Standard deviation of the error term.
+
+     Returns:
+     float: The log-likelihood value aggregated across all draws.
+     """
+     n_samples, _, n_draws = eta.shape # Number of observations and draws
+
+     # Repeat y to match the shape of eta
+     y_repeated = np.repeat(y, n_draws, axis=2) # Shape (n_samples, 1, n_draws)
+
+     # Calculate residuals for each draw
+     residuals = y_repeated - eta # Shape (n_samples, 1, n_draws)
+
+     # Calculate the residual sum of squares (RSS) for each draw
+     rss = np.sum(residuals ** 2, axis=(0, 1)) # Shape (n_draws,)
+
+     # Log-likelihood for each draw
+     log_likelihood_per_draw = (
+         -0.5 * n_samples * np.log(2 * np.pi) # Constant term
+         - 0.5 * n_samples * np.log(sigma**2) # Variance term
+         - 0.5 * rss / sigma**2 # Residual term
+     ) # Shape (n_draws,)
+
+     # Aggregate across draws (e.g., take the mean log-likelihood)
+     log_likelihood_value = np.mean(log_likelihood_per_draw)
+
+     return log_likelihood_value
+
  def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
                       return_gradient_n=False, dispersion=0, test_set=0, return_EV=False, verbose=0, corr_list=None,
                       zi_list=None, exog_infl=None, draws_grouped=None, Xgroup=None, model_nature=None, kwarg=None,
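Written out, the Gaussian log-likelihood that `_linear_logliklihood` evaluates per draw r over n observations is

    \ell_r = -\tfrac{n}{2}\log(2\pi) - \tfrac{n}{2}\log(\sigma^2) - \frac{\mathrm{RSS}_r}{2\sigma^2},
    \qquad \mathrm{RSS}_r = \sum_{i=1}^{n} (y_i - \eta_{ir})^2,

and the returned value is the plain average \ell = \frac{1}{R}\sum_{r=1}^{R} \ell_r. Averaging log-likelihoods across draws, rather than taking the log of the averaged likelihoods as in classical simulated maximum likelihood, is a design choice of this implementation.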
@@ -4654,8 +4747,9 @@ class ObjectiveFunction(object):

  if self.linear_regression:
      # LINEAR MODEL PROCESS
-     mse = np.mean((y - eVd) ** 2)
-     return mse
+     mse = self._linear_logliklihood(y, eVd, main_disper)
+     #mse = np.mean((y - eVd) ** 2)
+     return (-mse + penalty)*self.minimize_scaler

  ### GLM PROCESS ########
  llf_main = self.loglik_obs(
@@ -4671,7 +4765,10 @@ class ObjectiveFunction(object):

      loglik += 2*loglik
      print('am i powering up')
-     penalty = self.regularise_l2(betas)
+
+     b_pen = self.custom_betas_to_penalise(betas, dispersion)
+     penalty = self.regularise_l2(betas) + self.regularise_l1(betas)
+     penalty = self.custom_penalty(betas, penalty)

  if not np.isreal(loglik):
      loglik = - 10000000.0
@@ -4888,12 +4985,24 @@ class ObjectiveFunction(object):
  betas_hetro_sd = None

  Vdr = dev.cust_einsum("njk,nkr -> njr", Xdr, Br) # (N,P,R)
- if self:
+ if self.linear_regression:
      ### LINEAR MODEL WAY #######
      eVd = np.clip(
          Vdf[:, :, None] + Vdr + Vdh + dev.np.array(offset), None, None)
-     mse = np.mean((y - eVd) ** 2)
-     return mse
+     main_disper = self.get_dispersion_paramaters(betas, dispersion)
+     penalty, main_disper = self._penalty_dispersion(
+         dispersion, main_disper, eVd, y, penalty, model_nature)
+     error_term = np.random.normal(loc=0, scale=main_disper, size=eVd.shape)
+     b_pen = self.custom_betas_to_penalise(betas, dispersion)
+     penalty += self.regularise_l2(b_pen) + self.regularise_l1(b_pen)
+     #penalty = 0
+     penalty = self.custom_penalty(betas, penalty)
+     # LINEAR MODEL PROCESS
+     mse = self._linear_logliklihood(y, eVd, main_disper)
+     #mse = np.mean((y - eVd) ** 2)
+
+     return -mse + penalty
+

  ##### GLM WAY #####
  eVd = dev.np.exp(np.clip(
@@ -4959,7 +5068,7 @@ class ObjectiveFunction(object):
  if self.power_up_ll:
      penalty += self.regularise_l2(betas)

- penalty += self.regularise_l2(betas)
+ penalty += self.regularise_l2(betas) + self.regularise_l1(betas)
  if not return_gradient:

      output = ((-loglik + penalty)*self.minimize_scaler,)
@@ -5022,6 +5131,11 @@ class ObjectiveFunction(object):
  else:
      return -self.reg_penalty*sum(np.square(betas.copy()))

+ def regularise_l1(self, betas, backwards = False):
+     if backwards == False:
+         return self.reg_penalty*sum(np.square(betas.copy()))
+     else:
+         return -self.reg_penalty*sum(np.abs(betas.copy()))

  def _concat_gradients(self, gr_f):
      gr = np.concatenate((gr_f), axis=1)
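Note that the `backwards == False` branch of the new `regularise_l1` squares the coefficients, the same functional form as `regularise_l2` above it; a textbook L1 (lasso) term uses absolute values in both directions. If that was the intent, a conventional version would look like the following sketch (an assumption about intent, not the released code):

    import numpy as np

    def l1_penalty(betas, reg_penalty=1.0, backwards=False):
        # Conventional L1 term: lambda * sum(|beta|)
        value = reg_penalty * np.sum(np.abs(betas))
        return -value if backwards else value

    print(l1_penalty(np.array([0.5, -1.0, 2.0])))  # 3.5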
@@ -5480,9 +5594,7 @@ class ObjectiveFunction(object):
  convergence = optim_res['success']
  coeff_ = optim_res['x']
  penalty = 0
- for i in coeff_: # pvalue penalty should handle this
-     if abs(i) > 120:
-         penalty += abs(i)
+
  if 'hess_inv' in optim_res:
      covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
          if robust else optim_res['hess_inv']
@@ -5496,16 +5608,7 @@ class ObjectiveFunction(object):
  # stderr = [np.min(np.abs(optim_res['x'][i]/random.uniform(1.8, 3)), stderr[i]) if i > len(self.none_handler(self.fixed_fit)) and np.abs(optim_res['x'][i] > 0.2) else stderr[i] for i in range(len(optim_res['x']))]
  if is_dispersion:
      stderr[-1] = random.uniform(0.001, 0.005)
- if simple_fit == False:
-     # gets the number of parmas before the correlations
-     pre_cor_pams = sum(self.get_num_params()[:3])
-     # gets the number of correlated rpm
-     post_cor_pams = sum(self.get_num_params()[:5])
-
-
-     # this calculation takes into account the correlated rpms distinct values
-     for i in range(pre_cor_pams, post_cor_pams):
-         stderr[i] = stderr[i] / np.sqrt(sample_size)
+

  if np.isnan(stderr).any():
      raise ValueError("Error: Matrix contains NaN values")
@@ -5518,6 +5621,9 @@ class ObjectiveFunction(object):
      optim_res['fun'] = 10.0 ** 10
  if self.power_up_ll:
      loglikelihood =-optim_res['fun']/2 - penalty
+ elif self.linear_regression:
+     loglikelihood= -optim_res['fun']
+
  else:
      loglikelihood = -optim_res['fun']/self.minimize_scaler - penalty

@@ -5817,11 +5923,15 @@ class ObjectiveFunction(object):
  draws_grouped = self._prepare_grouped_draws(XG, mod) if XG is not None else None

  # Optimization method and options
- method = self.method_ll
+ method = self.method_ll if bounds is None else 'L-BFGS-B'
  print('updataing methods')
- method = 'Nelder-Mead-BFGS'
- options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 2000}

+ #method = 'Nelder-Mead-BFGS'
+ options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 4000}
+ args=(
+     X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
+     self.rdm_cor_fit, None, None, draws_grouped, XG, mod
+ )
  # Run optimization
  optimization_result = self._minimize(
      self._loglik_gradient,
@@ -5835,9 +5945,94 @@ class ObjectiveFunction(object):
      tol=tol.get('ftol', 1e-8), # Use 'ftol' as the default tolerance
      options=options
  )
+
+
+
+ # Run the bootstrap to calculate standard errors
+ if self.run_bootstrap:
+
+     std_errors = self.bootstrap_std_dev(
+         initial_params=optimization_result.x,
+         XX=XX,
+         y=y,
+         dispersion=dispersion,
+         bounds=bounds,
+         tol=tol,
+         mod=mod,
+         n_bootstraps=100
+     )
+     self.stderr = std_errors
+
+
+
+
  return optimization_result

+

+
+ def bootstrap_std_dev(self, initial_params, XX, y, dispersion, bounds, tol, mod, n_bootstraps=100):
+     """
+     Perform bootstrap resampling to estimate the standard deviations of the parameters.
+
+     Parameters:
+     self: Reference to the class instance.
+     initial_params: Initial parameter estimates from the optimization.
+     XX: Design matrix.
+     y: Observed outcomes.
+     dispersion: Dispersion parameter (0=Poisson, 1=NB, 2=GP).
+     bounds: List of bounds for each parameter.
+     tol: Tolerance for the optimization process (dictionary with ftol and gtol).
+     mod: Dictionary containing additional data.
+     n_bootstraps: Number of bootstrap resamples (default=100).
+
+     Returns:
+     std_devs: Standard deviations of the parameter estimates (from bootstrap resampling).
+     """
+     # List to store parameter estimates from each bootstrap iteration
+     bootstrap_estimates = []
+
+     # Extract design matrices and additional components from `mod`
+     X, Xr, XG = mod.get('X'), mod.get('Xr'), mod.get('XG')
+     distribution = mod.get('dist_fit')
+
+     # Prepare draws
+     draws = self._prepare_draws(Xr, distribution)
+     draws_grouped = self._prepare_grouped_draws(XG, mod) if XG is not None else None
+
+     # Perform bootstrap iterations
+     for _ in range(n_bootstraps):
+         # Resample data with replacement
+         indices = np.random.choice(len(y), size=len(y), replace=True)
+         X_resampled = X[indices]
+         y_resampled = y[indices]
+
+         # Refit the model with resampled data
+         bootstrap_result = self._minimize(
+             self._loglik_gradient,
+             initial_params,
+             args=(
+                 X_resampled, y_resampled, draws, X_resampled, Xr, self.batch_size, self.grad_yes,
+                 self.hess_yes, dispersion, 0, False, 0, self.rdm_cor_fit, None, None,
+                 draws_grouped, XG, mod
+             ),
+             method=self.method_ll,
+             bounds=bounds,
+             tol=tol.get('ftol', 1e-8), # Use 'ftol' as the default tolerance
+             options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 2000}
+         )
+
+         # Store the parameter estimates from this bootstrap iteration
+         bootstrap_estimates.append(bootstrap_result.x)
+
+     # Convert bootstrap parameter estimates to a NumPy array
+     bootstrap_estimates = np.array(bootstrap_estimates)
+
+     # Compute the standard deviations of the parameter estimates
+     std_devs = np.std(bootstrap_estimates, axis=0)
+
+     return std_devs
+
  def _initialize_params_and_bounds(self, XX, dispersion):
      """Initialize parameters and set bounds for optimization."""
      num_params = XX.shape[2] # Number of features
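`bootstrap_std_dev` resamples observations with replacement, refits via `_minimize`, and reports the column-wise standard deviation of the refitted parameter vectors. The same idea in isolation, as a self-contained sketch with ordinary least squares standing in for the package's likelihood fit:

    import numpy as np

    rng = np.random.default_rng(42)
    n, k = 200, 3
    X = rng.normal(size=(n, k))
    y = X @ np.array([1.0, -2.0, 0.5]) + rng.normal(size=n)

    estimates = []
    for _ in range(100):                           # n_bootstraps
        idx = rng.choice(n, size=n, replace=True)  # resample rows
        beta_b, *_ = np.linalg.lstsq(X[idx], y[idx], rcond=None)  # refit
        estimates.append(beta_b)

    std_devs = np.std(np.array(estimates), axis=0)  # bootstrap std errors
    print(std_devs)

One detail worth noting in the hunk itself: the resample re-indexes `X` and `y` but reuses `Xr` and the pre-generated draws unchanged across iterations.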
@@ -5963,7 +6158,13 @@ class ObjectiveFunction(object):
  if dispersion == 0:
      return [(-30, 30) for _ in initial_params]
  elif dispersion == 1:
-     return [(-30, 30) for _ in initial_params[:-1]] + [(-1, 5)]
+     num_params = self.get_num_params()
+     skip_count = sum(num_params[:2])
+
+
+     bounds = [(-3, 3) for _ in initial_params[:-1]] + [(-1, 1)]
+     bounds[skip_count: -1] = [(0.02, None) for _ in bounds[skip_count: -1]]
+     return bounds
  elif dispersion == 2:
      return [(-5, 5) for _ in initial_params[:-1]] + [(0.1, 0.99)]
  else:
@@ -6024,11 +6225,16 @@ class ObjectiveFunction(object):
      Initial parameter array.
  """
  # Generate random initial coefficients
- initial_params = np.random.uniform(-0.05, 0.05, size=num_coefficients)
+ initial_params = np.random.uniform(-.1, 0.1, size=num_coefficients)
+ parma_sum = sum(self.get_num_params()[:2])
+
+
+ initial_params[parma_sum:-dispersion] =0.5

  # Add dispersion parameter if applicable
  if dispersion > 0:
-     initial_params = np.insert(initial_params, -1, 0.)
+     initial_params[-1] = 0.0
+     #initial_params[0] =3

  return initial_params

@@ -6047,8 +6253,9 @@ class ObjectiveFunction(object):
      obj_1, log_lik, betas, stderr, pvalues, zvalues, is_halton, is_delete
  """
  try:
+     dispersion = mod.get('dispersion', dispersion)
      # Preprocessing
-     tol = {'ftol': 1e-8, 'gtol': 1e-6}
+     tol = {'ftol': 1e-6, 'gtol': 1e-6, 'xtol': 1e-6}
      y, X, Xr, XG, XH = mod.get('y'), mod.get('X'), mod.get('Xr'), mod.get('XG'), mod.get('XH')

      # Validate input data
@@ -7369,7 +7576,7 @@ class ObjectiveFunction(object):
      sequence.append(n_th_number)
  return sequence

- def _generate_halton_draws(self, sample_size, n_draws, n_vars, shuffled=False, drop=100, primes=None,
+ def _generate_halton_draws(self, sample_size, n_draws, n_vars, shuffled=False, drop=10, primes=None,
                             long=False) -> np.ndarray:
      """Generate Halton draws for multiple random variables using different primes as base"""
      if primes is None:
@@ -7398,6 +7605,7 @@ class ObjectiveFunction(object):
          i += 1
          t += 1
      seq = seq[drop:length + drop]
+     seq = np.clip(seq, 1e-4, 1-1e-4)
      if shuffled:
          np.random.shuffle(seq)
      return seq
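The new clip keeps every Halton point strictly inside (0, 1): a draw of exactly 0 or 1 would map to -inf or +inf under the inverse-CDF transforms these uniform draws typically feed. A small demonstration, using scipy's normal quantile function for illustration:

    import numpy as np
    from scipy.stats import norm

    seq = np.array([0.0, 0.25, 0.5, 1.0])
    print(norm.ppf(seq))                           # [-inf -0.674  0.  inf]
    print(norm.ppf(np.clip(seq, 1e-4, 1 - 1e-4)))  # all finite, within about +/-3.72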
@@ -7451,6 +7659,12 @@ class ObjectiveFunction(object):
      (1 - x) * np.random.gamma(2, scale=theta, size=n)
  return b

+
+
+
+
+
+
  def _compute_derivatives(self, betas, draws, betas_std=None, distribution=None):
      # N, N_draws, K = len(draws)/self.Ndraws, self.Ndraws, len(self._distribution)
      # N, D = draws.shape[0], draws.shape[1]
metacountregressor.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: metacountregressor
- Version: 0.1.237
+ Version: 0.1.239
  Summary: Extensive Testing for Estimation of Data Count Models
  Home-page: https://github.com/zahern/CountDataEstimation
  Author: Zeke Ahern
metacountregressor-0.1.239/version.txt (new file)
@@ -0,0 +1 @@
+ 0.1.239
metacountregressor-0.1.237/version.txt (deleted)
@@ -1 +0,0 @@
- 0.1.237