metacountregressor 1.0.10__tar.gz → 1.0.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {metacountregressor-1.0.10/metacountregressor.egg-info → metacountregressor-1.0.11}/PKG-INFO +2 -2
  2. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/main.py +2 -2
  3. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/solution.py +102 -154
  4. {metacountregressor-1.0.10 → metacountregressor-1.0.11/metacountregressor.egg-info}/PKG-INFO +2 -2
  5. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/setup.py +1 -1
  6. metacountregressor-1.0.11/version.txt +1 -0
  7. metacountregressor-1.0.10/version.txt +0 -1
  8. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/LICENSE.txt +0 -0
  9. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/MANIFEST.in +0 -0
  10. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/README.md +0 -0
  11. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/README.rst +0 -0
  12. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/__init__.py +0 -0
  13. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/_device_cust.py +0 -0
  14. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/app_main.py +0 -0
  15. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/data_split_helper.py +0 -0
  16. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/halton.py +0 -0
  17. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/helperprocess.py +0 -0
  18. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/main_old.py +0 -0
  19. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/metaheuristics.py +0 -0
  20. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/pareto_file.py +0 -0
  21. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/pareto_logger__plot.py +0 -0
  22. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/setup.py +0 -0
  23. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/single_objective_finder.py +0 -0
  24. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/test_code.py +0 -0
  25. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/test_generated_paper2.py +0 -0
  26. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor.egg-info/SOURCES.txt +0 -0
  27. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor.egg-info/dependency_links.txt +0 -0
  28. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor.egg-info/not-zip-safe +0 -0
  29. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor.egg-info/requires.txt +0 -0
  30. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor.egg-info/top_level.txt +0 -0
  31. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/setup.cfg +0 -0
  32. {metacountregressor-1.0.10 → metacountregressor-1.0.11}/tests/test.py +0 -0
{metacountregressor-1.0.10/metacountregressor.egg-info → metacountregressor-1.0.11}/PKG-INFO
@@ -1,9 +1,9 @@
  Metadata-Version: 2.4
  Name: metacountregressor
- Version: 1.0.10
+ Version: 1.0.11
  Summary: Extensive Testing for Estimation of Data Count Models
  Home-page: https://github.com/zahern/CountDataEstimation
- Author: Zeke Ahern
+ Author: Zeke Ahern, Alexander Paz
  Author-email: z.ahern@qut.edu.au
  License: MIT
  Requires-Python: >=3.10
{metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/main.py
@@ -195,12 +195,12 @@ def main(args, **kwargs):

  args = {'algorithm': 'hs', 'test_percentage': 0, 'test_complexity': 2, 'instance_number': 1,
  'val_percentage': 0, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 600, 'desicions': a_des,
- 'is_multi': False, 'grad_est': False, 'non_sig_prints':True, 'model_types': [[0]], 'run_bootstrap':0}
+ 'is_multi': False, 'grad_est': True, 'non_sig_prints':True, 'model_types': [[0]], 'run_bootstrap':0, 'r_nu_hess':0, '_transformations': ["no", "no", "nil", 'log']}
  # Fit the model with metacountregressor
  # Step 5: Transform the dataset based on the configuration
  #data_new = helperprocess.transform_dataframe(dataset, config)
  y = df[['Y']]
- X = df.drop(columns=['Y'])
+ X = df.drop(columns=['Y', 'ID', 'TRAIN', 'MXMEDSH', 'DECLANES', 'DOUBLE', 'INTECHAG', 'MINRAD', 'PEAKHR', 'AVESNOW', 'FC', 'SINGLE', 'WIDTH', 'MEDWIDTH', 'CURVES', 'URB', 'ADTLANE', 'GRADEBR', 'SLOPE', 'MIMEDSH', 'TANGENT', 'AVEPRE', 'ACCESS'])
  obj_fun = ObjectiveFunction(X, y, **args)
  # replace with other metaheuristics if desired
  results = harmony_search(obj_fun)
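Only the changed lines of the example are shown above. Read with its surrounding context, the updated main.py usage roughly amounts to the sketch below; the CSV path and the import locations are assumptions, and the 'desicions': a_des entry is left out because a_des is built earlier in main.py and is not part of this hunk.

    # Illustrative, self-contained version of the updated example (a sketch, not the packaged script).
    import pandas as pd
    from metacountregressor.solution import ObjectiveFunction      # assumed import path
    from metacountregressor.metaheuristics import harmony_search   # assumed import path

    df = pd.read_csv('your_count_data.csv')   # hypothetical input with a 'Y' count column
    y = df[['Y']]
    X = df.drop(columns=['Y'])                # also drop any ID-style columns, as in the diff

    args = {'algorithm': 'hs', 'test_percentage': 0, 'test_complexity': 2, 'instance_number': 1,
            'val_percentage': 0, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', 'MAX_TIME': 600,
            'is_multi': False, 'grad_est': True, 'non_sig_prints': True, 'model_types': [[0]],
            'run_bootstrap': 0, 'r_nu_hess': 0, '_transformations': ["no", "no", "nil", 'log']}

    obj_fun = ObjectiveFunction(X, y, **args)
    results = harmony_search(obj_fun)         # swap in another metaheuristic if desired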
{metacountregressor-1.0.10 → metacountregressor-1.0.11}/metacountregressor/solution.py
@@ -128,7 +128,7 @@ class ObjectiveFunction(object):
  self.run_numerical_hessian = kwargs.get('r_nu_hess', False)
  self.run_bootstrap = kwargs.get('run_bootstrap', False)
  self.linear_regression = kwargs.get('linear_model', False)
- self.reg_penalty = kwargs.get('reg_penalty', 1)
+ self.reg_penalty = kwargs.get('reg_penalty', 0)
  self.power_up_ll = False
  self.nb_parma = 1
  self.bic = None
@@ -435,7 +435,7 @@ class ObjectiveFunction(object):
  self.pvalue_sig_value = kwargs.get('pvalue_sig_value', .1)
  self.observations = self._x_data.shape[0]
  self.minimize_scaler = 1 / self.observations # scale the minimization function to the observations
-
+ self.minimize_scaler =1
  self.batch_size = None
  # open the file in the write mode
  self.grab_transforms = 0
@@ -3610,68 +3610,7 @@ class ObjectiveFunction(object):
  print(exc_type, fname, exc_tb.tb_lineno)
  raise Exception

- def NB_score_lindley(self, params, y, mu, X, Q=0, obs_specific=False):
- """
- Calculate the score (gradient) vector of the Negative Binomial-Lindley log-likelihood
- Parameters
- ----------
- params : array_like
- The parameters of the model
- params[-1]: is the dispersion parameter
- y: array_like
- Vector of true counts N long
- mu: array_like
- Vector of predicted counts N long
- X: array_like
- Matrix of explanatory variables len N* (D-1)
- a: float or None, optional
- Optional parameter, if not None the function calculates the score for the NB-Lindley model with Lindley parameter a,
- otherwise, it calculates the score for the Negative Binomial model.
- Returns
- -------
- score : ndarray, 1-D
- The score vector of the model, i.e. the first derivative of the
- loglikelihood function, evaluated at `params`
- """
-
- alpha = params[-1]
- a = params[-2]
- a1 = 1 / alpha * mu
- prob = a1 / (a1 + mu)
- exog = X
-
- # Calculate the score of the Negative Binomial model
- dgpart = sc.digamma(y + alpha * mu) - sc.digamma(alpha * mu)
- dparams = exog * alpha * (np.log(prob) + dgpart)
- dalpha = ((alpha * (y - mu * np.log(prob) -
- mu * (dgpart + 1)) -
- mu * (np.log(prob) +
- dgpart)) /
- (alpha ** 2 * (alpha + 1)))
-
- # If a is not None, calculate the score of the NB-Lindley model
- if a is not None:
- a1 = (1 + a) / (alpha + a + mu)
- prob = a1 / (a1 + mu)
- dgpart = sc.digamma(y + alpha * mu + a) - \
- sc.digamma(alpha * mu + a)
- dparams_lindley = exog * (alpha + a) * (np.log(prob) + dgpart)
- dalpha_lindley = (((alpha + a) * (y - mu * np.log(prob) -
- mu * (dgpart + 1)) -
- mu * (np.log(prob) +
- dgpart)) /
- ((alpha + a) ** 2 * (alpha + a + 1)))

- if obs_specific is False:
- return np.r_[dparams.sum(0), dalpha_lindley.sum(), dalpha.sum()]
- # return np.r_[dparams.sum(0) + dparams_lindley.sum(0), dalpha_lindley.sum(), dalpha.sum()]
- else:
- return np.concatenate((dparams, dalpha_lindley, dalpha), axis=1)
- # return np.concatenate((dparams + dparams_lindley, dalpha_lindley, dalpha), axis=1)
- # return np.r_[dparams.sum(0), dalpha, dparams_lindley.sum(0), dalpha_lindley]
-
- else:
- return np.r_[dparams.sum(0), dalpha]

  def PoissonNegLogLikelihood(self, lam, y, penalty=0, X=None):
  """computers the negative log-likelihood for a poisson random variable"""
@@ -3763,71 +3702,7 @@ class ObjectiveFunction(object):
  p = np.exp(-ltheta)
  return r, p

- def negative_binomial_lindley_pmf(self, y, r, theta2, mu):
- """
- Calculate the probability mass function (PMF) of the Negative Binomial Lindley (NB-L) distribution
- for a given count y, mean lambda, dispersion r, shape alpha, and scale beta.
-
- Parameters:
- y (int or array-like): The count(s) of interest.
- mu (float): The mean parameter of the Negative Binomial distribution.
- r (float): The dispersion parameter of the Negative Binomial distribution.
- theta2: The shape parameter of the Lindley distribution.
-
-
- Returns:
- pmf (float or ndarray): The probability mass function evaluated at the count(s) y.
- """
-
- theta = self.my_lindley(y, theta2)
- mu1 = mu * theta

- var = mu1 + 1 / r * mu1 ** 2
- p = (var - mu1) / var
- numerator = math.comb(r + y.ravel() - 1.0, y.ravel()
- ) * ((theta ** 2) / (theta + 1))
- denominator = 0
- for j in range(y + 1):
- denominator += math.comb(y, j) * ((-1) ** j) * \
- ((theta + r + j + 1) / ((theta + r + j) ** 2))
-
- please = numerator / denominator * p ** y * (1 - p) ** r
- return please
-
- def negative_binomial_lindley_pmf_gradient(self, y, r, theta2, mu):
- """
- Calculate the gradient of the probability mass function (PMF) of the Negative Binomial Lindley (NB-L)
- distribution for a given count y, mean lambda, dispersion r, shape alpha, and scale beta.
-
- Parameters:
- y (int or array-like): The count(s) of interest.
- mu (float): The mean parameter of the Negative Binomial distribution.
- r (float): The dispersion parameter of the Negative Binomial distribution.
- theta2: The shape parameter of the Lindley distribution.
-
-
- Returns:
- gradient (ndarray): The gradient of the probability mass function evaluated at the count(s) y.
- """
-
- theta = self.my_lindley(y, theta2)
- mu = mu * mu + theta
- var = mu + 1 / r * mu ** 2
- p = (var - mu) / var
- numerator = math.comb(r + y - 1, y) * ((theta ** 2) / (theta + 1))
- denominator = 0
- for j in range(y + 1):
- denominator += math.comb(y, j) * ((-1) ** j) * \
- ((theta + r + j + 1) / ((theta + r + j) ** 2))
-
- dtheta = numerator * (y * (2 * theta + 1) - theta * (theta + 1)) / denominator ** 2
- dmu = (y - mu) * p / (1 - p)
- dr = -r ** 2 / var + r / var * (y - r * mu / (1 - p))
- dtheta2 = theta * (y * (theta + 1) / (theta + 1 + mu) -
- (theta2 + 1) / (theta2 + mu)) / denominator
-
- gradient = np.array([dtheta2, dmu, dr])
- return gradient

  def dnbl(self, x, r, theta):

@@ -4754,6 +4629,43 @@ class ObjectiveFunction(object):
  pch[pch == 0] = 0.00001
  return pch

+
+
+
+
+
+ def compute_gradient_central(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
+ return_gradient_n=False, dispersion=0, test_set=0, return_EV=False, verbose=0,
+ corr_list=None, zi_list=None, exog_infl=None, draws_grouped=None, Xgroup=None,
+ model_nature=None, kwarg=None, **kwargs)->np.ndarray:
+ # {
+ params = np.array(betas)
+
+ delta = np.ones_like(params) * 1e-5 #
+ gradient = np.zeros_like(params) # create an array
+ for i in range(len(params)):
+ # {
+ orig = params[i]
+ params[i] = orig + delta[i]
+ case_1 = self._loglik_gradient(
+ params, Xd, y, draws=draws, Xf=Xf, Xr=Xr, batch_size=batch_size, return_gradient=return_gradient,
+ return_gradient_n=return_gradient_n, dispersion=dispersion, test_set=test_set, return_EV=return_EV,
+ verbose=verbose, corr_list=corr_list, zi_list=zi_list, exog_infl=exog_infl, draws_grouped=draws_grouped,
+ Xgroup=Xgroup, model_nature=model_nature, kwarg=kwarg, **kwargs
+ )
+ params[i] = orig - delta[i]
+ case_2 = self._loglik_gradient(
+ params, Xd, y, draws=draws, Xf=Xf, Xr=Xr, batch_size=batch_size, return_gradient=return_gradient,
+ return_gradient_n=return_gradient_n, dispersion=dispersion, test_set=test_set, return_EV=return_EV,
+ verbose=verbose, corr_list=corr_list, zi_list=zi_list, exog_infl=exog_infl, draws_grouped=draws_grouped,
+ Xgroup=Xgroup, model_nature=model_nature, kwarg=kwarg, **kwargs
+ )
+ params[i] = orig # restore value
+ gradient[i] = (case_1 - case_2) / (2.0 * delta[i])
+ # }
+ return gradient
+
+
  def gradient_calc_est(self, N, Kf, Kr, Kchol, dispersion, proba_n, eVd, br, brstd, draws_, Xdf, Xdr, y, R, lik,
  alpha=0.5, betas=None, Br=None, panels=None, model_nature=None, br_h=None, br_hs=None):
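The new compute_gradient_central method is a textbook central finite-difference gradient: each parameter is nudged by ±1e-5, the objective is re-evaluated on both sides, and the two values are differenced. A minimal self-contained sketch of the same idea, stripped of the _loglik_gradient plumbing, is below; the quadratic test function and the delta value are illustrative, not taken from the package.

    import numpy as np

    def central_difference_gradient(f, params, delta=1e-5):
        # Approximate d f / d params[i] by (f(x + delta*e_i) - f(x - delta*e_i)) / (2*delta).
        params = np.asarray(params, dtype=float).copy()
        grad = np.zeros_like(params)
        for i in range(len(params)):
            orig = params[i]
            params[i] = orig + delta      # forward step
            f_plus = f(params)
            params[i] = orig - delta      # backward step
            f_minus = f(params)
            params[i] = orig              # restore the original value
            grad[i] = (f_plus - f_minus) / (2.0 * delta)
        return grad

    # Gradient of f(b) = b0**2 + 3*b1 at (1, 2) is (2, 3); the approximation should match closely.
    print(central_difference_gradient(lambda b: b[0] ** 2 + 3 * b[1], [1.0, 2.0]))

In the package method the scalar being differenced is the value returned by _loglik_gradient, so the numerical result can serve as a sanity check against the analytic score (the commented-out der_alt comparison in a later hunk uses it that way).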
@@ -5000,7 +4912,7 @@ class ObjectiveFunction(object):
  (self._group_Y[key].ravel() - sub_eVd[i].ravel())[:, None] * Xd[key])

  # todo make dummies with grouped
- grad = np.concatenate(der_list, axis=0)
+ grad_g = np.concatenate(der_list, axis=0)
  grad_n = np.concatenate(der_n, axis=1)
  grad = grad_n.sum(axis=0)
  # grad = grad_n.sum(axis = 1)
@@ -5016,17 +4928,22 @@ class ObjectiveFunction(object):
  der = grad_n.sum(axis=0)
  # to do prob product arcross panel

- return np.nan_to_num(der, nan=200000, posinf=200000, neginf=-200000), np.nan_to_num(grad_n, nan=200000,
- posinf=200000,
- neginf=-200000)
+ return np.nan_to_num(der, nan=200, posinf=200, neginf=-200), np.nan_to_num(grad_n, nan=200,
+ posinf=200,
+ neginf=-200)

  if obs_specific:
  grad_n_p = (y - eVd)[:, :, :] * Xd
  grad_n = self._prob_product_across_panels(grad_n_p, self.panel_info)
  der = grad_n.sum(axis=0)
- return np.nan_to_num(der, nan=200000, posinf=200000, neginf=-200000)
+ return np.nan_to_num(der, nan=200, posinf=200, neginf=-200)
  else:
+ n, p, k = Xd.shape
  grad_n_p = (y - eVd)[:, :, :] * Xd
+ #residual = (y - eVd).squeeze(axis = -1)
+ # grad = np.zeros(k)
+ #for j in range(p):
+ # grad += Xd[:, j, :].T @ residual[:, j] # Shape: (k,)
  grad_n = self._prob_product_across_panels(grad_n_p, self.panel_info)
  der = grad_n.sum(axis=0)

@@ -5046,11 +4963,7 @@ class ObjectiveFunction(object):
  y, eVd, Xd, obs_specific=True)
  return np.nan_to_num(der, nan=200, posinf=200, neginf=-200), np.nan_to_num(grad_n, nan=140, posinf=140,
  neginf=-140)
- elif dispersion == 3:

- der, grad_n = self.poisson_lindley_gradient(betas, Xd, y)
-
- return der, grad_n



@@ -5058,7 +4971,7 @@ class ObjectiveFunction(object):
  der, grad_n = self.poisson_lognormal_glm_score(betas, y, Xd, sig)
  return der, grad_n

- return np.nan_to_num(der, nan=200000, posinf=2000000, neginf=-20000)
+ return np.nan_to_num(der, nan=200, posinf=200, neginf=-200)

  def prob_obs_draws(self, eVi, y, disp, dispersion=0.0, disp2=0):

@@ -5072,12 +4985,10 @@ class ObjectiveFunction(object):

  proba_r = self.general_poisson_pmf(eVi, y, disp)

- elif dispersion == 3:
- proba_r = self.poisson_lindley_pmf(eVi, disp2, y)
+
  # proba_r = self.dpoisl(y, eVi)

- elif dispersion == 4:
- proba_r = self.dnegbimonli(y, eVi, disp)
+

  else:
  raise Exception
@@ -5359,13 +5270,13 @@ class ObjectiveFunction(object):
  if self.is_dispersion(dispersion):
  penalty, main_disper = self._penalty_dispersion(dispersion, main_disper, eVd, y, penalty,
  model_nature)
- b_pen = self.custom_betas_to_penalise(betas, dispersion)
- penalty = self.regularise_l2(betas) + self.regularise_l1(betas)
- penalty = self.custom_penalty(betas, penalty)
+ #b_pen = self.custom_betas_to_penalise(betas, dispersion)
+ #penalty = self.regularise_l2(betas) + self.regularise_l1(betas)
+ #penalty = self.custom_penalty(betas, penalty)

  betas[-1] = main_disper

- b_pen = self.custom_betas_to_penalise(betas, dispersion)
+ #b_pen = self.custom_betas_to_penalise(betas, dispersion)
  penalty = self.regularise_l2(betas) + self.regularise_l1(betas)
  penalty = self.custom_penalty(betas, penalty)

@@ -5390,7 +5301,7 @@ class ObjectiveFunction(object):
  loglik += 2*loglik
  print('am i powering up')

- b_pen = self.custom_betas_to_penalise(betas, dispersion)
+ #b_pen = self.custom_betas_to_penalise(betas, dispersion)
  penalty = self.regularise_l2(betas) + self.regularise_l1(betas)
  penalty = self.custom_penalty(betas, penalty)

@@ -5404,13 +5315,26 @@ class ObjectiveFunction(object):
  der, grad_n = self.simple_score_grad(
  betas, y, eVd, Xd, dispersion, both=True)
  #return (-loglik + penalty, -der, grad_n)*self.minimize_scaler
- scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel(), grad_n))
+ scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, der.ravel(), grad_n))
+
  return scaled_tuple
  else:
  der = self.simple_score_grad(
  betas, y, eVd, Xd, dispersion, both=False)
+ #arguments = locals() # Capture all current arguments
+ #arguments["return_gradient"] = False # Change `dispersion` to 1
+ #del arguments["self"] # Remove `self` from arguments (not needed in the call)
+ '''
+ der_alt = self.compute_gradient_central(betas, Xd, y, draws=draws, Xf=Xf, Xr=Xr, batch_size=batch_size, return_gradient=False,
+ return_gradient_n=False, dispersion=dispersion, test_set=test_set, return_EV=return_EV, verbose=verbose, corr_list=corr_list,
+ zi_list=zi_list, exog_infl=exog_infl, draws_grouped=draws_grouped, Xgroup=Xgroup, model_nature=model_nature, kwarg=kwarg,
+ **kwargs)
+ '''
  scaled_tuple = tuple(
  x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel()))
+
+
+
  return scaled_tuple
  #return (-loglik + penalty, -der.ravel())*self.minimize_scaler
  else:
@@ -5752,9 +5676,7 @@ class ObjectiveFunction(object):
  traceback.print_exc()
  print(e, 'where loglik')

- def minimize_function(self, loglike):
- r'Takes the logliklihood function and tranforms it to a more handed minimization function'
- return loglike/self.n_obs
+
  def print_chol_mat(self, betas):
  print(self.chol_mat)
  self.get_br_and_bstd(betas)
@@ -6623,6 +6545,12 @@ class ObjectiveFunction(object):
  tol=tol.get('ftol', 1e-6), # Use 'ftol' as the default tolerance
  options=options
  )
+
+ #print(result.summary())
+
+
+ #i want to compare this to stats model.s
+
  if optimization_result.message == 'NaN result encountered.':
  optimization_result = self._minimize(self._loglik_gradient,
  initial_params,
@@ -6930,7 +6858,7 @@ class ObjectiveFunction(object):
  total = sum(self.get_num_params()) + dispersion_param
  return total

- def _build_initial_params(self, num_coefficients, dispersion):
+ def _build_initial_params(self, num_coefficients, dispersion, XX, y):
  """
  Build the initial parameter array for optimization.

@@ -6942,7 +6870,27 @@ class ObjectiveFunction(object):
  Initial parameter array.
  """
  # Generate random initial coefficients
- initial_params = np.random.uniform(0.0000, 0.01, size=num_coefficients)
+ # call in statsmodels
+ try:
+ if dispersion ==0:
+ model = sm.GLM(y.squeeze(axis=-1), XX.squeeze(axis=1), family=sm.families.Poisson())
+ else:
+ model = sm.NegativeBinomial(y.squeeze(axis=-1), XX.squeeze(axis=1))
+ result = model.fit()
+ initial_params = result.params # then exten to num_coefficients
+ if len(initial_params) < num_coefficients:
+ initial_params = np.concatenate([
+ initial_params,
+ np.random.uniform(-0.01, 0.03, size=num_coefficients - len(initial_params))
+ ])
+
+ else:
+ initial_params = np.random.uniform(-0.01, 0.3, size=num_coefficients)
+ except:
+ print('pre fit failed')
+ initial_params = np.random.uniform(-0.01, 0.01, size=num_coefficients)
+
+
  parma_sum = sum(self.get_num_params()[:2])
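_build_initial_params now tries to seed the optimiser from a statsmodels pre-fit (a Poisson GLM when dispersion == 0, otherwise NegativeBinomial), pads the estimate with small random values when more coefficients are needed, and falls back to purely random starting values if the pre-fit fails. A standalone sketch of that warm-start idea is below; the synthetic data, the helper name, and the padding range are illustrative rather than the package's exact code.

    import numpy as np
    import statsmodels.api as sm

    rng = np.random.default_rng(0)
    X = sm.add_constant(rng.normal(size=(200, 2)))            # synthetic design matrix
    y = rng.poisson(np.exp(X @ np.array([0.5, 0.3, -0.2])))   # synthetic count outcome

    def warm_start_params(X, y, num_coefficients, dispersion=0):
        # Try a quick statsmodels fit and use its coefficients as starting values;
        # pad with small random numbers if more parameters are required,
        # and fall back to random values if the pre-fit fails.
        try:
            if dispersion == 0:
                result = sm.GLM(y, X, family=sm.families.Poisson()).fit()
            else:
                result = sm.NegativeBinomial(y, X).fit(disp=0)
            params = np.asarray(result.params)
            if len(params) < num_coefficients:
                pad = rng.uniform(-0.01, 0.03, size=num_coefficients - len(params))
                params = np.concatenate([params, pad])
            return params[:num_coefficients]
        except Exception:
            return rng.uniform(-0.01, 0.01, size=num_coefficients)

    print(warm_start_params(X, y, num_coefficients=5))

Starting from a fixed-effects fit generally places the optimiser much closer to the optimum than the previous uniform(0, 0.01) draw.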
@@ -6972,7 +6920,7 @@ class ObjectiveFunction(object):
  try:
  dispersion = mod.get('dispersion', dispersion)
  # Preprocessing
- tol = {'ftol': 1e-6, 'gtol': 1e-6, 'xtol': 1e-6}
+ tol = {'ftol': 1e-10, 'gtol': 1e-6, 'xtol': 1e-7}
  y, X, Xr, XG, XH = mod.get('y'), mod.get('X'), mod.get('Xr'), mod.get('XG'), mod.get('XH')

  # Validate input data
@@ -6987,7 +6935,7 @@ class ObjectiveFunction(object):
  num_coefficients = self._calculate_num_coefficients(mod, dispersion)

  # Build initial parameters and bounds
- initial_params = self._build_initial_params(num_coefficients, dispersion)
+ initial_params = self._build_initial_params(num_coefficients, dispersion, XX, y)
  bounds = self._set_bounds(initial_params, dispersion)

{metacountregressor-1.0.10 → metacountregressor-1.0.11/metacountregressor.egg-info}/PKG-INFO
@@ -1,9 +1,9 @@
  Metadata-Version: 2.4
  Name: metacountregressor
- Version: 1.0.10
+ Version: 1.0.11
  Summary: Extensive Testing for Estimation of Data Count Models
  Home-page: https://github.com/zahern/CountDataEstimation
- Author: Zeke Ahern
+ Author: Zeke Ahern, Alexander Paz
  Author-email: z.ahern@qut.edu.au
  License: MIT
  Requires-Python: >=3.10
{metacountregressor-1.0.10 → metacountregressor-1.0.11}/setup.py
@@ -38,7 +38,7 @@ setuptools.setup(
  long_description=long_description,
  long_description_content_type='text/markdown', # Specify Markdown content
  url='https://github.com/zahern/CountDataEstimation',
- author='Zeke Ahern',
+ author='Zeke Ahern, Alexander Paz',
  author_email='z.ahern@qut.edu.au',
  license='MIT',
  packages=['metacountregressor'],
metacountregressor-1.0.11/version.txt
@@ -0,0 +1 @@
+ 1.0.11
metacountregressor-1.0.10/version.txt
@@ -1 +0,0 @@
- 1.0.10