metacountregressor 0.1.235__py3-none-any.whl → 0.1.237__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -121,7 +121,7 @@ class ObjectiveFunction(object):
121
121
  """
122
122
 
123
123
  def __init__(self, x_data, y_data, **kwargs):
124
-
124
+ self.linear_regression = kwargs.get('linear_model', False)
125
125
  self.reg_penalty = 0
126
126
  self.power_up_ll = False
127
127
  self.nb_parma = 1
@@ -481,7 +481,14 @@ class ObjectiveFunction(object):
481
481
 
482
482
  model_types = [[0, 1]] # add 2 for Generalized Poisson
483
483
  #model_types = [[0]]
484
- #TODO change back and fix NB
484
+
485
+ if kwargs.get('linear_model', None) is not None:
486
+ model_types = [[0]]
487
+ self.grad_yes = False
488
+
489
+ print(f'Linear Model Selected: turning off gradient calculation')
490
+
491
+
485
492
  model_t_dict = {'Poisson':0,
486
493
  "NB":1}
487
494
  # Retrieve the keys (model names) corresponding to the values in model_types
@@ -789,11 +796,7 @@ class ObjectiveFunction(object):
789
796
  data_names = list(set(b + a))
790
797
 
791
798
  print(data_names)
792
- # from bs4 import BeautifulSoup
793
- # explainer = shap.TreeExplainer(rf)
794
- # shap_values = explainer.shap_values(self._x_data)
795
- # shap.initjs()
796
- # dis = shap.force_plot(explainer.expected_value, shap_values[0,:], self._x_data.iloc[0,:], matplotlib = True)
799
+
797
800
 
798
801
  return data_names
799
802
 
@@ -813,18 +816,18 @@ class ObjectiveFunction(object):
813
816
  def get_dispersion_paramaters(self, betas, dispersion):
814
817
 
815
818
  if dispersion == 0:
816
- return None, None
819
+ return None
817
820
  elif dispersion == 2 or dispersion == 1:
818
821
  if self.no_extra_param:
819
- return self.nb_parma, None
820
- return betas[-1], None
822
+ return self.nb_parma
823
+ return betas[-1]
821
824
 
822
825
  elif dispersion == 3:
823
- return None, betas[-1]
826
+ return betas[-1]
824
827
  elif dispersion == 4:
825
- return betas[-1], betas[-2]
828
+ return betas[-1]
826
829
  elif dispersion == 'poisson_lognormal':
827
- return betas[-1], None
830
+ return betas[-1]
828
831
 
829
832
  def reset_pvalue_conditions(self):
830
833
  self.initial_sig = .5 # pass the test of a single model
@@ -1410,20 +1413,7 @@ class ObjectiveFunction(object):
1410
1413
 
1411
1414
 
1412
1415
  def poisson_mean_get_dispersion(self, betas, X, y):
1413
- '''
1414
- eVy = self._loglik_gradient(betas, X, y, None, X, None, None, False, False, dispersion=0,
1415
- return_EV=True,
1416
- zi_list=None, draws_grouped=None, Xgroup=None)
1417
-
1418
- print('trying thi instead')
1419
1416
 
1420
- '''
1421
-
1422
- '''
1423
- nb_model = sm.GLM(y_long, x_long, family=sm.families.NegativeBinomial()).fit()
1424
- gamma = nb_model.scale
1425
-
1426
- '''
1427
1417
  #poisson way
1428
1418
  try:
1429
1419
  num_panels, num_obs, num_features = X.shape # Dimensions of x
@@ -1547,9 +1537,7 @@ class ObjectiveFunction(object):
1547
1537
  y = np.tile(y, self.Ndraws).ravel()
1548
1538
  eVy = eVy.ravel()
1549
1539
 
1550
- # y_avg = np.mean(y, axis = (1,2))
1551
- # eVy_avg = np.mean(eVy, axis = (1,2))
1552
- # mspe1 = np.nan_to_num(MSPE(np.squeeze(y_avg), np.squeeze(eVy_avg)), nan=100000, posinf=100000)
1540
+
1553
1541
  eVy = np.nan_to_num(eVy, nan=100000, posinf=100000)
1554
1542
  eVy = np.clip(eVy, None, 1000)
1555
1543
  mae = np.nan_to_num(MAE(np.squeeze(y), np.squeeze(eVy)), nan=100000, posinf=100000)
@@ -3921,9 +3909,9 @@ class ObjectiveFunction(object):
3921
3909
 
3922
3910
  return penalty, b_gam
3923
3911
 
3924
- def eXB_calc(self, params_main, Xd, offset, dispersion, b_gam=None):
3912
+ def eXB_calc(self, params_main, Xd, offset, dispersion, linear = False):
3925
3913
 
3926
- # print('this was 0')
3914
+
3927
3915
  if dispersion:
3928
3916
  eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
3929
3917
 
@@ -3933,20 +3921,10 @@ class ObjectiveFunction(object):
3933
3921
  eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
3934
3922
  eta = np.array(eta)
3935
3923
 
3936
- # eta = np.float64(eta)
3937
- # eta = np.dot(Xd, params_main)+offset[:,:,0]
3938
- # eta2 = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
3939
-
3940
- if dispersion == 5:
3941
- get_lindley = b_gam
3942
- if b_gam == 0:
3943
- get_lindley = 0.01
3944
- eps_i = self.my_lindley(Xd, get_lindley)
3945
- eVd = eps_i * np.exp(np.clip(eta, 0, EXP_UPPER_LIMIT)).ravel()
3946
- # Vd = self.my_lindley(np.exp(np.clip(eta, 0, EXP_UPPER_LIMIT)), get_lindley)
3947
3924
 
3948
- # eVd = np.exp(np.clip(eta, 0, EXP_UPPER_LIMIT))
3949
- # eVd = self.my_lindley(np.exp(np.clip(eta, None, EXP_UPPER_LIMIT)), 1) #todo grab param
3925
+ if linear:
3926
+ eta = eta.astype('float')
3927
+ return eta
3950
3928
 
3951
3929
 
3952
3930
  else:
@@ -4079,7 +4057,7 @@ class ObjectiveFunction(object):
4079
4057
  # proba_r = self._nonlog_nbin_lindley(y, eVd, fa, ba)
4080
4058
 
4081
4059
  elif dispersion == 'poisson_lognormal':
4082
- sig, vl = self.get_dispersion_paramaters(betas, dispersion)
4060
+ sig = self.get_dispersion_paramaters(betas, dispersion)
4083
4061
  store = list()
4084
4062
  for i in range(len(y)):
4085
4063
  store.append(self.poisson_lognormal_pmf(
@@ -4127,7 +4105,7 @@ class ObjectiveFunction(object):
4127
4105
 
4128
4106
  # if alpha < 0:
4129
4107
  # alpha = np.abs(alpha)
4130
- sig, omeg = self.get_dispersion_paramaters(betas, dispersion)
4108
+ sig = self.get_dispersion_paramaters(betas, dispersion)
4131
4109
 
4132
4110
  if model_nature is not None:
4133
4111
  if 'XH' in model_nature:
@@ -4152,15 +4130,7 @@ class ObjectiveFunction(object):
4152
4130
  gr_f, gr_u, gr_s = np.zeros((N, Kf)), np.zeros(
4153
4131
  (N, Kr + Kc)), np.zeros((N, Kchol + Kr_b))
4154
4132
 
4155
- if dispersion == 3:
4156
-
4157
- q = omeg / (1 + omeg)
4158
- d_beta = (y + 1) / (eVd + y + 1) - q / (1 - q)
4159
-
4160
- gr_e = d_beta * (proba_n[:, None, :]).sum(axis=2)
4161
- for i in len(y):
4162
- if y[i] == 0:
4163
- gr_e[i] = 0
4133
+
4164
4134
 
4165
4135
  if self.is_dispersion(dispersion) and not self.no_extra_param:
4166
4136
  gr_d = np.zeros((N, 1))
@@ -4329,9 +4299,8 @@ class ObjectiveFunction(object):
4329
4299
  # print('check this')
4330
4300
  if dispersion == 0:
4331
4301
  grad_n = self._concat_gradients((gr_f, gr_u, gr_s, gr_h, gr_hs)) / Rlik # (N,K)
4332
- elif dispersion == 3:
4333
- grad_n = self._concat_gradients(
4334
- (gr_f, gr_u, gr_s, gr_e)) / Rlik # (N,K)
4302
+
4303
+
4335
4304
  else:
4336
4305
  if self.no_extra_param:
4337
4306
  grad_n = self._concat_gradients(
@@ -4426,17 +4395,9 @@ class ObjectiveFunction(object):
4426
4395
 
4427
4396
  return der, grad_n
4428
4397
 
4429
- elif dispersion == 4:
4430
- b_gam, l_gam = self.get_dispersion_paramaters(betas, dispersion)
4431
- ravel_me = self.my_lindley(y, l_gam)
4432
- der = self.nbl_score(y, Xd, betas, b_gam, l_gam)
4433
- print('00lol')
4434
- # der = -self.NB_score_lindley(betas, y, eVd, Xd, 0, obs_specific)
4435
- # if both:
4436
- # grad_n = -self.NB_score_lindley(betas, y, eVd, Xd, 0, True)
4437
- # return der, grad_n
4398
+
4438
4399
  elif dispersion == 'poisson_lognormal':
4439
- sig, s = self.get_dispersion_paramaters(betas, dispersion)
4400
+ sig= self.get_dispersion_paramaters(betas, dispersion)
4440
4401
  der, grad_n = self.poisson_lognormal_glm_score(betas, y, Xd, sig)
4441
4402
  return der, grad_n
4442
4403
 
@@ -4666,30 +4627,31 @@ class ObjectiveFunction(object):
4666
4627
  betas = np.array(betas)
4667
4628
  Bf = betas[0:Kf] # Fixed betas
4668
4629
 
4669
- main_disper, lindley_disp = self.get_dispersion_paramaters(
4670
- betas, dispersion) #todo fix this up
4671
- if lindley_disp is not None:
4672
- if lindley_disp <= 0:
4673
- penalty += 1
4674
- penalty += - lindley_disp
4675
- lindley_disp = 0
4630
+ main_disper = self.get_dispersion_paramaters(
4631
+ betas, dispersion)
4632
+
4676
4633
 
4677
- eVd = self.eXB_calc(Bf, Xd, offset, main_disper, lindley_disp)
4634
+ eVd = self.eXB_calc(Bf, Xd, offset, main_disper, self.linear_regression)
4678
4635
 
4679
4636
  if return_EV is True:
4680
4637
  return eVd
4681
4638
 
4682
- # eVd = dev.np.exp(np.clip(Vdf[:, :, None] + Vdr, None, EXP_UPPER_LIMIT) )
4683
-
4684
- # self.lam = eVd
4639
+
4685
4640
 
4686
4641
  if self.is_dispersion(dispersion):
4687
4642
  penalty, main_disper = self._penalty_dispersion(dispersion, main_disper, eVd, y, penalty,
4688
4643
  model_nature)
4689
4644
 
4690
4645
  betas[-1] = main_disper
4646
+
4647
+ if self.linear_regression:
4648
+ # LINEAR MODEL PROCESS
4649
+ mse = np.mean((y - eVd) ** 2)
4650
+ return mse
4651
+
4652
+ ### GLM PROCESS ########
4691
4653
  llf_main = self.loglik_obs(
4692
- y, eVd, dispersion, main_disper, lindley_disp, betas)
4654
+ y, eVd, dispersion, main_disper, None, betas)
4693
4655
 
4694
4656
  llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
4695
4657
 
@@ -4725,7 +4687,9 @@ class ObjectiveFunction(object):
4725
4687
  else:
4726
4688
 
4727
4689
  return (-loglik + penalty)*self.minimize_scaler
4728
- # Else, we have draws
4690
+ ### ELSE WE HAVE DRAW DO THE DRAWS CODE ####
4691
+ ## ELSE DRAWS ####
4692
+ #############################################
4729
4693
  self.n_obs = len(y) * self.Ndraws #todo is this problematic
4730
4694
  penalty += self._penalty_betas(
4731
4695
  betas, dispersion, penalty, float(len(y) / 10.0))
@@ -4916,7 +4880,14 @@ class ObjectiveFunction(object):
4916
4880
  betas_hetro_sd = None
4917
4881
 
4918
4882
  Vdr = dev.cust_einsum("njk,nkr -> njr", Xdr, Br) # (N,P,R)
4919
-
4883
+ if self:
4884
+ ### LINEAR MODEL WAY #######
4885
+ eVd = np.clip(
4886
+ Vdf[:, :, None] + Vdr + Vdh + dev.np.array(offset), None, None)
4887
+ mse = np.mean((y - eVd) ** 2)
4888
+ return mse
4889
+
4890
+ ##### GLM WAY #####
4920
4891
  eVd = dev.np.exp(np.clip(
4921
4892
  Vdf[:, :, None] + Vdr + Vdh + dev.np.array(offset), None, EXP_UPPER_LIMIT))
4922
4893
  if dispersion == 3:
@@ -5034,7 +5005,7 @@ class ObjectiveFunction(object):
5034
5005
  def print_chol_mat(self, betas):
5035
5006
  print(self.chol_mat)
5036
5007
  self.get_br_and_bstd(betas)
5037
- print(1)
5008
+
5038
5009
 
5039
5010
 
5040
5011
  def regularise_l2(self, betas, backwards = False):
@@ -5574,6 +5545,8 @@ class ObjectiveFunction(object):
5574
5545
  """
5575
5546
  Fits a poisson regression given data and outcomes if dispersion is not declared
5576
5547
  if declared, fits a NB (dispersion = 1) regression or GP (disperions = 2)
5548
+
5549
+ #TODO lineraregression
5577
5550
  Inputs:
5578
5551
  X - array. Design matrix
5579
5552
  y - array. Observed outcomes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: metacountregressor
3
- Version: 0.1.235
3
+ Version: 0.1.237
4
4
  Summary: Extensive Testing for Estimation of Data Count Models
5
5
  Home-page: https://github.com/zahern/CountDataEstimation
6
6
  Author: Zeke Ahern
@@ -11,10 +11,10 @@ metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiL
11
11
  metacountregressor/pareto_logger__plot.py,sha256=mEU2QN4wmsM7t39GJ_XhJ_jjsdl09JOmG0U2jICrAkI,30037
12
12
  metacountregressor/setup.py,sha256=5UcQCCLR8Fm5odA3MX78WwahavxFq4mVD6oq0IuQvAY,936
13
13
  metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
14
- metacountregressor/solution.py,sha256=jyWFVoLWOAJaLKM4Ha0I_uzeALD_7J3F-tsVXc7kEPY,284813
14
+ metacountregressor/solution.py,sha256=4TCa87mLfBtCY2APrfZYsOkw5MDf_rsjErWPW_NgJgc,282948
15
15
  metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
16
- metacountregressor-0.1.235.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
17
- metacountregressor-0.1.235.dist-info/METADATA,sha256=qL2KUxNg3FbX5zBK-_qR8TQ5T05laVMMlRseHCLDPVw,23529
18
- metacountregressor-0.1.235.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
19
- metacountregressor-0.1.235.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
20
- metacountregressor-0.1.235.dist-info/RECORD,,
16
+ metacountregressor-0.1.237.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
17
+ metacountregressor-0.1.237.dist-info/METADATA,sha256=o8WsXzCVVEte5uzaR1_-XjOZ4rDdKMGf9KI3w9TJTX8,23529
18
+ metacountregressor-0.1.237.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
19
+ metacountregressor-0.1.237.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
20
+ metacountregressor-0.1.237.dist-info/RECORD,,