metacountregressor 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -41,6 +41,7 @@ from statsmodels.tools.numdiff import approx_fprime, approx_hess
41
41
  from sklearn.preprocessing import StandardScaler
42
42
  #from tabulate import tabulate
43
43
  from texttable import Texttable
44
+ #from optimparallel import minimize_parallel
44
45
 
45
46
  from ._device_cust import device as dev
46
47
  #from optimparallel import minimize_parallel
@@ -63,7 +64,7 @@ log_lik_max = 1e+200
63
64
  log_lik_max = 0
64
65
 
65
66
  # Setup Limits, and Batches for custom GPU code
66
- EXP_UPPER_LIMIT = np.float64(np.log(np.finfo(np.float64).max) - 10.0)
67
+ EXP_UPPER_LIMIT = np.float64(np.log(np.finfo(np.float64).max) - 50.0)
67
68
  def _unpack_tuple(x): return x if len(x) > 1 else x[0]
68
69
 
69
70
 
@@ -134,6 +135,14 @@ class ObjectiveFunction(object):
134
135
  if self.other_bic:
135
136
  print('change this to false latter ')
136
137
  offset = None
138
+
139
+ #initi
140
+ self.constant_value = -5.5
141
+ self.negative_binomial_value = 0.05
142
+
143
+ self.verbose_safe = True
144
+ self.zi_force = None #Analst want a zi model and formally declares the zi components below
145
+ self.zi_force_names = None #delare the zi components
137
146
  self.please_print = 1
138
147
  self.group_halton = None
139
148
  self.grad_yes = False
@@ -170,9 +179,12 @@ class ObjectiveFunction(object):
170
179
  self._max_imp = 100000
171
180
  self._panels = 1
172
181
  self.is_multi = True
182
+ self.method = 'L-BFGS-B' # alternatives 'BFGS_2', 'BFGS
183
+ self.method = 'BFGS_2'
184
+ self.method = 'Nelder-Mead-BFGS'
185
+ #Nelder-Mead-BFGS
173
186
 
174
-
175
- self._max_characteristics = 40
187
+ self._max_characteristics = 26
176
188
 
177
189
 
178
190
 
@@ -182,11 +194,17 @@ class ObjectiveFunction(object):
182
194
  'algorithm', '_random_seed', '_max_time',
183
195
  'forcedvariables', '_obj_1', '_obj_2', '_par',
184
196
  'Manuel_Estimate', 'test_percentage', 'is_multi', 'val_percentage'
185
- 'complexity_level', '_hms', '_mpai', 'group', '_max_characteristics']
197
+ 'complexity_level', '_hms', '_mpai', 'group', '_max_characteristics', 'zi_force_names']
186
198
  for k in kwargs.keys():
187
199
  if k in acceptable_keys_list:
188
200
  self.__setattr__(k, self.tryeval(kwargs[k]))
189
201
 
202
+ if self.zi_force_names is not None:
203
+ self.zi_force = True
204
+ if 'const' not in self.zi_force_names:
205
+ self.zi_force_names = ['const'] + self.zi_force_names
206
+ print('did this work?')
207
+
190
208
  if 'complexity_level' in kwargs:
191
209
  self.complexity_level = kwargs['complexity_level']
192
210
 
@@ -276,11 +294,23 @@ class ObjectiveFunction(object):
276
294
  test_idx = [ii for ii in range(len(id_unique)) if id_unique[ii] not in ids]
277
295
 
278
296
 
297
+ try: #@IgnoreException
298
+ df_train = x_data.loc[train_idx, :]
299
+ df_test = x_data.loc[test_idx, :]
300
+ y_train =y_data.loc[train_idx, :]
301
+ y_test=y_data.loc[test_idx, :]
302
+ except:
303
+ # Convert all values to their real parts
304
+ df_real = x_data.select_dtypes(include=[np.number]).apply(np.real)
279
305
 
280
- df_train = x_data.loc[train_idx, :]
281
- df_test = x_data.loc[test_idx, :]
282
- y_train =y_data.loc[train_idx, :]
283
- y_test=y_data.loc[test_idx, :]
306
+ # Replace the original DataFrame's numerical columns with real-valued ones
307
+ x_data[df_real.columns] = df_real
308
+
309
+ df_train = x_data.iloc[train_idx, :]
310
+ df_test = x_data.iloc[test_idx, :]
311
+ y_train =y_data.iloc[train_idx, :]
312
+ y_test=y_data.iloc[test_idx, :]
313
+
284
314
 
285
315
 
286
316
 
@@ -289,9 +319,13 @@ class ObjectiveFunction(object):
289
319
  #self._x_data, self._x_data_test, self._y_data, self.y_data_test = train_test_split(new_data_test[data_names], y_data, test_size = self.test_percentage, random_state=self.get_random_seed())
290
320
  #data_names = self._random_forest_preprocess()
291
321
 
292
-
293
-
322
+
323
+ self.n_obs = N
294
324
  self._characteristics_names = list(self._x_data.columns)
325
+ if self.zi_force:
326
+
327
+ self.alpha_hurdle = np.isin(self._characteristics_names, [item.split(':')[0] for item in self.zi_force_names]).astype(int).tolist()
328
+ print(1)
295
329
  #self._characteristics_names = [x for x in self._characteristics_names if not 'ID' in x]
296
330
 
297
331
 
@@ -410,10 +444,7 @@ class ObjectiveFunction(object):
410
444
 
411
445
 
412
446
 
413
-
414
-
415
-
416
-
447
+
417
448
 
418
449
 
419
450
  self._samples, self._panels, self._characteristics = self._x_data.shape
@@ -506,7 +537,7 @@ class ObjectiveFunction(object):
506
537
  self.significant = 0
507
538
  # define the states of our explanaotory variables
508
539
 
509
- self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test, kwargs.get('Keep_Fit', []))
540
+ self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test, kwargs.get('must_include', []))
510
541
  self._discrete_values = self._discrete_values + \
511
542
  [[x for x in self._distribution]] * self._characteristics
512
543
 
@@ -515,7 +546,7 @@ class ObjectiveFunction(object):
515
546
  if 'model_types' in kwargs:
516
547
  model_types = kwargs['model_types']
517
548
  else:
518
- model_types = [[0,1]] # add 2 for Generalized Poisson
549
+ model_types = [[1]] # add 2 for Generalized Poisson
519
550
 
520
551
 
521
552
  self._discrete_values = self._discrete_values + self.define_poissible_transforms(self._transformations) + model_types
@@ -530,7 +561,7 @@ class ObjectiveFunction(object):
530
561
  # model specs
531
562
  self.endog = None
532
563
  # solution parameters
533
- self._min_characteristics = 4
564
+ self._min_characteristics = 0
534
565
 
535
566
 
536
567
  self._max_hurdle = 4
@@ -585,13 +616,15 @@ class ObjectiveFunction(object):
585
616
  'grouped_terms': [],
586
617
  'hetro_in_means': [],
587
618
  'transformations': ['no'],
588
- 'dispersion': i
619
+ 'dispersion': 1
589
620
  }
590
621
  a = self.modify_initial_fit(manual_fit_spec)
591
622
  self.makeRegression(a)
592
- constant_values.append(self.beta_dict['const'][0][1])
593
- dispersion_values.append(self.beta_dict.get(self._model_type_codes[i], [[0,0],[0,0]])[0][1])
594
-
623
+ try:
624
+ constant_values.append(self.beta_dict['const'][0][1])
625
+ dispersion_values.append(self.beta_dict.get(self._model_type_codes[i], [[0,0],[0,0]])[0][1])
626
+ except:
627
+ print('d')
595
628
  i += 1
596
629
 
597
630
  # Add the values of this iteration to the total
@@ -601,6 +634,7 @@ class ObjectiveFunction(object):
601
634
  # Calculate the averages
602
635
  constant_values_avg = [x / 100 for x in constant_values_total]
603
636
  dispersion_values_avg = [x / 100 for x in dispersion_values_total]
637
+
604
638
 
605
639
 
606
640
 
@@ -653,6 +687,24 @@ class ObjectiveFunction(object):
653
687
  return np.exp(-lam) * (lam**x) / math.factorial(x) * lognorm.pdf(lam, sigma, scale=np.exp(mu))
654
688
  return np.nan_to_num(quad(integrand, 0, np.inf)[0], nan=0)
655
689
 
690
+
691
def _call_MAXlike(self):
    """Experimental hook that hands the log-likelihood to R's ``maxLik``.

    Sources ``testMAX.R`` through rpy2, looks up ``maxLik`` in R's global
    environment and calls it with a Python closure around
    ``self._loglik_gradient``. The method is an unfinished stub: the
    arguments and start values are placeholders and the result of the R
    call is discarded.

    Raises:
        NotImplementedError: always, after the R call returns. It is a
            subclass of ``Exception``, so callers that caught the previous
            generic ``Exception`` keep working.
    """
    # Imported lazily so rpy2 / R are only required when this hook is used.
    import rpy2.robjects as robjects

    r = robjects.r
    r['source']('testMAX.R')
    rMAX = robjects.globalenv['maxLik']

    # Placeholder inputs. ``args`` has to be a real tuple: the previous
    # ``(1)`` was a plain int, which made ``*args`` raise TypeError.
    args = (1,)
    betas = 1

    def loglike(p):
        return self._loglik_gradient(p, *args)

    # The closure is called ``loglike``; the earlier code read an unassigned
    # ``loglik`` here and failed with UnboundLocalError.
    # NOTE(review): ``_py2rpy`` is a private rpy2 converter -- confirm it
    # exists in the rpy2 version this package targets.
    loglike_r = robjects.conversion._py2rpy(loglike)
    rMAX(loglike_r, start=betas)
    raise NotImplementedError('not yet implemented')
707
+
656
708
  def _random_forest_call_r(self):
657
709
  import rpy2.rinterface as rinterface
658
710
  import rpy2.robjects as robjects
@@ -959,6 +1011,7 @@ class ObjectiveFunction(object):
959
1011
  zi_fit = self.none_handler(self.zi_fit)
960
1012
  dis_fit = [x for x in self.none_handler(
961
1013
  self.dist_fit)] # check if dis fit is name
1014
+
962
1015
  hetro_long = []
963
1016
  big_hetro = []
964
1017
  if model_nature is not None:
@@ -1026,7 +1079,7 @@ class ObjectiveFunction(object):
1026
1079
  #br_w_names = np.char.add(randvars, "sd.")
1027
1080
  #br_w_names = np.char.add(br_w_names, rand_vars_dis)
1028
1081
  # br_w_names = br_w_names.tolist()
1029
- zi_names = [x for x in self.none_handler(zi_fit)]
1082
+ zi_names = [x + ":inflated" for x in self.none_handler(self.zi_force_names)]
1030
1083
 
1031
1084
  names = fixednames+randvars+chol_names + \
1032
1085
  br_w_names+chol + zi_names+hetro_long+dispersion_name
@@ -1057,7 +1110,7 @@ class ObjectiveFunction(object):
1057
1110
  randvars = [x for x in self.none_handler(rdm_fit)]
1058
1111
  chol_names = [x for x in self.none_handler(rdm_cor_fit)]
1059
1112
 
1060
- zi_names = [x for x in self.none_handler(zi_fit)]
1113
+ zi_names = [x +': inflated' for x in self.none_handler(self.zi_force_names)]
1061
1114
 
1062
1115
  names = fixednames+randvars+chol_names + zi_names+big_hetro+dispersion_name
1063
1116
 
@@ -1074,7 +1127,7 @@ class ObjectiveFunction(object):
1074
1127
  except Exception as e:
1075
1128
  print(e)
1076
1129
 
1077
- def summary_alternative(self, long_print=0, model=0, solution=None, save_state = 0):
1130
+ def summary_alternative(self, long_print=0, model=0, solution=None, save_state = 1):
1078
1131
  fmt = "{:19} {:13} {:13.10f} {:13.10f}{:13.10f} {:13.3g} {:3}"
1079
1132
  coeff_name_str_length = 19
1080
1133
 
@@ -1138,7 +1191,10 @@ class ObjectiveFunction(object):
1138
1191
 
1139
1192
  self.coeff_[-1] = np.abs(self.coeff_[-1])
1140
1193
  if self.coeff_[-1] < 0.25:
1141
- self.coeff_[-1] =.25 #min possible value for negbinom
1194
+ print(self.coeff_[-1], 'is this why')
1195
+ print(np.exp(self.coeff_[-1]))
1196
+ self.coeff_[-1] =np.exp(self.coeff_[-1]) #min possible value for negbinom
1197
+
1142
1198
 
1143
1199
  self.coeff_ = [self.round_with_padding(x, 2) for x in self.coeff_]
1144
1200
 
@@ -1365,7 +1421,11 @@ class ObjectiveFunction(object):
1365
1421
  x_data = self._x_data.copy()
1366
1422
  for col in x_data:
1367
1423
 
1368
- if all(x_data[col] <= 5):
1424
+ if 'AADT' in self._characteristics_names[col]:
1425
+ new_transform = [['log']]
1426
+ transform_set = transform_set + new_transform
1427
+
1428
+ elif all(x_data[col] <= 5):
1369
1429
  new_transform = [['no']]
1370
1430
  transform_set = transform_set + new_transform
1371
1431
  elif col == "Offset":
@@ -1373,7 +1433,7 @@ class ObjectiveFunction(object):
1373
1433
  transform_set = transform_set + new_transform
1374
1434
  else:
1375
1435
  new_transform = transforms.copy()
1376
- if (x_data[col] > 0).all() and (x_data[col] >= 100000).any():
1436
+ if (x_data[col] >= 0).all() and (x_data[col] >= 200).any():
1377
1437
  unwanted = {'no', 2, 3, 'exp', 'fact'}
1378
1438
  new_transform = [
1379
1439
  ele for ele in new_transform if ele not in unwanted]
@@ -1593,17 +1653,33 @@ class ObjectiveFunction(object):
1593
1653
  alpha_hetro= [
1594
1654
  0 if x != 5 else 1 for x in vector[:self._characteristics]]
1595
1655
 
1596
- return {
1597
- 'alpha': alpha,
1598
- 'alpha_rdm': alpha_rdm,
1599
- 'alpha_cor_rdm': alpha_cor_rdm,
1600
- 'alpha_grouped': alpha_grouped,
1601
- 'alpha_hetro': alpha_hetro,
1602
- 'distributions': distributions,
1603
- 'transformations': transformations,
1604
-
1605
- 'dispersion': dispersion
1606
- }
1656
+
1657
+ if self.zi_force == True:
1658
+
1659
+ return {
1660
+ 'alpha': alpha,
1661
+ 'alpha_rdm': alpha_rdm,
1662
+ 'alpha_cor_rdm': alpha_cor_rdm,
1663
+ 'alpha_grouped': alpha_grouped,
1664
+ 'alpha_hetro': alpha_hetro,
1665
+ 'distributions': distributions,
1666
+ 'transformations': transformations,
1667
+ 'exog_infl' : self.zi_force_names,
1668
+ 'dispersion': dispersion
1669
+ }
1670
+
1671
+ else:
1672
+ return {
1673
+ 'alpha': alpha,
1674
+ 'alpha_rdm': alpha_rdm,
1675
+ 'alpha_cor_rdm': alpha_cor_rdm,
1676
+ 'alpha_grouped': alpha_grouped,
1677
+ 'alpha_hetro': alpha_hetro,
1678
+ 'distributions': distributions,
1679
+ 'transformations': transformations,
1680
+
1681
+ 'dispersion': dispersion
1682
+ }
1607
1683
 
1608
1684
  # TODO implement the interactions
1609
1685
 
@@ -2408,7 +2484,7 @@ class ObjectiveFunction(object):
2408
2484
  if self.pvalues is None:
2409
2485
  self.reset_sln()
2410
2486
  return obj_1
2411
- print(1)
2487
+
2412
2488
 
2413
2489
 
2414
2490
  sub_slns.append([obj_1.copy()])
@@ -2783,12 +2859,7 @@ class ObjectiveFunction(object):
2783
2859
  dparams = dparams.sum(axis = 1)
2784
2860
  dalpha = dalpha.sum(axis = 0)
2785
2861
  return np.r_[dparams.sum(0), dalpha.ravel()]
2786
- return score
2787
-
2788
- score_obs = np.concatenate((dparams, dalpha),
2789
- axis=2)
2790
-
2791
- score = np.sum(score_obs, axis=(1,2))
2862
+
2792
2863
 
2793
2864
 
2794
2865
 
@@ -3053,8 +3124,8 @@ class ObjectiveFunction(object):
3053
3124
  dparams = dparams.sum(axis = 1)
3054
3125
  dalpha = dalpha.sum(axis = 0)
3055
3126
  return np.r_[dparams.sum(0), dalpha]
3056
- dparams2 = dparms.sum(axis = 1)
3057
- dalpha1 =dalpha[:,None].sum(axis = 1)
3127
+ #dparams2 = dparms.sum(axis = 1)
3128
+ # dalpha1 =dalpha[:,None].sum(axis = 1)
3058
3129
  return np.concatenate((dparams.sum(0),dalpha[:, None]), axis = 1)
3059
3130
  else:
3060
3131
  dparams = dparams.sum(axis = 1)
@@ -3122,11 +3193,11 @@ class ObjectiveFunction(object):
3122
3193
 
3123
3194
  if obs_specific is False:
3124
3195
  return np.r_[dparams.sum(0), dalpha_lindley.sum(), dalpha.sum()]
3125
- return np.r_[dparams.sum(0) + dparams_lindley.sum(0), dalpha_lindley.sum(), dalpha.sum()]
3196
+ #return np.r_[dparams.sum(0) + dparams_lindley.sum(0), dalpha_lindley.sum(), dalpha.sum()]
3126
3197
  else:
3127
3198
  return np.concatenate((dparams, dalpha_lindley, dalpha), axis=1)
3128
- return np.concatenate((dparams + dparams_lindley, dalpha_lindley, dalpha), axis=1)
3129
- return np.r_[dparams.sum(0), dalpha, dparams_lindley.sum(0), dalpha_lindley]
3199
+ #return np.concatenate((dparams + dparams_lindley, dalpha_lindley, dalpha), axis=1)
3200
+ #return np.r_[dparams.sum(0), dalpha, dparams_lindley.sum(0), dalpha_lindley]
3130
3201
 
3131
3202
  else:
3132
3203
  return np.r_[dparams.sum(0), dalpha]
@@ -3722,8 +3793,8 @@ class ObjectiveFunction(object):
3722
3793
  _type_: _description_
3723
3794
  """
3724
3795
 
3725
- if gamma <= 0.25: #min defined value for stable nb
3726
- gamma = 0.25
3796
+ # if gamma <= 0.01: #min defined value for stable nb
3797
+ # gamma = 0.01
3727
3798
 
3728
3799
  endog = y
3729
3800
  mu = lam
@@ -3776,8 +3847,8 @@ class ObjectiveFunction(object):
3776
3847
  Returns:
3777
3848
  _type_: _description_
3778
3849
  """
3779
- if gamma <= 0.25:
3780
- gamma = 0.25
3850
+ # if gamma <= 0.25:
3851
+ # gamma = 0.25
3781
3852
 
3782
3853
  endog = y
3783
3854
  mu = lam
@@ -3806,8 +3877,8 @@ class ObjectiveFunction(object):
3806
3877
  array: The negative binomial PMF for the given parameters.
3807
3878
  """
3808
3879
 
3809
- if gamma <= 0.25:
3810
- gamma = 0.25
3880
+ # if gamma <= 0.01:
3881
+ # gamma = 0.01
3811
3882
 
3812
3883
  endog = y
3813
3884
  mu = lam
@@ -3896,17 +3967,16 @@ class ObjectiveFunction(object):
3896
3967
 
3897
3968
 
3898
3969
 
3899
- if abs(b_gam) < 0.05:
3900
- penalty += 1/np.abs(b_gam)
3970
+ #if abs(b_gam) < 0.01:
3971
+ # penalty += 1/np.abs(b_gam)
3901
3972
 
3902
3973
 
3903
- if b_gam < 0:
3904
- penalty += 100
3974
+
3905
3975
 
3906
- if b_gam >= 8:
3976
+ if b_gam >= 4.5:
3907
3977
  penalty += b_gam
3908
-
3909
- b_gam = 7.9
3978
+ b_gam = 4.61
3979
+ #b_gam = 7.9
3910
3980
  # penalty += model_nature['dispersion_penalty'] -b_gam
3911
3981
  #penalty += 1/np.max((0.01,abs(b_gam)))
3912
3982
  # b_gam = model_nature['dispersion_penalty']
@@ -3951,7 +4021,7 @@ class ObjectiveFunction(object):
3951
4021
  #b_gam = -.3
3952
4022
  if penalty < 0:
3953
4023
  raise Exception
3954
-
4024
+
3955
4025
  return penalty, b_gam
3956
4026
 
3957
4027
 
@@ -3959,6 +4029,7 @@ class ObjectiveFunction(object):
3959
4029
 
3960
4030
  #print('this was 0')
3961
4031
  eta = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
4032
+ eta = np.array(eta)
3962
4033
  #eta = np.float64(eta)
3963
4034
  #eta = np.dot(Xd, params_main)+offset[:,:,0]
3964
4035
  #eta2 = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
@@ -3973,11 +4044,13 @@ class ObjectiveFunction(object):
3973
4044
 
3974
4045
  #eVd = np.exp(np.clip(eta, 0, EXP_UPPER_LIMIT))
3975
4046
  # eVd = self.my_lindley(np.exp(np.clip(eta, None, EXP_UPPER_LIMIT)), 1) #todo grab param
4047
+
4048
+
3976
4049
  else:
3977
4050
  #eVd = self.my_lindley(np.exp(np.clip(eta, None, EXP_UPPER_LIMIT)), 1.29)
3978
4051
 
3979
4052
  try:
3980
- eVd = np.exp(np.clip(eta, 0, EXP_UPPER_LIMIT))
4053
+ eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
3981
4054
  #eta_clip = np.clip(np.array(eta), np.float64(-1000.0), EXP_UPPER_LIMIT)
3982
4055
  # eVd = np.exp(eta_clip)
3983
4056
  except Exception as e:
@@ -4303,7 +4376,7 @@ class ObjectiveFunction(object):
4303
4376
  elif dispersion == 1:
4304
4377
 
4305
4378
  proba_r = self._nonlog_nbin(y, eVd, b_gam)
4306
-
4379
+ # print(1)
4307
4380
  #proba_d = self.dnegbimonli(y, eVd, b_gam )
4308
4381
  # print('fuck if this actually works')
4309
4382
 
@@ -4386,8 +4459,8 @@ class ObjectiveFunction(object):
4386
4459
  if panels is None:
4387
4460
  panels = self.panels
4388
4461
 
4389
- if alpha < 0:
4390
- alpha = np.abs(alpha)
4462
+ # if alpha < 0:
4463
+ # alpha = np.abs(alpha)
4391
4464
  sig, omeg = self.get_dispersion_paramaters(betas, dispersion)
4392
4465
 
4393
4466
 
@@ -4765,7 +4838,7 @@ class ObjectiveFunction(object):
4765
4838
 
4766
4839
  def _penalty_betas(self, betas, dispersion, penalty, penalty_ap=100.0):
4767
4840
  penalty_val = 0.05
4768
- penalty_val_max = 100
4841
+ penalty_val_max = 130
4769
4842
 
4770
4843
  # print('change_later')
4771
4844
  if dispersion != 0:
@@ -4866,8 +4939,18 @@ class ObjectiveFunction(object):
4866
4939
  stuff = tuple(new_stuff)
4867
4940
 
4868
4941
  return stuff
4942
+
4943
+
4944
+
4945
+
4946
+
4947
+ def _loglik_gradient2(self, betas, stuff, *args, **kwargs):
4948
+
4949
+ return self._loglik_gradient(self, betas, *stuff)
4869
4950
 
4870
4951
 
4952
+
4953
+
4871
4954
  def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False, return_gradient_n=False, dispersion=0, test_set=0, return_EV=False, verbose=0, corr_list=None, zi_list=None, exog_infl=None, draws_grouped = None, Xgroup = None, model_nature = None, kwarg=None, **kwargs):
4872
4955
  """Fixed and random parameters are handled separately to speed up the estimation and the results are concatenated.
4873
4956
  """
@@ -4893,6 +4976,7 @@ class ObjectiveFunction(object):
4893
4976
 
4894
4977
  penalty = self._penalty_betas(
4895
4978
  betas, dispersion, penalty, float(len(y)/10.0))
4979
+ self.n_obs = len(y) #feeds into gradient
4896
4980
  if draws is None and draws_grouped is None and ('draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1]==0) :
4897
4981
 
4898
4982
  if type(Xd) == dict:
@@ -4915,7 +4999,7 @@ class ObjectiveFunction(object):
4915
4999
  penalty += - lindley_disp
4916
5000
  lindley_disp = 0
4917
5001
 
4918
- eVd = self.eXB_calc(Bf, Xd, offset, dispersion, lindley_disp)
5002
+ eVd = self.eXB_calc(Bf, Xd, offset, main_disper, lindley_disp)
4919
5003
 
4920
5004
  if return_EV is True:
4921
5005
 
@@ -4927,16 +5011,38 @@ class ObjectiveFunction(object):
4927
5011
  #self.lam = eVd
4928
5012
 
4929
5013
  if self.is_dispersion(dispersion):
4930
- penalty, betas[-1] = self._penalty_dispersion(dispersion, betas[-1], eVd, y, penalty, model_nature)
5014
+ penalty, main_disper = self._penalty_dispersion(dispersion, main_disper, eVd, y, penalty, model_nature)
4931
5015
 
4932
-
5016
+ betas[-1] = main_disper
4933
5017
  llf_main = self.loglik_obs(
4934
5018
  y, eVd, dispersion, main_disper, lindley_disp, betas)
4935
5019
 
4936
5020
 
4937
5021
  #llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
4938
-
5022
+
4939
5023
  loglik = llf_main.sum()
5024
+ if 'exog_infl' in model_nature:
5025
+ params_infl = betas[Kf:Kf+len(model_nature.get('exog_infl'))]
5026
+ params_main = Bf
5027
+ #ones = np.ones((model_nature.get('exog_inflX').shape[0], model_nature.get('exog_inflX').shape[1], 1))
5028
+ #exog_infl = np.concatenate((ones, model_nature.get('exog_inflX')), axis =2 )
5029
+ exog_infl = model_nature.get('exog_inflX')
5030
+ llf_main = llf_main #TODO test this
5031
+ w = self.predict_logit_part(params_infl, exog_infl)
5032
+
5033
+ w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
5034
+
5035
+
5036
+
5037
+
5038
+ zero_idx = np.nonzero(y == 0)[0]
5039
+ nonzero_idx = np.nonzero(y)[0] #FIXME should shape be unravelled
5040
+
5041
+ llf = np.zeros_like(y, dtype=np.float64).reshape(-1,1) # TODO test this i added ravel to this code
5042
+ llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
5043
+ llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
5044
+ loglik = llf.sum()
5045
+
4940
5046
 
4941
5047
  loglik = np.clip(loglik, log_lik_min, log_lik_max)
4942
5048
  if not np.isreal(loglik):
@@ -4965,7 +5071,7 @@ class ObjectiveFunction(object):
4965
5071
  else:
4966
5072
  return -loglik+penalty
4967
5073
  # Else, we have draws
4968
-
5074
+ self.n_obs = len(y) *self.Ndraws
4969
5075
  penalty = self._penalty_betas(
4970
5076
  betas, dispersion, penalty, float(len(y)/10.0))
4971
5077
 
@@ -5202,9 +5308,28 @@ class ObjectiveFunction(object):
5202
5308
  #lik = np.nan_to_num(lik, )
5203
5309
  loglik = np.log(lik)
5204
5310
  llf_main = loglik
5205
-
5311
+ if 'exog_infl' in model_nature:
5312
+ params_infl = betas[Kf:Kf+len(model_nature.get('exog_infl'))]
5313
+ params_main = Bf
5314
+ exog_infl = model_nature.get('exog_inflX')
5315
+ llf_main = llf_main.ravel() #TODO test this
5316
+ w = self.predict_logit_part(params_infl, exog_infl)
5317
+
5318
+ w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
5319
+
5320
+
5321
+
5322
+
5323
+ zero_idx = np.nonzero(y == 0)[0]
5324
+ nonzero_idx = np.nonzero(y)[0] #FIXME should shape be unravelled
5325
+
5326
+ llf = np.zeros_like(y, dtype=np.float64).reshape(-1,1) # TODO test this i added ravel to this code
5327
+ llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
5328
+ llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
5329
+ loglik = llf.sum()
5330
+ else:
5206
5331
 
5207
- loglik = loglik.sum()
5332
+ loglik = loglik.sum()
5208
5333
 
5209
5334
  loglik = np.clip(loglik, log_lik_min, log_lik_max)
5210
5335
 
@@ -5453,7 +5578,40 @@ class ObjectiveFunction(object):
5453
5578
  return {'success': convergence, 'x': x, 'fun': res, 'message': message,
5454
5579
  'hess_inv': Hinv, 'grad_n': grad_n, 'grad': g, 'nit': nit, 'nfev': nfev, 'njev': njev}
5455
5580
 
5456
- def _minimize(self, loglik_fn, x, args, method, tol, options, bounds = None):
5581
+
5582
def numerical_hessian(self, f, x0, eps=1.e-7):
    """Approximate the Hessian of ``f`` at ``x0`` by finite differences.

    The gradient is obtained with ``approx_fprime`` (imported at the top of
    the file from ``statsmodels.tools.numdiff``) at ``x0`` and again after
    stepping each coordinate by ``eps``; the difference of the two
    gradients divided by ``eps`` gives one column of the Hessian.

    Parameters:
        f : function
            Scalar function of a parameter vector.
        x0 : array-like
            Point at which the Hessian is evaluated. It is converted to a
            float array, so lists and integer arrays are accepted.
        eps : float
            Step used both for the gradient and for the outer difference.

    Returns:
        H : ndarray
            ``(n, n)`` symmetric approximation to the Hessian.
    """
    # Work on a float copy: with an integer array the in-place ``+= eps``
    # below would be truncated away and every column would come out zero.
    x0 = np.array(x0, dtype=np.float64)
    n = len(x0)
    H = np.zeros((n, n))
    grad_base = approx_fprime(x0, f, eps)

    # One column per perturbed coordinate.
    for j in range(n):
        x_step = x0.copy()
        x_step[j] += eps
        grad_step = approx_fprime(x_step, f, eps)
        H[:, j] = (grad_step - grad_base) / eps

    # One-sided differences leave small asymmetries; the true Hessian is
    # symmetric, so average the two triangles.
    return (H + H.T) / 2.0
5610
+
5611
+
5612
+
5613
+
5614
+ def _minimize(self, loglik_fn, x, args, method, tol, options, bounds = None, hess_calc = None):
5457
5615
 
5458
5616
  if method == "BFGS":
5459
5617
  #return minimize(loglik_fn, x, args=args, jac=args[6], hess=True, method='BFGS', tol=tol, options=options)
@@ -5473,10 +5631,28 @@ class ObjectiveFunction(object):
5473
5631
 
5474
5632
  elif method == 'dogleg' or method == 'trust-exact':
5475
5633
  return minimize(loglik_fn, x, args=args, tol=tol, jac=True, hess='3-point', method='trust-constr', options=options)
5476
- elif method == 'Nelder-Mead':
5477
- return minimize(loglik_fn, x, args=args, method=method, options=options)
5634
+ elif method == 'Nelder-Mead-BFGS':
5635
+ argbs = list(args)
5636
+
5637
+ argbs[6] = False
5638
+ argbs[7] = False
5639
+ argbs = tuple(argbs)
5640
+ result = minimize(loglik_fn, x, args=argbs, method='nelder-mead', options=options)
5641
+
5642
+ # Calculate numerical Hessian
5643
+ if hess_calc is not None:
5644
+ x = result.x
5645
+ H = self.numerical_hessian(lambda x: self._loglik_gradient(x, *argbs), result.x, eps = 1e-7 *self.n_obs)
5646
+ result['Hessian'] = H
5647
+ result['hess_inv'] =np.linalg.pinv(H)
5648
+ print('to do, only if hessian is fhfhfhf')
5649
+ standard_errors = np.sqrt(np.diag(np.linalg.pinv(H)))
5650
+ return result
5651
+ #return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS', options= {'gtol':1e-7*self.N}*self.Ndraws)
5652
+ else:
5653
+ return result
5478
5654
  elif method == 'BFGS_2':
5479
- return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS', tol=tol, options=options)
5655
+ return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS')
5480
5656
  elif method == "L-BFGS-B":
5481
5657
 
5482
5658
  return minimize(loglik_fn, x, args=args, jac=args[6], hess = args[7], method='L-BFGS-B', bounds =bounds, tol=tol, options=options)
@@ -5657,9 +5833,11 @@ class ObjectiveFunction(object):
5657
5833
  for i in coeff_: #pvalue penalty should handle this
5658
5834
  if abs(i) > 120:
5659
5835
  penalty += abs(i)
5660
-
5661
- covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
5662
- if robust else optim_res['hess_inv']
5836
+ if 'hess_inv' in optim_res:
5837
+ covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
5838
+ if robust else optim_res['hess_inv']
5839
+ else:
5840
+ covariance = np.diag(np.ones(len(optim_res.x)))
5663
5841
  covariance = np.clip(covariance, 0, None)
5664
5842
  stderr = np.sqrt(np.diag(covariance))
5665
5843
  #stderr = [if np.abs(optim_res['x'][i]) >.1 else min(np.abs(optim_res['x'][i]/1.5), stderr[i]) for i in range(len(optim_res['x']))]
@@ -5678,7 +5856,7 @@ class ObjectiveFunction(object):
5678
5856
 
5679
5857
  # if post_cor_pams - post_cor_pams > 1: # if it's only one then we don't technically have any correlations
5680
5858
  # this calculation takes into account the correlated rpms distinct values
5681
- for i in range(0, post_cor_pams):
5859
+ for i in range(pre_cor_pams, post_cor_pams):
5682
5860
 
5683
5861
  stderr[i] = stderr[i]/np.sqrt(sample_size)
5684
5862
 
@@ -5740,6 +5918,7 @@ class ObjectiveFunction(object):
5740
5918
 
5741
5919
  def fitRegression(self, mod,
5742
5920
  dispersion=0, maxiter=2000, batch_size=None, num_hess=False):
5921
+
5743
5922
  """
5744
5923
  Fits a poisson regression given data and outcomes if dispersion is not declared
5745
5924
  if declared, fits a NB (dispersion = 1) regression or GP (disperions = 2)
@@ -5751,7 +5930,9 @@ class ObjectiveFunction(object):
5751
5930
  """
5752
5931
  # Set defualt method
5753
5932
  sub_zi = None
5754
- exog_infl = None
5933
+ exog_infl = None if 'exog_infl' not in mod else mod['exog_infl']
5934
+ inf_betas = 0 if exog_infl is None else len(exog_infl)
5935
+
5755
5936
 
5756
5937
  sol = Solution()
5757
5938
  log_ll = 10 ** 9
@@ -5761,11 +5942,11 @@ class ObjectiveFunction(object):
5761
5942
  y = mod.get('y')
5762
5943
  try:
5763
5944
  method = 'BFGS'
5764
- method2 = 'L-BFGS-B'
5945
+ method2 = self.method
5765
5946
  # method2 = 'BFGS_2'
5766
5947
  if self.hess_yes == False:
5767
5948
  method2 = 'BFGS_2'
5768
- method2 = 'L-BFGS-B'
5949
+ method2 = self.method
5769
5950
  #method2 = 'BFGS_2'
5770
5951
 
5771
5952
  #method2 = 'BFGS_2'
@@ -5815,7 +5996,7 @@ class ObjectiveFunction(object):
5815
5996
 
5816
5997
 
5817
5998
  bb = np.random.normal(
5818
- 0.1, 0.05, size=k + kr+kg+kh+dispersion_param_num)
5999
+ 0, 0.01, size=k + kr+kg+kh+dispersion_param_num +inf_betas)
5819
6000
  #bb = np.zeros(k + kr+kg+kh+dispersion_param_num)
5820
6001
 
5821
6002
 
@@ -5895,7 +6076,7 @@ class ObjectiveFunction(object):
5895
6076
  bounds = []
5896
6077
  for i in bb[:-1]:
5897
6078
  bounds = bounds + [(i-30, i+30)]
5898
- bounds =bounds + [(0.25, 10)]
6079
+ bounds =bounds + [(-1, 5)]
5899
6080
 
5900
6081
  elif dispersion == 2:
5901
6082
  bounds = []
@@ -5906,14 +6087,55 @@ class ObjectiveFunction(object):
5906
6087
  else:
5907
6088
  bounds = None
5908
6089
  else:
6090
+ bb[0] = self.constant_value
6091
+ if dispersion ==1:
6092
+ bb[-1] = self.negative_binomial_value
5909
6093
  bounds = None
5910
6094
 
5911
- hess_est = False if method2 == 'L-BFGS-B' else True
5912
- initial_beta = self._minimize(self._loglik_gradient, bb,
5913
- args=(XX, y, None, None, None, None, calc_gradient, hess_est,
5914
- dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod),
5915
- method=method2, tol=1e-5, options={'gtol': tol['gtol']}, bounds = bounds)
5916
6095
 
6096
+ # import numpy as np
6097
+
6098
+ comment_out = 0
6099
+ if comment_out:
6100
+ import rpy2.rinterface as rinterface
6101
+ import rpy2.robjects as robjects
6102
+ from rpy2.robjects import numpy2ri
6103
+ import rpy2.robjects as ro
6104
+ from rpy2.robjects import pandas2ri
6105
+ r = robjects.r
6106
+ numpy2ri.activate()
6107
+ r['source']('testMAX.R')
6108
+ rMAX = robjects.globalenv['LLFUN']
6109
+ hess_est = False
6110
+ args = (XX, y, None, None, None, None, False, hess_est,
6111
+ dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod)
6112
+ #betas = 1
6113
+
6114
+
6115
+ # Store the reference to the function as an instance variable
6116
+ self.loglike = lambda p: self._loglik_gradient(p, *args)
6117
+
6118
+ # Use the instance variable when calling the R function
6119
+ rMAX(self.loglike, start = bb)
6120
+ #loglik = ro.conversion._py2rpy(loglik)
6121
+ #rMAX(loglike, start = bb)
6122
+
6123
+ # Print the result.
6124
+ #print(base.summary(result))
6125
+
6126
+
6127
+
6128
+ hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2'] else True
6129
+
6130
+ #intial_beta = minimize(self._loglik_gradient, bb, args =(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method = 'nelder-mead', options={'gtol': 1e-7*len(XX)})
6131
+ hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
6132
+ initial_beta = self._minimize(self._loglik_gradient, bb,
6133
+ args=(XX, y, None, None, None, None, calc_gradient, hess_est,
6134
+ dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod),
6135
+ method=method2, tol=1e-5, options={'gtol': tol['gtol']}, bounds = bounds)
6136
+ #a = minimize_parallel(fun=self._loglik_gradient, x0=bb, args=(XX, y, None, None, None, None, calc_gradient, hess_est,
6137
+ # dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod))
6138
+
5917
6139
  if method2 == 'L-BFGS-B':
5918
6140
  if hasattr(initial_beta.hess_inv, 'todense'):
5919
6141
  initial_beta['hess_inv'] = initial_beta.hess_inv.todense() if hasattr(initial_beta.hess_inv, 'todense') else np.array([initial_beta.hess_inv(np.eye(len(bb))[i]) for i in range(len(bb))])
@@ -6018,9 +6240,9 @@ class ObjectiveFunction(object):
6018
6240
 
6019
6241
  while len(b) < self.get_param_num(dispersion):
6020
6242
  if dispersion == 0:
6021
- b = np.append(b, np.random.uniform(0.5, 1))
6243
+ b = np.append(b, np.random.uniform(0.05, 0.1))
6022
6244
  else:
6023
- b = np.insert(b, -1, np.random.uniform(0.5, 1))
6245
+ b = np.insert(b, -1, np.random.uniform(0.05, 0.1))
6024
6246
  if dispersion ==1:
6025
6247
  b[-1] = np.abs(b[-1])
6026
6248
  if b[-1] >10:
@@ -6185,11 +6407,11 @@ class ObjectiveFunction(object):
6185
6407
  kgh = len(mod.get('hetro_hold'))
6186
6408
  draws_hetro = self.prepare_halton(kgh, nh, self.Ndraws, styd, slice_this_way= self.group_halton)
6187
6409
  mod['draws_hetro'] = draws_hetro.copy()
6188
-
6189
- XHtest = mod.get('XH_test')
6190
- nht, pht, ______ = XHtest.shape
6191
- draws_hetro_test = self.prepare_halton(kgh, nht, self.Ndraws, styd, slice_this_way= self.group_halton_test)
6192
- mod['draws_hetro_test'] = draws_hetro_test.copy()
6410
+ if self.is_multi:
6411
+ XHtest = mod.get('XH_test')
6412
+ nht, pht, ______ = XHtest.shape
6413
+ draws_hetro_test = self.prepare_halton(kgh, nht, self.Ndraws, styd, slice_this_way= self.group_halton_test)
6414
+ mod['draws_hetro_test'] = draws_hetro_test.copy()
6193
6415
 
6194
6416
  else:
6195
6417
  draws_hetro = None
@@ -6217,14 +6439,14 @@ class ObjectiveFunction(object):
6217
6439
  mod['dispersion_penalty'] = np.abs(b[-1])
6218
6440
  grad_args = (X, y, draws, X, Xr, self.batch_size,False, False, dispersion, 0, False, 0, self.rdm_cor_fit, self.zi_fit, exog_infl, draws_grouped, XG, mod)
6219
6441
  #self.gradients_est_yes = (1, 1)
6220
- if len(b) ==2:
6221
- print(1)
6442
+
6443
+
6222
6444
 
6223
6445
  if draws is None and draws_hetro is not None:
6224
6446
  print('hold')
6225
6447
  betas_est = self._minimize(self._loglik_gradient, b, args=(X, y, draws, X, Xr, self.batch_size,self.grad_yes, self.hess_yes, dispersion, 0, False, 0, self.rdm_cor_fit, self.zi_fit, exog_infl, draws_grouped, XG, mod),
6226
6448
  method=method2, tol=tol['ftol'],
6227
- options={'gtol': tol['gtol']}, bounds = bounds)
6449
+ options={'gtol': tol['gtol']}, bounds = bounds, hess_calc = True if method2 == 'Nelder-Mead-BFGS' else False)
6228
6450
 
6229
6451
 
6230
6452
  #self.numerical_hessian_calc = True
@@ -6433,8 +6655,8 @@ class ObjectiveFunction(object):
6433
6655
  self.rdm_cor_fit = [x for x, y in zip(
6434
6656
  select_data, model_nature.get('alpha_cor_rdm')) if y == 1]
6435
6657
 
6436
-
6437
- # [x for x, y in zip(select_data, model_nature.get('hurdle_terms')) if y == 1]
6658
+ #if self.zi_force:
6659
+ #self.zi_fit = [x for x, y in zip(select_data, model_nature.get('exog_infl')) if y == 1]
6438
6660
  #if alpha_grouped is not None:
6439
6661
  self.grouped_rpm = [x for x, y in zip(select_data, model_nature.get('alpha_grouped')) if y == 1]
6440
6662
  self.hetro_fit = [x for x, y in zip(select_data, model_nature.get('alpha_hetro')) if y == 1]
@@ -6619,6 +6841,11 @@ class ObjectiveFunction(object):
6619
6841
  #indices7 = layout[:]
6620
6842
  indices = self.get_named_indices(self.fixed_fit)
6621
6843
  indices5 = self.get_named_indices(self.hetro_fit)
6844
+
6845
+ if self.zi_force:
6846
+ indices6 = self.get_named_indices(self.zi_force_names)
6847
+ model_nature['exog_inflX'] = df_tf[:, :, indices6]
6848
+
6622
6849
  x_h_storage = []
6623
6850
  x_h_storage_test = []
6624
6851
  transform_hetro = []