PyPI - metacountregressor - Versions diffs - 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl - Mend

metacountregressor 0.1.47 (py3-none-any.whl) → 0.1.49 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8):

metacountregressor/__init__.py +102 -5
metacountregressor/_device_cust.py +2 -1
metacountregressor/alog.png +0 -0
metacountregressor/solution.py +333 -106
metacountregressor-0.1.49.dist-info/METADATA +236 -0
{metacountregressor-0.1.47.dist-info → metacountregressor-0.1.49.dist-info}/RECORD +7 -6
metacountregressor-0.1.47.dist-info/METADATA +0 -543
{metacountregressor-0.1.47.dist-info → metacountregressor-0.1.49.dist-info}/WHEEL +0 -0

metacountregressor/solution.py (changed)

@@ -41,6 +41,7 @@ from statsmodels.tools.numdiff import approx_fprime, approx_hess
 from sklearn.preprocessing import StandardScaler
 #from tabulate import tabulate
 from texttable import Texttable
+#from optimparallel import minimize_parallel
 from ._device_cust import device as dev
 #from optimparallel import minimize_parallel
@@ -63,7 +64,7 @@ log_lik_max = 1e+200
 log_lik_max = 0
 # Setup Limits, and Batches for custom GPU code
-EXP_UPPER_LIMIT = np.float64(np.log(np.finfo(np.float64).max) - 10.0)
+EXP_UPPER_LIMIT = np.float64(np.log(np.finfo(np.float64).max) - 50.0)
 def _unpack_tuple(x): return x if len(x) > 1 else x[0]
@@ -134,6 +135,14 @@ class ObjectiveFunction(object):
         if self.other_bic:
             print('change this to false latter ')
         offset = None
+        #initi
+        self.constant_value = -5.5
+        self.negative_binomial_value = 0.05
+        self.verbose_safe = True
+        self.zi_force = None #Analst want a zi model and formally declares the zi components below
+        self.zi_force_names = None #delare the zi components
         self.please_print = 1
         self.group_halton = None
         self.grad_yes = False
@@ -170,9 +179,12 @@ class ObjectiveFunction(object):
         self._max_imp = 100000
         self._panels = 1
         self.is_multi = True
+        self.method = 'L-BFGS-B'  # alternatives 'BFGS_2', 'BFGS
+        self.method = 'BFGS_2'
+        self.method = 'Nelder-Mead-BFGS'
+        #Nelder-Mead-BFGS
-        self._max_characteristics = 40
+        self._max_characteristics = 26
@@ -182,11 +194,17 @@ class ObjectiveFunction(object):
                                 'algorithm', '_random_seed', '_max_time',
                                 'forcedvariables', '_obj_1', '_obj_2', '_par',
                                 'Manuel_Estimate', 'test_percentage', 'is_multi', 'val_percentage'
-                                'complexity_level', '_hms', '_mpai', 'group', '_max_characteristics']
+                                'complexity_level', '_hms', '_mpai', 'group', '_max_characteristics', 'zi_force_names']
         for k in kwargs.keys():
             if k in acceptable_keys_list:
                 self.__setattr__(k, self.tryeval(kwargs[k]))
+        if self.zi_force_names is not None:
+            self.zi_force = True
+            if 'const' not in self.zi_force_names:
+                self.zi_force_names = ['const'] + self.zi_force_names
+                print('did this work?')
         if 'complexity_level' in kwargs:
             self.complexity_level = kwargs['complexity_level']
@@ -276,11 +294,23 @@ class ObjectiveFunction(object):
                 test_idx  = [ii for ii in range(len(id_unique)) if id_unique[ii] not in ids]
+                try:  #@IgnoreException
+                    df_train = x_data.loc[train_idx, :]
+                    df_test = x_data.loc[test_idx, :]
+                    y_train =y_data.loc[train_idx, :]
+                    y_test=y_data.loc[test_idx, :]
+                except:
+                    # Convert all values to their real parts
+                    df_real = x_data.select_dtypes(include=[np.number]).apply(np.real)
-                df_train = x_data.loc[train_idx, :]
-                df_test = x_data.loc[test_idx, :]
-                y_train =y_data.loc[train_idx, :]
-                y_test=y_data.loc[test_idx, :]
+                    # Replace the original DataFrame's numerical columns with real-valued ones
+                    x_data[df_real.columns] = df_real
+                    df_train = x_data.iloc[train_idx, :]
+                    df_test = x_data.iloc[test_idx, :]
+                    y_train =y_data.iloc[train_idx, :]
+                    y_test=y_data.iloc[test_idx, :]
@@ -289,9 +319,13 @@ class ObjectiveFunction(object):
             #self._x_data, self._x_data_test, self._y_data, self.y_data_test = train_test_split(new_data_test[data_names], y_data, test_size = self.test_percentage, random_state=self.get_random_seed())
             #data_names = self._random_forest_preprocess()
+        self.n_obs = N
         self._characteristics_names = list(self._x_data.columns)
+        if self.zi_force:
+            self.alpha_hurdle = np.isin(self._characteristics_names, [item.split(':')[0] for item in  self.zi_force_names]).astype(int).tolist()
+            print(1)
         #self._characteristics_names = [x for x in self._characteristics_names if not 'ID' in x]
@@ -410,10 +444,7 @@ class ObjectiveFunction(object):
             self._samples, self._panels, self._characteristics = self._x_data.shape
@@ -506,7 +537,7 @@ class ObjectiveFunction(object):
         self.significant = 0
         # define the states of our explanaotory variables
-        self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test, kwargs.get('Keep_Fit', []))
+        self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test, kwargs.get('must_include', []))
         self._discrete_values = self._discrete_values + \
             [[x for x in self._distribution]] * self._characteristics
@@ -515,7 +546,7 @@ class ObjectiveFunction(object):
         if 'model_types' in kwargs:
             model_types = kwargs['model_types']
         else:
-            model_types = [[0,1]] # add 2 for Generalized Poisson
+            model_types = [[1]] # add 2 for Generalized Poisson
         self._discrete_values = self._discrete_values + self.define_poissible_transforms(self._transformations) + model_types
@@ -530,7 +561,7 @@ class ObjectiveFunction(object):
         # model specs
         self.endog = None
         # solution parameters
-        self._min_characteristics = 4
+        self._min_characteristics = 0
         self._max_hurdle = 4
@@ -585,13 +616,15 @@ class ObjectiveFunction(object):
                         'grouped_terms': [],
                         'hetro_in_means': [],
                         'transformations': ['no'],
-                        'dispersion': i
+                        'dispersion': 1
                     }
                     a = self.modify_initial_fit(manual_fit_spec)
                     self.makeRegression(a)
-                    constant_values.append(self.beta_dict['const'][0][1])
-                    dispersion_values.append(self.beta_dict.get(self._model_type_codes[i], [[0,0],[0,0]])[0][1])
+                    try:
+                        constant_values.append(self.beta_dict['const'][0][1])
+                        dispersion_values.append(self.beta_dict.get(self._model_type_codes[i], [[0,0],[0,0]])[0][1])
+                    except:
+                        print('d')
                     i += 1
                 # Add the values of this iteration to the total
@@ -601,6 +634,7 @@ class ObjectiveFunction(object):
             # Calculate the averages
             constant_values_avg = [x / 100 for x in constant_values_total]
             dispersion_values_avg = [x / 100 for x in dispersion_values_total]
@@ -653,6 +687,24 @@ class ObjectiveFunction(object):
             return np.exp(-lam) * (lam**x) / math.factorial(x) * lognorm.pdf(lam, sigma, scale=np.exp(mu))
         return np.nan_to_num(quad(integrand, 0, np.inf)[0], nan=0)
+    def _call_MAXlike(self):
+        import rpy2.rinterface as rinterface
+        import rpy2.robjects as robjects
+        import rpy2.robjects as ro
+        from rpy2.robjects import pandas2ri
+        r = robjects.r
+        r['source']('testMAX.R')
+        rMAX = robjects.globalenv['maxLik']
+        args = (1)
+        betas = 1
+        def loglike(p): return self._loglik_gradient(
+            p, *args)
+        loglik = ro.conversion._py2rpy(loglik)
+        rMAX(loglik, start = betas)
+        raise Exception('not yet implemented')
     def _random_forest_call_r(self):
         import rpy2.rinterface as rinterface
         import rpy2.robjects as robjects
@@ -959,6 +1011,7 @@ class ObjectiveFunction(object):
             zi_fit = self.none_handler(self.zi_fit)
         dis_fit = [x for x in   self.none_handler(
                 self.dist_fit)]  # check if dis fit is name
         hetro_long = []
         big_hetro = []
         if model_nature is not None:
@@ -1026,7 +1079,7 @@ class ObjectiveFunction(object):
             #br_w_names = np.char.add(randvars, "sd.")
             #br_w_names = np.char.add(br_w_names, rand_vars_dis)
        # br_w_names = br_w_names.tolist()
-            zi_names = [x for x in self.none_handler(zi_fit)]
+            zi_names = [x + ":inflated" for x in self.none_handler(self.zi_force_names)]
             names = fixednames+randvars+chol_names + \
                 br_w_names+chol + zi_names+hetro_long+dispersion_name
@@ -1057,7 +1110,7 @@ class ObjectiveFunction(object):
             randvars = [x for x in self.none_handler(rdm_fit)]
             chol_names = [x for x in self.none_handler(rdm_cor_fit)]
-            zi_names = [x for x in self.none_handler(zi_fit)]
+            zi_names = [x +': inflated' for x in self.none_handler(self.zi_force_names)]
             names = fixednames+randvars+chol_names + zi_names+big_hetro+dispersion_name
@@ -1074,7 +1127,7 @@ class ObjectiveFunction(object):
             except Exception as e:
                 print(e)
-    def summary_alternative(self, long_print=0, model=0, solution=None, save_state = 0):
+    def summary_alternative(self, long_print=0, model=0, solution=None, save_state = 1):
         fmt = "{:19} {:13} {:13.10f} {:13.10f}{:13.10f} {:13.3g} {:3}"
         coeff_name_str_length = 19
@@ -1138,7 +1191,10 @@ class ObjectiveFunction(object):
                 self.coeff_[-1] = np.abs(self.coeff_[-1])
                 if self.coeff_[-1] < 0.25:
-                    self.coeff_[-1] =.25 #min possible value for negbinom
+                    print(self.coeff_[-1], 'is this why')
+                    print(np.exp(self.coeff_[-1]))
+                    self.coeff_[-1] =np.exp(self.coeff_[-1]) #min possible value for negbinom
             self.coeff_ = [self.round_with_padding(x, 2) for x in self.coeff_]
@@ -1365,7 +1421,11 @@ class ObjectiveFunction(object):
             x_data = self._x_data.copy()
         for col in x_data:
-            if all(x_data[col] <= 5):
+            if 'AADT' in self._characteristics_names[col]:
+                new_transform = [['log']]
+                transform_set = transform_set + new_transform
+            elif all(x_data[col] <= 5):
                 new_transform = [['no']]
                 transform_set = transform_set + new_transform
             elif col == "Offset":
@@ -1373,7 +1433,7 @@ class ObjectiveFunction(object):
                 transform_set = transform_set + new_transform
             else:
                 new_transform = transforms.copy()
-                if (x_data[col] > 0).all() and (x_data[col] >= 100000).any():
+                if (x_data[col] >= 0).all() and (x_data[col] >= 200).any():
                     unwanted = {'no', 2, 3, 'exp', 'fact'}
                     new_transform = [
                         ele for ele in new_transform if ele not in unwanted]
@@ -1593,17 +1653,33 @@ class ObjectiveFunction(object):
         alpha_hetro= [
             0 if x != 5 else 1 for x in vector[:self._characteristics]]
-        return {
-        'alpha': alpha,
-        'alpha_rdm': alpha_rdm,
-        'alpha_cor_rdm': alpha_cor_rdm,
-        'alpha_grouped': alpha_grouped,
-        'alpha_hetro': alpha_hetro,
-        'distributions': distributions,
-        'transformations': transformations,
-        'dispersion': dispersion
-    }
+        if self.zi_force == True:
+            return {
+            'alpha': alpha,
+            'alpha_rdm': alpha_rdm,
+            'alpha_cor_rdm': alpha_cor_rdm,
+            'alpha_grouped': alpha_grouped,
+            'alpha_hetro': alpha_hetro,
+            'distributions': distributions,
+            'transformations': transformations,
+            'exog_infl' : self.zi_force_names,
+            'dispersion': dispersion
+            }
+        else:
+            return {
+            'alpha': alpha,
+            'alpha_rdm': alpha_rdm,
+            'alpha_cor_rdm': alpha_cor_rdm,
+            'alpha_grouped': alpha_grouped,
+            'alpha_hetro': alpha_hetro,
+            'distributions': distributions,
+            'transformations': transformations,
+            'dispersion': dispersion
+            }
     # TODO implement the interactions
@@ -2408,7 +2484,7 @@ class ObjectiveFunction(object):
         if self.pvalues is None:
             self.reset_sln()
             return obj_1
-            print(1)
         sub_slns.append([obj_1.copy()])
@@ -2783,12 +2859,7 @@ class ObjectiveFunction(object):
             dparams = dparams.sum(axis = 1)
             dalpha = dalpha.sum(axis = 0)
             return np.r_[dparams.sum(0), dalpha.ravel()]
-            return score
-        score_obs = np.concatenate((dparams, dalpha),
-                                   axis=2)
-        score = np.sum(score_obs, axis=(1,2))
@@ -3053,8 +3124,8 @@ class ObjectiveFunction(object):
                 dparams = dparams.sum(axis = 1)
                 dalpha = dalpha.sum(axis = 0)
                 return np.r_[dparams.sum(0), dalpha]
-                dparams2 = dparms.sum(axis = 1)
-                dalpha1 =dalpha[:,None].sum(axis = 1)
+                #dparams2 = dparms.sum(axis = 1)
+               # dalpha1 =dalpha[:,None].sum(axis = 1)
                 return np.concatenate((dparams.sum(0),dalpha[:, None]), axis = 1)
             else:
                 dparams = dparams.sum(axis = 1)
@@ -3122,11 +3193,11 @@ class ObjectiveFunction(object):
             if obs_specific is False:
                 return np.r_[dparams.sum(0), dalpha_lindley.sum(), dalpha.sum()]
-                return np.r_[dparams.sum(0) + dparams_lindley.sum(0), dalpha_lindley.sum(), dalpha.sum()]
+                #return np.r_[dparams.sum(0) + dparams_lindley.sum(0), dalpha_lindley.sum(), dalpha.sum()]
             else:
                 return np.concatenate((dparams, dalpha_lindley, dalpha), axis=1)
-                return np.concatenate((dparams + dparams_lindley, dalpha_lindley, dalpha), axis=1)
-            return np.r_[dparams.sum(0), dalpha, dparams_lindley.sum(0), dalpha_lindley]
+                #return np.concatenate((dparams + dparams_lindley, dalpha_lindley, dalpha), axis=1)
+            #return np.r_[dparams.sum(0), dalpha, dparams_lindley.sum(0), dalpha_lindley]
         else:
             return np.r_[dparams.sum(0), dalpha]
@@ -3722,8 +3793,8 @@ class ObjectiveFunction(object):
             _type_: _description_
         """
-        if gamma <= 0.25: #min defined value for stable nb
-            gamma = 0.25
+       # if gamma <= 0.01: #min defined value for stable nb
+          #  gamma = 0.01
         endog = y
         mu = lam
@@ -3776,8 +3847,8 @@ class ObjectiveFunction(object):
         Returns:
             _type_: _description_
         """
-        if gamma <= 0.25:
-            gamma = 0.25
+       # if gamma <= 0.25:
+         #   gamma = 0.25
         endog = y
         mu = lam
@@ -3806,8 +3877,8 @@ class ObjectiveFunction(object):
             array: The negative binomial PMF for the given parameters.
         """
-        if gamma <= 0.25:
-            gamma = 0.25
+       # if gamma <= 0.01:
+         #   gamma = 0.01
         endog = y
         mu = lam
@@ -3896,17 +3967,16 @@ class ObjectiveFunction(object):
-            if abs(b_gam) < 0.05:
-                penalty += 1/np.abs(b_gam)
+            #if abs(b_gam) < 0.01:
+             #   penalty += 1/np.abs(b_gam)
-            if b_gam < 0:
-                penalty += 100
-            if b_gam >= 8:
+            if b_gam >= 4.5:
                 penalty += b_gam
-                b_gam = 7.9
+                b_gam = 4.61
+                #b_gam = 7.9
                    # penalty +=  model_nature['dispersion_penalty'] -b_gam
                     #penalty += 1/np.max((0.01,abs(b_gam)))
                    # b_gam = model_nature['dispersion_penalty']
@@ -3951,7 +4021,7 @@ class ObjectiveFunction(object):
         #b_gam = -.3
         if penalty < 0:
             raise Exception
         return penalty, b_gam
@@ -3959,6 +4029,7 @@ class ObjectiveFunction(object):
         #print('this was 0')
         eta = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
+        eta = np.array(eta)
         #eta  = np.float64(eta)
         #eta = np.dot(Xd, params_main)+offset[:,:,0]
         #eta2 = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
@@ -3973,11 +4044,13 @@ class ObjectiveFunction(object):
             #eVd = np.exp(np.clip(eta, 0, EXP_UPPER_LIMIT))
             # eVd = self.my_lindley(np.exp(np.clip(eta, None, EXP_UPPER_LIMIT)), 1) #todo grab param
         else:
             #eVd = self.my_lindley(np.exp(np.clip(eta, None, EXP_UPPER_LIMIT)), 1.29)
             try:
-                eVd = np.exp(np.clip(eta, 0, EXP_UPPER_LIMIT))
+                eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
                 #eta_clip = np.clip(np.array(eta), np.float64(-1000.0), EXP_UPPER_LIMIT)
                # eVd = np.exp(eta_clip)
             except Exception as e:
@@ -4303,7 +4376,7 @@ class ObjectiveFunction(object):
         elif dispersion == 1:
             proba_r = self._nonlog_nbin(y, eVd, b_gam)
+           # print(1)
             #proba_d = self.dnegbimonli(y, eVd, b_gam )
        # print('fuck if this actually works')
@@ -4386,8 +4459,8 @@ class ObjectiveFunction(object):
         if panels is None:
             panels = self.panels
-        if alpha < 0:
-            alpha = np.abs(alpha)
+       # if alpha < 0:
+       #     alpha = np.abs(alpha)
         sig, omeg = self.get_dispersion_paramaters(betas, dispersion)
@@ -4765,7 +4838,7 @@ class ObjectiveFunction(object):
     def _penalty_betas(self, betas, dispersion, penalty, penalty_ap=100.0):
         penalty_val = 0.05
-        penalty_val_max = 100
+        penalty_val_max = 130
         # print('change_later')
         if dispersion != 0:
@@ -4866,8 +4939,18 @@ class ObjectiveFunction(object):
             stuff = tuple(new_stuff)
         return stuff
+    def _loglik_gradient2(self, betas, stuff, *args, **kwargs):
+       return self._loglik_gradient(self, betas, *stuff)
     def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None,  Xr=None, batch_size=None, return_gradient=False, return_gradient_n=False, dispersion=0, test_set=0, return_EV=False, verbose=0, corr_list=None, zi_list=None, exog_infl=None, draws_grouped = None, Xgroup = None, model_nature = None, kwarg=None, **kwargs):
         """Fixed and random parameters are handled separately to speed up the estimation and the results are concatenated.
         """
@@ -4893,6 +4976,7 @@ class ObjectiveFunction(object):
             penalty = self._penalty_betas(
                 betas, dispersion, penalty, float(len(y)/10.0))
+            self.n_obs = len(y) #feeds into gradient
             if draws is None and draws_grouped is None and ('draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1]==0) :
                 if type(Xd) == dict:
@@ -4915,7 +4999,7 @@ class ObjectiveFunction(object):
                         penalty += - lindley_disp
                         lindley_disp = 0
-                eVd = self.eXB_calc(Bf, Xd, offset, dispersion, lindley_disp)
+                eVd = self.eXB_calc(Bf, Xd, offset, main_disper, lindley_disp)
                 if return_EV is True:
@@ -4927,16 +5011,38 @@ class ObjectiveFunction(object):
                 #self.lam = eVd
                 if self.is_dispersion(dispersion):
-                    penalty, betas[-1] = self._penalty_dispersion(dispersion, betas[-1], eVd, y, penalty, model_nature)
+                    penalty, main_disper = self._penalty_dispersion(dispersion, main_disper, eVd, y, penalty, model_nature)
+                betas[-1] = main_disper
                 llf_main = self.loglik_obs(
                         y, eVd, dispersion, main_disper, lindley_disp, betas)
                 #llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
                 loglik = llf_main.sum()
+                if 'exog_infl' in model_nature:
+                    params_infl = betas[Kf:Kf+len(model_nature.get('exog_infl'))]
+                    params_main = Bf
+                    #ones = np.ones((model_nature.get('exog_inflX').shape[0], model_nature.get('exog_inflX').shape[1], 1))
+                    #exog_infl = np.concatenate((ones, model_nature.get('exog_inflX')), axis =2 )
+                    exog_infl = model_nature.get('exog_inflX')
+                    llf_main = llf_main #TODO test this
+                    w = self.predict_logit_part(params_infl, exog_infl)
+                    w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
+                    zero_idx = np.nonzero(y == 0)[0]
+                    nonzero_idx = np.nonzero(y)[0] #FIXME should shape be unravelled
+                    llf = np.zeros_like(y, dtype=np.float64).reshape(-1,1) # TODO test this i added ravel to this code
+                    llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
+                    llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
+                    loglik = llf.sum()
                 loglik = np.clip(loglik, log_lik_min, log_lik_max)
                 if not np.isreal(loglik):
@@ -4965,7 +5071,7 @@ class ObjectiveFunction(object):
                 else:
                     return -loglik+penalty
             # Else, we have draws
+            self.n_obs = len(y) *self.Ndraws
             penalty = self._penalty_betas(
                 betas, dispersion, penalty, float(len(y)/10.0))
@@ -5202,9 +5308,28 @@ class ObjectiveFunction(object):
             #lik = np.nan_to_num(lik, )
             loglik = np.log(lik)
             llf_main = loglik
+            if 'exog_infl' in model_nature:
+                    params_infl = betas[Kf:Kf+len(model_nature.get('exog_infl'))]
+                    params_main = Bf
+                    exog_infl = model_nature.get('exog_inflX')
+                    llf_main = llf_main.ravel() #TODO test this
+                    w = self.predict_logit_part(params_infl, exog_infl)
+                    w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
+                    zero_idx = np.nonzero(y == 0)[0]
+                    nonzero_idx = np.nonzero(y)[0] #FIXME should shape be unravelled
+                    llf = np.zeros_like(y, dtype=np.float64).reshape(-1,1) # TODO test this i added ravel to this code
+                    llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
+                    llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
+                    loglik = llf.sum()
+            else:
-            loglik = loglik.sum()
+                loglik = loglik.sum()
             loglik = np.clip(loglik, log_lik_min, log_lik_max)
@@ -5453,7 +5578,40 @@ class ObjectiveFunction(object):
         return {'success': convergence, 'x': x, 'fun': res, 'message': message,
                 'hess_inv': Hinv, 'grad_n': grad_n, 'grad': g, 'nit': nit, 'nfev': nfev, 'njev': njev}
-    def _minimize(self, loglik_fn, x, args, method, tol, options, bounds = None):
+    def numerical_hessian(self, f, x0, eps=1.e-7):
+        """
+        Function to calculate numerical approximation to the Hessian.
+        Parameters:
+        f : function
+            The function for which the Hessian should be calculated.
+        x0 : ndarray
+            The point at which the Hessian should be calculated.
+        eps : float
+            The small change in x used to calculate the numerical derivative.
+        Returns:
+        H : ndarray
+            Numerical approximation to the Hessian.
+        """
+        n = len(x0)
+        H = np.zeros((n, n))
+        f1 = approx_fprime(x0, f, eps)
+        # Iterate over columns
+        for j in range(n):
+            x1 = np.copy(x0)
+            x1[j] += eps
+            f2 = approx_fprime(x1, f, eps)
+            H[:, j] = (f2 - f1)/eps
+        return H
+    def _minimize(self, loglik_fn, x, args, method, tol, options, bounds = None, hess_calc = None):
         if method == "BFGS":
             #return minimize(loglik_fn, x, args=args, jac=args[6], hess=True, method='BFGS', tol=tol, options=options)
@@ -5473,10 +5631,28 @@ class ObjectiveFunction(object):
         elif method == 'dogleg' or method == 'trust-exact':
             return minimize(loglik_fn, x, args=args, tol=tol, jac=True, hess='3-point', method='trust-constr', options=options)
-        elif method == 'Nelder-Mead':
-            return minimize(loglik_fn, x, args=args, method=method, options=options)
+        elif method == 'Nelder-Mead-BFGS':
+            argbs = list(args)
+            argbs[6] = False
+            argbs[7] = False
+            argbs = tuple(argbs)
+            result = minimize(loglik_fn, x, args=argbs, method='nelder-mead', options=options)
+            # Calculate numerical Hessian
+            if hess_calc is not None:
+                x = result.x
+                H = self.numerical_hessian(lambda x: self._loglik_gradient(x, *argbs), result.x, eps = 1e-7 *self.n_obs)
+                result['Hessian'] = H
+                result['hess_inv'] =np.linalg.pinv(H)
+                print('to do, only if hessian is fhfhfhf')
+                standard_errors = np.sqrt(np.diag(np.linalg.pinv(H)))
+                return result
+                #return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS', options= {'gtol':1e-7*self.N}*self.Ndraws)
+            else:
+                return result
         elif method == 'BFGS_2':
-            return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS', tol=tol, options=options)
+            return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS')
         elif method == "L-BFGS-B":
             return minimize(loglik_fn, x, args=args, jac=args[6],  hess = args[7], method='L-BFGS-B', bounds =bounds, tol=tol, options=options)
@@ -5657,9 +5833,11 @@ class ObjectiveFunction(object):
         for i in coeff_: #pvalue penalty should handle this
             if abs(i) > 120:
                 penalty += abs(i)
-        covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
-            if robust else optim_res['hess_inv']
+        if 'hess_inv' in optim_res:
+            covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
+                if robust else optim_res['hess_inv']
+        else:
+            covariance = np.diag(np.ones(len(optim_res.x)))
         covariance = np.clip(covariance, 0, None)
         stderr = np.sqrt(np.diag(covariance))
         #stderr =  [if np.abs(optim_res['x'][i]) >.1 else min(np.abs(optim_res['x'][i]/1.5), stderr[i]) for i in range(len(optim_res['x']))]
@@ -5678,7 +5856,7 @@ class ObjectiveFunction(object):
            # if post_cor_pams - post_cor_pams > 1:  # if it's only one then we don't technically have any correlations
                 # this calculation takes into account the correlated rpms distinct values
-            for i in range(0, post_cor_pams):
+            for i in range(pre_cor_pams, post_cor_pams):
                 stderr[i] = stderr[i]/np.sqrt(sample_size)
@@ -5740,6 +5918,7 @@ class ObjectiveFunction(object):
     def fitRegression(self, mod,
                       dispersion=0, maxiter=2000, batch_size=None, num_hess=False):
         """
         Fits a poisson regression given data and outcomes if dispersion is not declared
         if declared, fits a NB (dispersion = 1) regression or GP (disperions = 2)
@@ -5751,7 +5930,9 @@ class ObjectiveFunction(object):
         """
         # Set defualt method
         sub_zi = None
-        exog_infl = None
+        exog_infl = None if 'exog_infl' not in mod else mod['exog_infl']
+        inf_betas = 0 if exog_infl is None else len(exog_infl)
         sol = Solution()
         log_ll = 10 ** 9
@@ -5761,11 +5942,11 @@ class ObjectiveFunction(object):
         y = mod.get('y')
         try:
             method = 'BFGS'
-            method2 = 'L-BFGS-B'
+            method2 = self.method
            # method2 = 'BFGS_2'
             if self.hess_yes == False:
                 method2 = 'BFGS_2'
-                method2 = 'L-BFGS-B'
+                method2 = self.method
             #method2 = 'BFGS_2'
             #method2 = 'BFGS_2'
@@ -5815,7 +5996,7 @@ class ObjectiveFunction(object):
                 bb = np.random.normal(
-                        0.1, 0.05, size=k + kr+kg+kh+dispersion_param_num)
+                        0, 0.01, size=k + kr+kg+kh+dispersion_param_num +inf_betas)
                 #bb = np.zeros(k + kr+kg+kh+dispersion_param_num)
@@ -5895,7 +6076,7 @@ class ObjectiveFunction(object):
                             bounds = []
                             for i in bb[:-1]:
                                 bounds = bounds + [(i-30, i+30)]
-                            bounds =bounds + [(0.25, 10)]
+                            bounds =bounds + [(-1, 5)]
                         elif dispersion == 2:
                             bounds = []
@@ -5906,14 +6087,55 @@ class ObjectiveFunction(object):
                         else:
                             bounds = None
                     else:
+                        bb[0] = self.constant_value
+                        if dispersion ==1:
+                            bb[-1] = self.negative_binomial_value
                         bounds = None
-                    hess_est = False if method2 == 'L-BFGS-B' else True
-                    initial_beta = self._minimize(self._loglik_gradient, bb,
-                                                    args=(XX, y, None, None, None, None, calc_gradient, hess_est,
-                                                        dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod),
-                                                    method=method2, tol=1e-5, options={'gtol': tol['gtol']}, bounds = bounds)
+                   # import numpy as np
+                    comment_out = 0
+                    if comment_out:
+                        import rpy2.rinterface as rinterface
+                        import rpy2.robjects as robjects
+                        from rpy2.robjects import numpy2ri
+                        import rpy2.robjects as ro
+                        from rpy2.robjects import pandas2ri
+                        r = robjects.r
+                        numpy2ri.activate()
+                        r['source']('testMAX.R')
+                        rMAX = robjects.globalenv['LLFUN']
+                        hess_est = False
+                        args = (XX, y, None, None, None, None, False, hess_est,
+                                                            dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod)
+                        #betas = 1
+                        # Store the reference to the function as an instance variable
+                        self.loglike = lambda p: self._loglik_gradient(p, *args)
+                        # Use the instance variable when calling the R function
+                        rMAX(self.loglike, start = bb)
+                        #loglik = ro.conversion._py2rpy(loglik)
+                        #rMAX(loglike, start = bb)
+                        # Print the result.
+                        #print(base.summary(result))
+                        hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2'] else True
+                    #intial_beta = minimize(self._loglik_gradient, bb, args =(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method = 'nelder-mead', options={'gtol': 1e-7*len(XX)})
+                    hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
+                    initial_beta = self._minimize(self._loglik_gradient, bb,
+                                                        args=(XX, y, None, None, None, None, calc_gradient, hess_est,
+                                                            dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod),
+                                                        method=method2, tol=1e-5, options={'gtol': tol['gtol']}, bounds = bounds)
+                    #a = minimize_parallel(fun=self._loglik_gradient, x0=bb, args=(XX, y, None, None, None, None, calc_gradient, hess_est,
+                                                  #          dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod))
                     if method2 == 'L-BFGS-B':
                         if hasattr(initial_beta.hess_inv, 'todense'):
                             initial_beta['hess_inv'] = initial_beta.hess_inv.todense() if hasattr(initial_beta.hess_inv, 'todense') else np.array([initial_beta.hess_inv(np.eye(len(bb))[i]) for i in range(len(bb))])
@@ -6018,9 +6240,9 @@ class ObjectiveFunction(object):
                     while len(b) < self.get_param_num(dispersion):
                         if dispersion == 0:
-                            b = np.append(b, np.random.uniform(0.5, 1))
+                            b = np.append(b, np.random.uniform(0.05, 0.1))
                         else:
-                            b = np.insert(b, -1, np.random.uniform(0.5, 1))
+                            b = np.insert(b, -1, np.random.uniform(0.05, 0.1))
                     if dispersion ==1:
                         b[-1] = np.abs(b[-1])
                         if b[-1] >10:
@@ -6185,11 +6407,11 @@ class ObjectiveFunction(object):
                         kgh = len(mod.get('hetro_hold'))
                         draws_hetro = self.prepare_halton(kgh, nh, self.Ndraws, styd, slice_this_way= self.group_halton)
                         mod['draws_hetro'] = draws_hetro.copy()
-                        XHtest = mod.get('XH_test')
-                        nht, pht, ______ = XHtest.shape
-                        draws_hetro_test = self.prepare_halton(kgh, nht, self.Ndraws, styd, slice_this_way= self.group_halton_test)
-                        mod['draws_hetro_test'] = draws_hetro_test.copy()
+                        if self.is_multi:
+                            XHtest = mod.get('XH_test')
+                            nht, pht, ______ = XHtest.shape
+                            draws_hetro_test = self.prepare_halton(kgh, nht, self.Ndraws, styd, slice_this_way= self.group_halton_test)
+                            mod['draws_hetro_test'] = draws_hetro_test.copy()
                     else:
                         draws_hetro = None
@@ -6217,14 +6439,14 @@ class ObjectiveFunction(object):
                         mod['dispersion_penalty'] = np.abs(b[-1])
                     grad_args = (X, y, draws, X, Xr, self.batch_size,False, False, dispersion, 0, False, 0, self.rdm_cor_fit, self.zi_fit, exog_infl, draws_grouped, XG, mod)
                     #self.gradients_est_yes = (1, 1)
-                    if len(b) ==2:
-                        print(1)
                     if draws is None and draws_hetro is not None:
                         print('hold')
                     betas_est = self._minimize(self._loglik_gradient, b,  args=(X, y, draws, X, Xr, self.batch_size,self.grad_yes, self.hess_yes, dispersion, 0, False, 0, self.rdm_cor_fit, self.zi_fit, exog_infl, draws_grouped, XG, mod),
                                                method=method2, tol=tol['ftol'],
-                                               options={'gtol': tol['gtol']}, bounds = bounds)
+                                               options={'gtol': tol['gtol']}, bounds = bounds, hess_calc = True if method2 == 'Nelder-Mead-BFGS' else False)
                     #self.numerical_hessian_calc = True
@@ -6433,8 +6655,8 @@ class ObjectiveFunction(object):
         self.rdm_cor_fit = [x for x, y in zip(
                 select_data, model_nature.get('alpha_cor_rdm')) if y == 1]
-       # [x for x, y in zip(select_data, model_nature.get('hurdle_terms')) if y == 1]
+        #if self.zi_force:
+            #self.zi_fit = [x for x, y in zip(select_data, model_nature.get('exog_infl')) if y == 1]
         #if alpha_grouped is not None:
         self.grouped_rpm = [x for x, y in zip(select_data, model_nature.get('alpha_grouped')) if y == 1]
         self.hetro_fit = [x for x, y in zip(select_data, model_nature.get('alpha_hetro')) if y == 1]
@@ -6619,6 +6841,11 @@ class ObjectiveFunction(object):
         #indices7 = layout[:]
         indices = self.get_named_indices(self.fixed_fit)
         indices5 = self.get_named_indices(self.hetro_fit)
+        if self.zi_force:
+            indices6 =  self.get_named_indices(self.zi_force_names)
+            model_nature['exog_inflX'] = df_tf[:, :, indices6]
         x_h_storage = []
         x_h_storage_test = []
         transform_hetro = []

metacountregressor 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl

metacountregressor 0.1.47__py3-none-any.whl → 0.1.49__py3-none-any.whl