metacountregressor 0.1.78__py3-none-any.whl → 0.1.93__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacountregressor/main.py +73 -8
- metacountregressor/solution.py +158 -110
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/METADATA +78 -20
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/RECORD +7 -7
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/WHEEL +1 -1
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/top_level.txt +0 -0
metacountregressor/main.py  CHANGED

@@ -29,6 +29,64 @@ def convert_df_columns_to_binary_and_wide(df):
 
 
 def main(args, **kwargs):
+    '''METACOUNT REGRESSOR TESTING ENVIRONMENT'''
+    import statsmodels.api as sm
+
+    data = sm.datasets.sunspots.load_pandas().data
+    # print(data.exog)
+    data_exog = data['YEAR']
+    data_exog = sm.add_constant(data_exog)
+    data_endog = data['SUNACTIVITY']
+
+    # Instantiate a gamma family model with the default link function.
+    import numpy as np
+
+    gamma_model = sm.NegativeBinomial(data_endog, data_exog)
+    gamma_results = gamma_model.fit()
+
+    print(gamma_results.summary())
+
+    # NOW LET's COMPARE THIS TO METACOUNT REGRESSOR
+    import metacountregressor
+    from importlib.metadata import version
+    print(version('metacountregressor'))
+    import pandas as pd
+    import numpy as np
+    from metacountregressor.solution import ObjectiveFunction
+    from metacountregressor.metaheuristics import (harmony_search,
+                                                   differential_evolution,
+                                                   simulated_annealing)
+
+    # Model Decisions,
+    manual_fit_spec = {
+
+        'fixed_terms': ['const', 'YEAR'],
+        'rdm_terms': [],
+        'rdm_cor_terms': [],
+        'grouped_terms': [],
+        'hetro_in_means': [],
+        'transformations': ['no', 'no'],
+        'dispersion': 1 # Negative Binomial
+    }
+
+    # Arguments
+    arguments = {
+        'algorithm': 'hs',
+        'test_percentage': 0,
+        'test_complexity': 6,
+        'instance_number': 'name',
+        'Manual_Fit': manual_fit_spec
+    }
+    obj_fun = ObjectiveFunction(data_exog, data_endog, **arguments)
+    #exit()
+
+
+
     print('the args is:', args)
     print('the kwargs is', kwargs)
 
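Editor's note: the block added above is a self-contained benchmark. It fits a textbook negative binomial with statsmodels on the bundled sunspots data, then fits the same fixed-effects specification (`const` + `YEAR`, NB dispersion) through `ObjectiveFunction` so the two summaries can be compared. A distilled, runnable version of the statsmodels reference half (standard statsmodels public API only, nothing package-specific):

```python
# Reference fit mirroring the hunk above: NB2 regression of sunspot
# activity on year, using only the statsmodels public API.
import statsmodels.api as sm

data = sm.datasets.sunspots.load_pandas().data
exog = sm.add_constant(data['YEAR'])      # constant + single fixed effect
endog = data['SUNACTIVITY']

nb_results = sm.NegativeBinomial(endog, exog).fit()
print(nb_results.summary())               # last row reports the dispersion alpha
```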
@@ -109,6 +167,16 @@ def main(args, **kwargs):
         'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
         'dispersion': 1
     }
+    print('overriding this delete, just want to test the NB')
+    manual_fit_spec = {
+        'fixed_terms': ['const'],
+        'rdm_terms': [],
+        'rdm_cor_terms': [],
+        'grouped_terms': [],
+        'hetro_in_means': [],
+        'transformations': ['no'],
+        'dispersion': 1
+    }
 
     df = pd.read_csv('./data/Ex-16-3.csv')  # read in the data
     y_df = df[['FREQ']].copy()  # only consider crashes

@@ -118,7 +186,7 @@
     x_df['Offset'] = np.log(1 + x_df['AADT'] * x_df['LENGTH'] * 365 / 100000000)
     x_df = x_df.drop(columns=['AADT', 'LENGTH'])
 
-    if args
+    if args.get('seperate_out_factors', 0):
 
         x_df = helperprocess.as_wide_factor(x_df, keep_original=0,
                                             exclude=['INTECHAG', 'CURVES', 'MIMEDSH', 'MXMEDSH', 'SPEED'])

@@ -173,8 +241,8 @@
         x_df = x_df.drop(columns = [ 'zonal_ID', 'ln_AADT', 'ln_seg'])
         x_df['rumble_install_year'] = x_df['rumble_install_year'].astype('category').cat.codes
         x_df.rename(columns={"rumble_install_year": "has_rumble"}, inplace=True)
-
-
+    except Exception as e:
+        print(e)
         x_df = df.drop(columns=['Y'])  # was dropped postcode
 
     group_grab = x_df['county']

@@ -215,7 +283,6 @@
     else:
         print('fitting manually')
         args['Manual_Fit'] = manual_fit_spec
-
     if args['problem_number'] == str(8) or args['problem_number'] == 8:
         print('Maine County Dataset.')
         args['group'] = 'county'

@@ -346,10 +413,8 @@ if __name__ == '__main__':
     override = True
     if override:
         print('todo turn off, in testing phase')
-        parser.add_argument('-problem_number', default='
+        parser.add_argument('-problem_number', default='4')
         print('did it make it')
-
-
     if 'algorithm' not in args:
         parser.add_argument('-algorithm', type=str, default='hs',
                             help='optimization algorithm')

@@ -370,7 +435,7 @@
     parser.print_help()
     args = vars(parser.parse_args())
     print(type(args))
-    # TODO add in chi 2 and df in estimation and compare degrees of freedom
+    # TODO add in chi 2 and df in estimation and compare degrees of freedom this needs to be done in solution
 
     # Print the args.
     profiler = cProfile.Profile()
metacountregressor/solution.py  CHANGED

@@ -47,7 +47,7 @@ np.seterr(divide='ignore', invalid='ignore')
 warnings.simplefilter("ignore")
 
 # define the computation boundary limits
-min_comp_val = 1e-
+min_comp_val = 1e-160
 max_comp_val = 1e+200
 log_lik_min = -1e+200
 log_lik_max = 1e+200

@@ -122,8 +122,9 @@ class ObjectiveFunction(object):
 
     def __init__(self, x_data, y_data, **kwargs):
 
-        self.reg_penalty =
+        self.reg_penalty = 0
         self.power_up_ll = False
+
         self.bic = None
         self.other_bic = False
         self.test_flag = 1

@@ -131,8 +132,8 @@
         print('change this to false latter ')
 
         # initialize values
-        self.constant_value =
-        self.negative_binomial_value =
+        self.constant_value = 0
+        self.negative_binomial_value = 1
 
         self.verbose_safe = True
         self.please_print = kwargs.get('please_print', 0)

@@ -169,7 +170,7 @@
         self._par = 0.3
         self._mpai = 1
         self._max_imp = 100000
-        self._WIC = 1000  # Number of Iterations without Multiobjective Improvement
+        self._WIC = 1000  # Number of Iterations without Multiobjective Improvement #tod chuck into solution
         self._panels = None
         self.is_multi = True
         self.method_ll = 'Nelder-Mead-BFGS'

@@ -389,6 +390,8 @@
         self.initial_sig = 1  # pass the test of a single model
         self.pvalue_sig_value = .1
         self.observations = self._x_data.shape[0]
+        self.minimize_scaler = 1/self.observations # scale the minimization function to the observations
+
         self.batch_size = None
         # open the file in the write mode
         self.grab_transforms = 0
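Editor's note: the new `minimize_scaler = 1/self.observations` turns the minimised objective into a per-observation average. Multiplying an objective by a positive constant leaves the minimiser unchanged, but the optimal value must be divided back by the scaler whenever the log-likelihood is reported, which is exactly what the later change to `loglikelihood = -optim_res['fun']/self.minimize_scaler - penalty` does. A small sketch of the invariance (toy Poisson likelihood, not package code):

```python
import numpy as np
from scipy.optimize import minimize
from scipy.special import gammaln

rng = np.random.default_rng(0)
y = rng.poisson(3.0, size=500)

def negloglik(theta):
    lam = np.exp(theta[0])                     # rate kept positive via exp
    return -np.sum(y * np.log(lam) - lam - gammaln(y + 1))

scaler = 1.0 / y.size
raw = minimize(negloglik, x0=[0.0])
avg = minimize(lambda t: negloglik(t) * scaler, x0=[0.0])

assert np.allclose(raw.x, avg.x, atol=1e-4)    # same argmin either way
print(-avg.fun / scaler, -raw.fun)             # identical log-likelihoods
```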
@@ -841,8 +844,12 @@
 
         return ([self._model_type_codes[dispersion]])
 
-    def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None,
-
+    def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
+        r'''
+        setup for naming of the model summary
+        '''
+
+
         self.name_deleter = []
         group_rpm = None
         group_dist = []

@@ -1015,7 +1022,7 @@
         signif_list = self.pvalue_asterix_add(self.pvalues)
         if model == 1:
 
-            self.coeff_[-1] = np.
+            self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
             if self.coeff_[-1] < 0.25:
                 print(self.coeff_[-1], 'Warning Check Dispersion')
                 print(np.exp(self.coeff_[-1]))
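Editor's note: this change, together with the `alpha = np.exp(gamma)` edits further down, indicates the NB dispersion is now carried through the optimiser on the log scale: the raw coefficient is unconstrained and the likelihood code exponentiates it, which keeps the dispersion strictly positive without box bounds. The summary code then back-transforms the stored raw value (`1/np.exp(...)`) before printing. A one-line sketch of the round trip (values illustrative):

```python
import numpy as np

gamma = -0.7              # raw, unconstrained coefficient seen by the optimiser
alpha = np.exp(gamma)     # dispersion used inside the likelihood, always > 0
print(alpha)              # ~0.497
print(1 / np.exp(gamma))  # ~2.014, the back-transformed value the summary prints
```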
@@ -2683,7 +2690,7 @@
         grad_n = np.concatenate((grad_n_sub, grad_n_sub1), axis=1)
         return gradient, grad_n
 
-    def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False):
+    def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False, alpha = None):
         """
         Negative Binomial model score (gradient) vector of the log-likelihood
         Parameters

@@ -2703,9 +2710,47 @@
 
         """
 
-
+        # Calculate common terms
+        '''
+        n = len(y)
+        n, p, d = X.shape  # n: observations, p: panels (1 in your case), d: explanatory variables
+
+        # Flatten the data since there's only one panel, simplifying the operations
+        X_flat = X.reshape(n * p, d)
+        y_flat = y.flatten()
+        mu_flat = mu.flatten()
+
+        # Prepare score array
+        score = np.zeros(d + 1)  # +1 for alpha
+
+        # Compute the gradient for regression coefficients
+        for j in range(d):  # Exclude the last parameter (alpha)
+            score[j] = np.dot(X_flat[:, j], (y_flat - mu_flat))
+
+        # Compute the gradient for the dispersion parameter
+        if obs_specific:
+            # Adjust the calculation if observation-specific effects are considered
+            sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+                y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+            score[-1] = np.sum(sum_terms)
+        else:
+            # Standard calculation
+            sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+                y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+            score[-1] = np.sum(sum_terms)
+        return score
+        '''
+        #return score
 
-
+
+
+        try:
+            if alpha is None:
+                alpha = np.exp(params[-1])
+            else:
+                alpha = np.exp(params[-1])
             a1 = 1 / alpha * mu ** Q
             prob = a1 / (a1 + mu)
             exog = X

@@ -2747,7 +2792,8 @@
             return np.concatenate((dparams, dalpha),
                                   axis=1)
         except Exception as e:
-            print(
+            print(e)
+            print('NB score exception problem..')
             exc_type, exc_obj, exc_tb = sys.exc_info()
             fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
             print(exc_type, fname, exc_tb.tb_lineno)
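Editor's note: `NB_Score` now exponentiates `params[-1]` before using it, so the derivative returned with respect to the raw parameter must carry the chain-rule factor: if alpha = exp(gamma), then d/dgamma of ll(exp(gamma)) equals alpha times the derivative of ll with respect to alpha. A quick numeric check of that identity on a stand-in function (not the package's likelihood):

```python
import numpy as np

f = lambda alpha: np.log(alpha) - 0.5 * alpha ** 2    # stand-in for ll(alpha)
df = lambda alpha: 1 / alpha - alpha                  # its derivative in alpha

gamma = 0.3
alpha = np.exp(gamma)
analytic = alpha * df(alpha)                          # chain rule: d/dgamma
eps = 1e-6
numeric = (f(np.exp(gamma + eps)) - f(np.exp(gamma - eps))) / (2 * eps)
assert np.isclose(analytic, numeric)
```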
@@ -3404,24 +3450,44 @@
         # if gamma <= 0.01: #min defined value for stable nb
         #     gamma = 0.01
 
+
+
+
         endog = y
         mu = lam
-        alpha = gamma
-        size = 1.0 / alpha * mu ** Q
+        alpha = np.exp(gamma)
+        #size = 1.0 / alpha * mu ** Q
         alpha_size = alpha * mu ** Q
         # prob = size/(size+mu)
         prob = alpha / (alpha + mu)
         # prob = 1/(1+mu*alpha)
+
+        '''test'''
+
+
         try:
             # print(np.shape(y),np.shape(size), np.shape(prob))
-            #
+            #gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
+            #import time
+            #start_time = time.time()
+
 
+            # Measure time for negbinom_pmf
+            #start_time = time.time()
+            #for _ in range(10000):
+
+            #gg = self.negbinom_pmf(alpha_size, prob, y)
+            #end_time = time.time()
+            #print("Custom functieon time:", end_time - start_time)
+            #start_time = time.time()
+            #for _ in range(10000):
             gg = np.exp(
                 gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
                     y + alpha) * np.log(mu + alpha))
-
-            #
-            #
+            gg[np.isnan(gg)] = 1
+            #gg = nbinom.pmf(y ,alpha, prob)
+            #end_time = time.time()
+            #print("Custom functieon time:", end_time - start_time)
 
         except Exception as e:
             print(e)
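Editor's note: the retained closed form evaluates the NB2 probability mass with size r (here `exp(gamma)`) and mean mu. It is algebraically identical to `scipy.stats.nbinom` with success probability r/(r+mu), which makes a convenient cross-check (a sketch; `r` and `mu` are illustrative values):

```python
import numpy as np
from scipy.special import gammaln
from scipy.stats import nbinom

y = np.arange(10)
mu, r = 3.5, 1.8    # mean and NB size (the code's exp(gamma))

# the closed form kept in the diff above
gg = np.exp(gammaln(y + r) - gammaln(y + 1) - gammaln(r)
            + y * np.log(mu) + r * np.log(r) - (y + r) * np.log(mu + r))

# identical parameterisation: size r, prob p = r / (r + mu)
assert np.allclose(gg, nbinom.pmf(y, r, r / (r + mu)))
```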
@@ -3492,7 +3558,7 @@
 
         endog = y
         mu = lam
-        alpha = gamma
+        alpha = np.exp(gamma)
         alpha = alpha * mu ** Q
         size = 1 / alpha * mu ** Q  # also r
         # self.rate_param = size

@@ -3572,21 +3638,8 @@
 
         if dispersion == 1 or dispersion == 4:  # nb
             # if model_nature is not None and 'dispersion_penalty' in model_nature:
-            # if b_gam < 0.8*model_nature['dispersion_penalty']:
-            # penalty += model_nature['dispersion_penalty'] -b_gam
-
-            # if abs(b_gam) < 0.01:
-            # penalty += 1/np.abs(b_gam)
 
-            if b_gam >= 4.5:
-                penalty += b_gam
-                b_gam = 4.61
-                # b_gam = 7.9
-                # penalty += model_nature['dispersion_penalty'] -b_gam
-                # penalty += 1/np.max((0.01,abs(b_gam)))
-                # b_gam = model_nature['dispersion_penalty']
 
-            """
             if b_gam <= 0:
                 #penalty += 100
                 #penalty += abs(b_gam)

@@ -3594,21 +3647,21 @@
                 #b_gam = 1
 
                 # if b_gam < 0.03:
-                penalty +=
+                penalty += min(1, np.abs(b_gam))
 
-                b_gam = 0.
+                b_gam = 0.001
             #
 
-            if b_gam >= 10:
-
+            #if b_gam >= 10:
+            #    penalty+= b_gam
 
-
-                b_gam = min_comp_val
+            # if b_gam == 0:
+            #b_gam = min_comp_val
             #b_gam = 0.03
 
-
+            # b_gam = abs(b_gam)
 
-
+
 
         elif dispersion == 2:
             if b_gam >= 1:

@@ -3761,7 +3814,8 @@
         elif dispersion == 1:
 
             proba_r = self._nonlog_nbin(y, eVd, b_gam)
-
+
+
             # proba_d = self.dnegbimonli(y, eVd, b_gam )
             # print('fuck if this actually works')
 

@@ -3793,7 +3847,7 @@
             proba_p = self._prob_product_across_panels(
                 proba_r, self.panel_info)
             proba_r = proba_p
-        proba_r = np.clip(proba_r, min_comp_val,
+        proba_r = np.clip(proba_r, min_comp_val, max_comp_val)
         loglik = np.log(proba_r)
         return loglik
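Editor's note: the completed `np.clip(proba_r, min_comp_val, max_comp_val)` line explains the module-level boundary constants fixed earlier in this diff. Probabilities that underflow to 0 would give `log(0) = -inf` and poison the optimiser, so they are pinned to a tiny-but-positive floor first. A minimal illustration:

```python
import numpy as np

min_comp_val, max_comp_val = 1e-160, 1e+200
proba = np.array([0.0, 1e-300, 0.2])               # underflow happens at extreme parameters
safe = np.clip(proba, min_comp_val, max_comp_val)
print(np.log(safe))                                # finite everywhere; np.log(proba) would emit -inf
```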
@@ -4095,9 +4149,9 @@
 
         elif dispersion == 1:
 
-            der =
+            der = self.NB_Score(betas, y, eVd, Xd, 0, obs_specific)
             if both:
-                grad_n =
+                grad_n = self.NB_Score(betas, y, eVd, Xd, 0, True)
             return np.nan_to_num(der, nan=200, posinf=200, neginf=-200), np.nan_to_num(grad_n, nan=140, posinf=140,
                                                                                        neginf=-140)

@@ -4351,7 +4405,7 @@
                 P += Xd[key].shape[1]
                 Kf += Xd[key].shape[2]
         else:
-            self.naming_for_printing(betas, 1, dispersion,
+            self.naming_for_printing(betas, 1, dispersion, model_nature=model_nature)
             N, P, Kf = Xd.shape[0], Xd.shape[1], Xd.shape[2]
         betas = np.array(betas)
         Bf = betas[0:Kf]  # Fixed betas

@@ -4381,7 +4435,7 @@
             llf_main = self.loglik_obs(
                 y, eVd, dispersion, main_disper, lindley_disp, betas)
 
-
+            llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
 
             loglik = llf_main.sum()
 

@@ -4402,14 +4456,19 @@
             if return_gradient_n:
                 der, grad_n = self.simple_score_grad(
                     betas, y, eVd, Xd, dispersion, both=True)
-                return (-loglik + penalty, -der, grad_n)
+                #return (-loglik + penalty, -der, grad_n)*self.minimize_scaler
+                scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel(), grad_n))
+                return scaled_tuple
             else:
                 der = self.simple_score_grad(
                     betas, y, eVd, Xd, dispersion, both=False)
-
-
+                scaled_tuple = tuple(
+                    x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel()))
+                return scaled_tuple
+                #return (-loglik + penalty, -der.ravel())*self.minimize_scaler
         else:
-
+
+            return (-loglik + penalty)*self.minimize_scaler
         # Else, we have draws
         self.n_obs = len(y) * self.Ndraws  #todo is this problematic
         penalty += self._penalty_betas(

@@ -4420,7 +4479,7 @@
         # Kf =0
         betas = np.array(betas)
         betas = dev.to_gpu(betas)  # TODO fix mepotnetially problem
-        self.naming_for_printing(betas, 0, dispersion,
+        self.naming_for_printing(betas, 0, dispersion, model_nature=model_nature)
         y = dev.to_gpu(y)
         if draws is not None and draws_grouped is not None:
             draws = np.concatenate((draws_grouped, draws), axis=1)

@@ -4509,11 +4568,11 @@
                 # brstd), draws_)  # Get random coefficients, old method
                 Br = self._transform_rand_betas(br,
                                                 brstd, draws_)  # Get random coefficients
-                self.naming_for_printing(betas, dispersion=dispersion,
+                self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
                 self.Br = Br.copy()
 
             else:
-                self.naming_for_printing(betas, dispersion=dispersion,
+                self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
                 chol_mat = self._chol_mat(
                     len(self.rdm_cor_fit), br, brstd, self.rdm_cor_fit)
                 self.chol_mat = chol_mat.copy()

@@ -4633,34 +4692,18 @@
             # lik = np.nan_to_num(lik, )
             loglik = np.log(lik)
             llf_main = loglik
-            if 'exog_infl' in model_nature:
-                params_infl = betas[Kf:Kf + len(model_nature.get('exog_infl'))]
-                params_main = Bf
-                exog_infl = model_nature.get('exog_inflX')
-                llf_main = llf_main.ravel()  # TODO test this
-                w = self.predict_logit_part(params_infl, exog_infl)
-
-                w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-
-                zero_idx = np.nonzero(y == 0)[0]
-                nonzero_idx = np.nonzero(y)[0]  # FIXME should shape be unravelled
-
-                llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1)  # TODO test this i added ravel to this code
-                llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
-                llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-                loglik = llf.sum()
-            else:
 
-
+
+            loglik = loglik.sum()
 
             loglik = np.clip(loglik, log_lik_min, log_lik_max)
             if self.power_up_ll:
                 penalty += self.regularise_l2(betas)
-
+
             penalty += self.regularise_l2(betas)
             if not return_gradient:
 
-                output = (-loglik + penalty,)
+                output = ((-loglik + penalty)*self.minimize_scaler,)
                 if verbose > 1:
                     print(
                         f"Evaluation {self.total_fun_eval} Log-Lik.={-loglik:.2f}")

@@ -4690,19 +4733,24 @@
                 #     Hinv = np.linalg.inv(H)
                 # except Exception:
                 #     Hinv = np.linalg.pinv(H)
-
+                scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad, grad_n))
+                return scaled_tuple
+                #output = (-loglik + penalty, -grad, grad_n)*self.minimize_scaler
 
-                return output
+                #return output
             else:
+                scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad))
+                return scaled_tuple
+                #output = (-loglik + penalty, -grad)*self.minimize_scaler
 
-                output
-
-                return output
+                #return output
         except Exception as e:
             traceback.print_exc()
             print(e)
 
-
+    def minimize_function(self, loglike):
+        r'Takes the logliklihood function and tranforms it to a more handed minimization function'
+        return loglike/self.n_obs
     def print_chol_mat(self, betas):
         print(self.chol_mat)
         self.get_br_and_bstd(betas)
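Editor's note: every return path above now scales the objective and its gradient by the same `minimize_scaler`. That consistency matters because the tuples are consumed by SciPy-style optimisers with `jac=True`: if only the function value were scaled, the line search would see a gradient that disagrees with the objective. A sketch of the pattern (generic quadratic, not the package's likelihood):

```python
import numpy as np
from scipy.optimize import minimize

scaler = 1e-3    # stand-in for 1/n_obs

def fun_and_grad(x):
    f = np.sum(x ** 2)
    g = 2 * x
    return f * scaler, g * scaler    # value and gradient scaled together

res = minimize(fun_and_grad, x0=np.array([3.0, -2.0]), jac=True, method='BFGS')
print(res.x)                         # ~[0, 0] regardless of the scaler
```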
@@ -4938,12 +4986,16 @@
         return H
 
     def _minimize(self, loglik_fn, x, args, method, tol, options, bounds=None, hess_calc=None):
-
+        #method = 'BFGS'
        if method == "BFGS":
 
            try:
+                argbs = list(args)
 
-
+                argbs[7] = True
+                argsb = tuple(argbs)
+                a = self._bfgs(loglik_fn, x, args=argsb, tol=tol, **options)
+                return self._bfgs(loglik_fn, x, args=args, tol=tol, **options)
 
            except:
                return minimize(loglik_fn, x, args=args, jac=args[6], method='BFGS', tol=tol, options=options)

@@ -5190,7 +5242,7 @@
         if self.power_up_ll:
             loglikelihood =-optim_res['fun']/2 - penalty
         else:
-            loglikelihood = -optim_res['fun'] - penalty
+            loglikelihood = -optim_res['fun']/self.minimize_scaler - penalty
 
         # self.coeff_names = coeff_names
         # self.total_iter = optim_res['nit']

@@ -5249,9 +5301,9 @@
         betas_est - array. Coefficients which maximize the negative log-liklihood.
         """
         # Set defualt method
-
-
-
+        #TODO, the inital fit worked but it throws
+
+
 
         sol = Solution()
         log_ll = 10.0 ** 9

@@ -5266,10 +5318,7 @@
         if self.hess_yes == False:
             method2 = 'BFGS_2'
             method2 = self.method_ll
-            # method2 = 'BFGS_2'
 
-            # method2 = 'BFGS_2'
-            # method2 = 'dogleg'
         bic = None
         pvalue_alt = None
         zvalues = None

@@ -5287,7 +5336,7 @@
 
         dispersion_param_num = self.is_dispersion(dispersion)
 
-        paramNum = self.get_param_num(dispersion)
+        #paramNum = self.get_param_num(dispersion)
         self.no_random_paramaters = 0
         if 'XG' in mod:
             XX = np.concatenate((mod.get('X'), mod.get('XG'), mod.get('Xr'), mod.get('XH')), axis=2)

@@ -5313,7 +5362,7 @@
             XX_test = mod.get('Xr_test')
 
         bb = np.random.uniform(
-            -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num
+            -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num)
 
         if method == 'L-BFGS-B':
             if dispersion == 0:

@@ -5347,11 +5396,13 @@
             hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
             initial_beta = self._minimize(self._loglik_gradient, bb,
                                           args=(XX, y, None, None, None, None, calc_gradient, hess_est,
-                                                dispersion, 0, False, 0, None,
+                                                dispersion, 0, False, 0, None, None, None, None, None,
                                                 mod),
                                           method=method2, tol=1e-5, options={'gtol': tol['gtol']},
                                           bounds=bounds)
 
+
+
             if method2 == 'L-BFGS-B':
                 if hasattr(initial_beta.hess_inv, 'todense'):
                     initial_beta['hess_inv'] = initial_beta.hess_inv.todense() if hasattr(initial_beta.hess_inv,

@@ -5363,7 +5414,7 @@
             if initial_beta is not None and np.isnan(initial_beta['fun']):
                 initial_beta = self._minimize(self._loglik_gradient, bb,
                                               args=(XX, y, None, None, None, None, True, True, dispersion,
-                                                    0, False, 0, None,
+                                                    0, False, 0, None, None, None, None, None, mod),
                                               method=method2, tol=tol['ftol'], options={'gtol': tol['gtol']})
 
             if initial_beta is not None and not np.isnan(initial_beta['fun']):

@@ -5387,24 +5438,24 @@
                                   loglik=log_ll_fixed, num_parm=paramNum, GOF=other_measures)
 
                 self.naming_for_printing(
-                    initial_beta['x'], 1, dispersion,
+                    initial_beta['x'], 1, dispersion, model_nature=mod)
 
                 if self.is_multi:
                     in_sample_mae = self.validation(
                         initial_beta['x'], mod.get('y'), mod.get('X'), dispersion=dispersion,
-                        rdm_cor_fit=self.rdm_cor_fit,
+                        rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
                         testing=0)
 
                     sol.add_objective(TRAIN=in_sample_mae)
                     MAE_out = self.validation(
                         initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
-                        rdm_cor_fit=self.rdm_cor_fit,
+                        rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0)
                     sol.add_objective(TEST=MAE_out)
 
                     if self.val_percentage >0:
                         MAE_VAL = self.validation(
                             initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
-                            rdm_cor_fit=self.rdm_cor_fit,
+                            rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
                             validation=1)
                         sol.add_objective(VAL=MAE_VAL)
                 if sol[self._obj_1] <= self.best_obj_1:

@@ -5509,9 +5560,6 @@
 
                     bounds.append((np.random.uniform(0.05, .15), bob2[count] + 7))
                     count += 1
-
-
-
                 elif ii < jj:
                     if bob2[count] > 0:
 

@@ -5584,14 +5632,14 @@
             mod['dispersion_penalty'] = np.abs(b[-1])
         grad_args = (
             X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0, self.rdm_cor_fit,
-            None,
+            None, None, draws_grouped, XG, mod)
         # self.gradients_est_yes = (1, 1)
 
         if draws is None and draws_hetro is not None:
             print('hold')
         betas_est = self._minimize(self._loglik_gradient, b, args=(
             X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
-            self.rdm_cor_fit, None,
+            self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
                                    method=method2, tol=tol['ftol'],
                                    options={'gtol': tol['gtol']}, bounds=bounds,
                                    hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)

@@ -5610,7 +5658,7 @@
             betas_est = self._minimize(self._loglik_gradient, b, args=(
                 X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0,
                 self.rdm_cor_fit,
-                None,
+                None, None, draws_grouped, XG, mod),
                                        method=method2, tol=tol['ftol'],
                                        options={'gtol': tol['gtol']})

@@ -5646,7 +5694,7 @@
 
         paramNum = len(betas_est['x'])
         self.naming_for_printing(
-            betas_est['x'], 0, dispersion,
+            betas_est['x'], 0, dispersion, model_nature=mod)
 
         sol.add_objective(bic=bic, aic=aic,
                           loglik=log_ll, num_parm=paramNum, GOF=other_measures)

@@ -5656,19 +5704,19 @@
             try:
 
                 in_sample_mae = self.validation(betas_est['x'], y, X, Xr, dispersion=dispersion,
-                                                rdm_cor_fit=self.rdm_cor_fit,
+                                                rdm_cor_fit=self.rdm_cor_fit,
                                                 model_nature=mod, testing=0)
                 sol.add_objective(TRAIN=in_sample_mae)
                 y_test, X_test, Xr_test = mod.get('y_test'), mod.get('X_test'), mod.get('Xr_test')
                 Xr_grouped_test = mod.get('Xrtest')
                 MAE_test = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
-                                           rdm_cor_fit=self.rdm_cor_fit,
+                                           rdm_cor_fit=self.rdm_cor_fit,
                                            model_nature=mod)
 
                 sol.add_objective(TEST=MAE_test)
-                if self.val_percentage >0:
+                if self.val_percentage > 0:
                     MAE_val = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
-                                              rdm_cor_fit=self.rdm_cor_fit,
+                                              rdm_cor_fit=self.rdm_cor_fit,
                                               model_nature=mod, validation=1)
                     sol.add_objective(VAL=MAE_val)
 

@@ -6105,17 +6153,17 @@
         if self.is_quanitifiable_num(obj_1[self._obj_1]) and pvalues is not None:
             self.bic = obj_1['bic']
             self.pvalues = pvalues
-            if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c",
+            if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c", 'grp', 'xh']):
                 # todo: probably delete
                 self.naming_for_printing(
-                    pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
+                    pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
                     obj_1, model_nature)
             else:
                 if is_delete == 0:
                     # todo: probably delete
                     self.naming_for_printing(
                         pvalues, 1, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
-
+                        obj_1, model_nature)
             self.coeff_ = betas
             self.stderr = stderr
             self.zvalues = zvalues
{metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/METADATA  CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: metacountregressor
-Version: 0.1.78
+Version: 0.1.93
 Summary: Extensions for a Python package for estimation of count models.
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern

@@ -11,12 +11,18 @@ Description-Content-Type: text/markdown
 License-File: LICENSE.txt
 Requires-Dist: numpy >=1.13.1
 Requires-Dist: scipy >=1.0.0
+Requires-Dist: requests
 
 <div style="display: flex; align-items: center;">
-    <img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width:
+    <img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 100px; margin-right: 20px;">
     <p><span style="font-size: 60px;"><strong>MetaCountRegressor</strong></span></p>
 </div>
 
+# Tutorial also available as a jupyter notebook
+[Download Example Notebook](https://github.com/zahern/CountDataEstimation/blob/main/Tutorial.ipynb)
+
+The tutorial provides more extensive examples on how to run the code and perform experiments. Further documentation is currently in development.
+
 ##### Quick Setup
 The Below code demonstrates how to set up automatic optimization assisted by the harmony search algorithm. References to the Differential Evolution and Simulated Annealing has been mentioned (change accordingly)
 

@@ -35,8 +41,15 @@ from metacountregressor.solution import ObjectiveFunction
 from metacountregressor.metaheuristics import (harmony_search,
                                                differential_evolution,
                                                simulated_annealing)
+
+
 ```
 
+    loaded standard packages
+    loaded helper
+    testing
+
+
 #### Basic setup.
 The initial setup involves reading in the data and selecting an optimization algorithm. As the runtime progresses, new solutions will be continually evaluated. Finally, at the end of the runtime, the best solution will be identified and printed out. In the case of multiple objectives all of the best solutions will be printed out that belong to the Pareto frontier.
 

@@ -53,7 +66,7 @@ X = df.drop(columns=['FREQ', 'ID', 'AADT'])
 
 #some example argument, these are defualt so the following line is just for claritity. See the later agruments section for detials.
 arguments = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number':1,
-             'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "
+             'val_percentage':0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "_max_time": 6}
 # Fit the model with metacountregressor
 obj_fun = ObjectiveFunction(X, y, **arguments)
 #replace with other metaheuristics if desired

@@ -71,7 +84,7 @@ Note: Please Consider the main arguments to change.
 - `val_percentage`: This parameter represents the percentage of data used to validate the model. The value 0.15 corresponds to 15% of the data.
 - `test_complexity`: This parameter defines the complexity level for testing. The value 6 tests all complexities. Alternatively, you can provide a list of numbers to consider different complexities. The complexities are further explained later in this document.
 - `instance_number`: This parameter is used to give a name to the outputs.
-- `
+- `_obj_1`: This parameter has multiple choices for obj_1, such as 'bic', 'aic', and 'hqic'. Only one choice should be defined as a string value.
 - `_obj_2`: This parameter has multiple choices for objective 2, such as 'RMSE_TEST', 'MSE_TEST', and 'MAE_TEST'.
 - `_max_time`: This parameter specifies the maximum number of seconds for the total estimation before stopping.
 - `distribution`: This parameter is a list of distributions to consider. Please select all of the available options and put them into a list of valid options if you want to to consider the distribution type for use when modellign with random parameters. The valid options include: 'Normal', 'LnNormal', 'Triangular', and 'Uniform'.

@@ -80,7 +93,7 @@ Note: Please Consider the main arguments to change.
 
 
 
-###
+### Example of changing the arguments:
 Modify the arguments according to your preferences using the commented code as a guide.
 
 

@@ -108,16 +121,18 @@ Listed below is an example of how to specify an initial solution within the fram
 
 
 ```python
-#Model Decisions, Specify for
+#Model Decisions, Specify for initial solution that will be optimised.
 manual_fit_spec = {
     'fixed_terms': ['SINGLE', 'LENGTH'],
     'rdm_terms': ['AADT:normal'],
-    'rdm_cor_terms': ['GRADEBR:
+    'rdm_cor_terms': ['GRADEBR:normal', 'CURVES:normal'],
     'grouped_terms': [],
     'hetro_in_means': ['ACCESS:normal', 'MINRAD:normal'],
     'transformations': ['no', 'no', 'log', 'no', 'no', 'no', 'no'],
-    'dispersion':
+    'dispersion': 0
 }
+
+
 #Search Arguments
 arguments = {
     'algorithm': 'hs',

@@ -129,7 +144,47 @@ arguments = {
 obj_fun = ObjectiveFunction(X, y, **arguments)
 ```
 
-
+Setup Complete...
+Benchmaking test with Seed 42
+--------------------------------------------------------------------------------
+Log-Likelihood: -1339.1862434675106
+--------------------------------------------------------------------------------
+bic: 2732.31
+--------------------------------------------------------------------------------
+MSE: 650856.32
++--------------------------+--------+-------+----------+----------+------------+
+| Effect                   | $\tau$ | Coeff | Std. Err | z-values | Prob |z|>Z |
++==========================+========+=======+==========+==========+============+
+| LENGTH                   | no     | -0.15 | 0.01     | -12.98   | 0.00***    |
++--------------------------+--------+-------+----------+----------+------------+
+| SINGLE                   | no     | -2.46 | 0.04     | -50.00   | 0.00***    |
++--------------------------+--------+-------+----------+----------+------------+
+| GRADEBR                  | log    | 4.23  | 0.10     | 42.17    | 0.00***    |
++--------------------------+--------+-------+----------+----------+------------+
+| CURVES                   | no     | 0.51  | 0.01     | 34.78    | 0.00***    |
++--------------------------+--------+-------+----------+----------+------------+
+| Chol: GRADEBR (Std.      |        | 2.21  | 0.00     | 50.00    | 0.00***    |
+| Dev. normal) )           |        |       |          |          |            |
++--------------------------+--------+-------+----------+----------+------------+
+| Chol: CURVES (Std. Dev.  |        | -0.51 | 0.00     | -50.00   | 0.00***    |
+| normal) )                |        |       |          |          |            |
++--------------------------+--------+-------+----------+----------+------------+
+| Chol: CURVES (Std. Dev.  | no     | 0.55  | 0.00     | 50.00    | 0.00***    |
+| normal) . GRADEBR (Std.  |        |       |          |          |            |
+| Dev. normal )            |        |       |          |          |            |
++--------------------------+--------+-------+----------+----------+------------+
+| main: MINRAD: hetro      | no     | -0.00 | 0.00     | -44.36   | 0.00***    |
+| group 0                  |        |       |          |          |            |
++--------------------------+--------+-------+----------+----------+------------+
+| ACCESS: hetro group 0    |        | 0.68  | 0.09     | 7.68     | 0.00***    |
++--------------------------+--------+-------+----------+----------+------------+
+| main: MINRAD: hetro      |        | -0.00 | 0.00     | -44.86   | 0.00***    |
+| group 0:normal:sd hetro  |        |       |          |          |            |
+| group 0                  |        |       |          |          |            |
++--------------------------+--------+-------+----------+----------+------------+
+
+
+Simarly to return the results feed the objective function into a metaheuristic solution algorithm. An example of this is provided below:
 
 
 ```python

@@ -137,7 +192,7 @@ results = harmony_search(obj_fun)
 print(results)
 ```
 
-
+# Notes:
 ### Capabilities of the software include:
 * Handling of Panel Data
 * Support for Data Transformations

@@ -155,11 +210,11 @@ Capability to handle heterogeneity in the means of the random parameters
 * Customization of Hyper-parameters to solve problems tailored to your dataset
 * Out-of-the-box optimization capability using default metaheuristics
 
-###
+### Intepreting the output of the model:
 A regression table is produced. The following text elements are explained:
 - Std. Dev.: This column appears for effects that are related to random paramters and displays the assument distributional assumption next to it
 - Chol: This term refers to Cholesky decomposition element, to show the correlation between two random paramaters. The combination of the cholesky element on iyself is equivalent to a normal random parameter.
-- hetro group
+- hetro group: This term represents the heterogeneity group number, which refers all of the contributing factors that share hetrogentiy in the means to each other under the same numbered value.
 - $\tau$: This column, displays the type of transformation that was applied to the specific contributing factor in the data.
 
 

@@ -211,10 +266,10 @@ The following list describes the arguments available in this function. By defaul
 
 8. **`_max_time`**: This argument is used to add a termination time in the algorithm. It takes values as seconds. Note the time is only dependenant on the time after intial population of solutions are generated.
 
-
+## Example: Assistance by Harmony Search
 
 
-Let's
+Let's begin by fitting very simple models and use the structure of these models to define our objectives. Then, we can conduct a more extensive search on the variables that are more frequently identified. For instance, in the case below, the complexity is level 3, indicating that we will consider, at most randomly correlated parameters. This approach is useful for initially identifying a suitable set of contributing factors for our search.
 
 
 

@@ -241,27 +296,30 @@ arguments = {
     '_max_time': 10000
 }
 obj_fun = ObjectiveFunction(X, y, **arguments)
-
 results = harmony_search(obj_fun)
 print(results)
 ```
 
+## Paper
+
+The following tutorial is in conjunction with our latest paper. A link the current paper can be found here [MetaCountRegressor](https://www.overleaf.com/read/mszwpwzcxsng#c5eb0c)
+
 ## Contact
 If you have any questions, ideas to improve MetaCountRegressor, or want to report a bug, just open a new issue in [GitHub repository](https://github.com/zahern/CountDataEstimation).
 
 ## Citing MetaCountRegressor
 Please cite MetaCountRegressor as follows:
 
-Ahern, Z., Corry P., Paz A. (
+Ahern, Z., Corry P., Paz A. (2024). MetaCountRegressor [Computer software]. [https://pypi.org/project/metacounregressor/](https://pypi.org/project/metacounregressor/)
 
 Or using BibTex as follows:
 
 ```bibtex
-@misc{
-   author = {Zeke Ahern
+@misc{Ahern2024Meta,
+   author = {Zeke Ahern, Paul Corry and Alexander Paz},
    journal = {PyPi},
    title = {metacountregressor · PyPI},
-   url = {https://pypi.org/project/metacountregressor/0.1.
-   year = {
+   url = {https://pypi.org/project/metacountregressor/0.1.80/},
+   year = {2024},
 }
 
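Editor's note: for readers following the README hunks above, the `manual_fit_spec` dictionary pins the starting model specification, and the diff's own comments mark `'dispersion': 1` as negative binomial (the README's Poisson-style example uses `0`). A minimal specification in the same shape (keys exactly as used throughout this diff; the dispersion-code reading is inferred from the diff's comments, not from separate documentation):

```python
# Minimal manual specification mirroring the structure used in this diff.
manual_fit_spec = {
    'fixed_terms': ['const'],     # fixed-coefficient effects
    'rdm_terms': [],              # random parameters, e.g. 'AADT:normal'
    'rdm_cor_terms': [],          # correlated random parameters
    'grouped_terms': [],
    'hetro_in_means': [],         # heterogeneity-in-means effects
    'transformations': ['no'],    # one entry per term above
    'dispersion': 1               # 1 = negative binomial per the diff's comment
}
```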
{metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/RECORD  CHANGED

@@ -3,17 +3,17 @@ metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0
 metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
 metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
 metacountregressor/helperprocess.py,sha256=Sc5gJ7ffFlkya5B5KQwE33xxXuIQyF6OaYtSikLa3pQ,12968
-metacountregressor/main.py,sha256=
+metacountregressor/main.py,sha256=7ln6YvX2Nmesw1ose7T-2BQdLfDz0XmiLnP991AgQHw,18273
 metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
 metacountregressor/metaheuristics.py,sha256=2MW3qlgs7BFbe_w64snLSKc4Y0-e_9sa3s_96rUm_SE,105887
 metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiLur0k,23096
 metacountregressor/pareto_logger__plot.py,sha256=mEU2QN4wmsM7t39GJ_XhJ_jjsdl09JOmG0U2jICrAkI,30037
 metacountregressor/setup.py,sha256=8w6IqX0tJsbYrOI1BJLIJCIvOnunKli5I9fsF5PhHv4,919
 metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
-metacountregressor/solution.py,sha256=
+metacountregressor/solution.py,sha256=wigjQ4tJrMS0EvbzmRMb2JRT7s0guvPdpCXRwEWUGQg,266891
 metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
-metacountregressor-0.1.
-metacountregressor-0.1.
-metacountregressor-0.1.
-metacountregressor-0.1.
-metacountregressor-0.1.
+metacountregressor-0.1.93.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+metacountregressor-0.1.93.dist-info/METADATA,sha256=lxko7pOT-xFIpuqN3cUYr9hF3SIWszwVmGcfASHysOY,18165
+metacountregressor-0.1.93.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
+metacountregressor-0.1.93.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
+metacountregressor-0.1.93.dist-info/RECORD,,

{metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/LICENSE.txt  (file without changes)
{metacountregressor-0.1.78.dist-info → metacountregressor-0.1.93.dist-info}/top_level.txt  (file without changes)