metacountregressor 0.1.108__py3-none-any.whl → 0.1.119__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacountregressor/app_main.py +9 -4
- metacountregressor/main.py +40 -7
- metacountregressor/solution.py +182 -62
- {metacountregressor-0.1.108.dist-info → metacountregressor-0.1.119.dist-info}/METADATA +29 -6
- {metacountregressor-0.1.108.dist-info → metacountregressor-0.1.119.dist-info}/RECORD +8 -8
- {metacountregressor-0.1.108.dist-info → metacountregressor-0.1.119.dist-info}/WHEEL +1 -1
- {metacountregressor-0.1.108.dist-info → metacountregressor-0.1.119.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.108.dist-info → metacountregressor-0.1.119.dist-info}/top_level.txt +0 -0
    
        metacountregressor/app_main.py
    CHANGED
    
    | @@ -69,15 +69,20 @@ def main(args, **kwargs): | |
| 69 69 | 
             
                #data_info['data']['Group'][0]
         | 
| 70 70 | 
             
                #data_info['data']['Panel'][0]
         | 
| 71 71 | 
             
                args['decisions'] = data_info['analyst']
         | 
| 72 | 
            -
             | 
| 73 | 
            -
                if  | 
| 72 | 
            +
                grouped_c = data_info['data']['Grouped'][0]
         | 
| 73 | 
            +
                if isinstance(data_info['data']['Grouped'][0],str):
         | 
| 74 74 | 
             
                    args['group'] = data_info['data']['Grouped'][0]
         | 
| 75 | 
            -
                    args['ID'] = data_info['data'][' | 
| 76 | 
            -
                if  | 
| 75 | 
            +
                    args['ID'] = data_info['data']['Panel'][0]
         | 
| 76 | 
            +
                if isinstance(data_info['data']['Panel'][0],str):
         | 
| 77 77 | 
             
                    args['panels'] = data_info['data']['Panel'][0]
         | 
| 78 78 |  | 
| 79 79 | 
             
                df = pd.read_csv(str(data_info['data']['Problem'][0]))
         | 
| 80 80 | 
             
                x_df = df.drop(columns=[data_info['data']['Y'][0]])
         | 
| 81 | 
            +
                # drop the columns of x_df where column is string exclude the column stype args['group']
         | 
| 82 | 
            +
                exclude_column = args['group']
         | 
| 83 | 
            +
                columns_to_keep = x_df.dtypes != 'object'
         | 
| 84 | 
            +
                columns_to_keep |= (x_df.columns == exclude_column)
         | 
| 85 | 
            +
                x_df = x_df.loc[:, columns_to_keep]
         | 
| 81 86 | 
             
                y_df = df[[data_info['data']['Y'][0]]]
         | 
| 82 87 | 
             
                y_df.rename(columns={data_info['data']['Y'][0]: "Y"}, inplace=True)
         | 
| 83 88 |  | 
    
        metacountregressor/main.py
    CHANGED
    
    | @@ -41,6 +41,12 @@ def process_arguments(): | |
| 41 41 | 
             
                            'hyper': hyper}
         | 
| 42 42 | 
             
                return new_data
         | 
| 43 43 |  | 
| 44 | 
            +
            def process_package_argumemnts():
         | 
| 45 | 
            +
             | 
| 46 | 
            +
                new_data = {}
         | 
| 47 | 
            +
                pass
         | 
| 48 | 
            +
             | 
| 49 | 
            +
             | 
| 44 50 | 
             
            def main(args, **kwargs):
         | 
| 45 51 | 
             
                '''METACOUNT REGRESSOR TESTING ENVIRONMENT'''
         | 
| 46 52 |  | 
| @@ -162,8 +168,8 @@ def main(args, **kwargs): | |
| 162 168 | 
             
                        'rdm_cor_terms': [],
         | 
| 163 169 | 
             
                        'grouped_terms': [],
         | 
| 164 170 | 
             
                        'hetro_in_means': [],
         | 
| 165 | 
            -
                        'transformations': ['no', 'log', ' | 
| 166 | 
            -
                        'dispersion':  | 
| 171 | 
            +
                        'transformations': ['no', 'log', 'no', 'no', 'no', 'no', 'no'],
         | 
| 172 | 
            +
                        'dispersion': 0
         | 
| 167 173 | 
             
                    }
         | 
| 168 174 |  | 
| 169 175 | 
             
                    keep = ['Constant', 'US', 'RSMS', 'MCV', 'RSHS', 'AADT', 'Curve50', 'Offset']
         | 
| @@ -172,13 +178,27 @@ def main(args, **kwargs): | |
| 172 178 | 
             
                elif dataset == 4:
         | 
| 173 179 | 
             
                    manual_fit_spec = {
         | 
| 174 180 | 
             
                        'fixed_terms': ['const', 'LOWPRE', 'GBRPM', 'FRICTION'],
         | 
| 175 | 
            -
                        'rdm_terms': [' | 
| 181 | 
            +
                        'rdm_terms': ['EXPOSE:normal', 'INTPM:normal', 'CPM:normal', 'HISNOW:normal'],
         | 
| 182 | 
            +
                        'rdm_cor_terms': [],
         | 
| 183 | 
            +
                        'grouped_terms': [],
         | 
| 184 | 
            +
                        'hetro_in_means': [],
         | 
| 185 | 
            +
                        'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
         | 
| 186 | 
            +
                        'dispersion': 1
         | 
| 187 | 
            +
                    }
         | 
| 188 | 
            +
                    '''
         | 
| 189 | 
            +
                    manual_fit_spec = {
         | 
| 190 | 
            +
                        'fixed_terms': ['const', 'LOWPRE', 'GBRPM', 'FRICTION', 'EXPOSE', 'INTPM', 'CPM', 'HISNOW'],
         | 
| 191 | 
            +
                        'rdm_terms': [],
         | 
| 176 192 | 
             
                        'rdm_cor_terms': [],
         | 
| 177 193 | 
             
                        'grouped_terms': [],
         | 
| 178 194 | 
             
                        'hetro_in_means': [],
         | 
| 179 195 | 
             
                        'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
         | 
| 180 196 | 
             
                        'dispersion': 1
         | 
| 181 197 | 
             
                    }
         | 
| 198 | 
            +
                    '''
         | 
| 199 | 
            +
             | 
| 200 | 
            +
             | 
| 201 | 
            +
                    '''
         | 
| 182 202 | 
             
                    print('overriding this delete, just want to test the NB')
         | 
| 183 203 | 
             
                    manual_fit_spec = {
         | 
| 184 204 | 
             
                        'fixed_terms': ['const'],
         | 
| @@ -189,7 +209,7 @@ def main(args, **kwargs): | |
| 189 209 | 
             
                        'transformations': ['no'],
         | 
| 190 210 | 
             
                        'dispersion': 1
         | 
| 191 211 | 
             
                    }
         | 
| 192 | 
            -
             | 
| 212 | 
            +
                    '''
         | 
| 193 213 | 
             
                    df = pd.read_csv('./data/Ex-16-3.csv')  # read in the data
         | 
| 194 214 | 
             
                    y_df = df[['FREQ']].copy()  # only consider crashes
         | 
| 195 215 | 
             
                    y_df.rename(columns={"FREQ": "Y"}, inplace=True)
         | 
| @@ -262,6 +282,17 @@ def main(args, **kwargs): | |
| 262 282 | 
             
                    x_df = helperprocess.interactions(x_df, drop_this_perc=0.8)
         | 
| 263 283 | 
             
                    x_df['county'] = group_grab
         | 
| 264 284 |  | 
| 285 | 
            +
                    print('benchmark specification')
         | 
| 286 | 
            +
                    manual_fit_spec = {
         | 
| 287 | 
            +
                        'fixed_terms': ['const', 'monthly_AADT', 'segment_length', 'speed', 'paved_shoulder', 'curve'],
         | 
| 288 | 
            +
                        'rdm_terms': [],
         | 
| 289 | 
            +
                        'rdm_cor_terms': [],
         | 
| 290 | 
            +
                        'grouped_terms': ['DP01:normal', 'DX32:normal'],
         | 
| 291 | 
            +
                        'hetro_in_means': [],
         | 
| 292 | 
            +
                        'transformations': ['no', 'no', 'no', 'no', 'no', 'no'],
         | 
| 293 | 
            +
                        'dispersion': 0
         | 
| 294 | 
            +
                    }
         | 
| 295 | 
            +
             | 
| 265 296 | 
             
                elif dataset == 9:
         | 
| 266 297 | 
             
                    df = pd.read_csv('panel_synth.csv')  # read in the data
         | 
| 267 298 | 
             
                    y_df = df[['Y']].copy()  # only consider crashes
         | 
| @@ -286,6 +317,8 @@ def main(args, **kwargs): | |
| 286 317 | 
             
                    keep = ['group', 'constant', 'element_ID']
         | 
| 287 318 |  | 
| 288 319 | 
             
                    x_df = helperprocess.interactions(x_df, keep)
         | 
| 320 | 
            +
             | 
| 321 | 
            +
             | 
| 289 322 | 
             
                else:  # the dataset has been selected in the program as something else
         | 
| 290 323 | 
             
                    data_info = process_arguments()
         | 
| 291 324 | 
             
                    data_info['hyper']
         | 
| @@ -442,11 +475,11 @@ if __name__ == '__main__': | |
| 442 475 | 
             
                        if "-algorithm" in action.option_strings:
         | 
| 443 476 | 
             
                            parser._optionals._actions[i].help = "optimization algorithm"
         | 
| 444 477 |  | 
| 445 | 
            -
                    override =  | 
| 478 | 
            +
                    override = False
         | 
| 446 479 | 
             
                    if override:
         | 
| 447 | 
            -
                        print(' | 
| 480 | 
            +
                        print('WARNING: TESTING ENVIRONMENT, TURN OFF FOR RELEASE')
         | 
| 448 481 | 
             
                        parser.add_argument('-problem_number', default='10')
         | 
| 449 | 
            -
             | 
| 482 | 
            +
             | 
| 450 483 | 
             
                    if 'algorithm' not in args:
         | 
| 451 484 | 
             
                        parser.add_argument('-algorithm', type=str, default='hs',
         | 
| 452 485 | 
             
                                            help='optimization algorithm')
         | 
    
        metacountregressor/solution.py
    CHANGED
    
    | @@ -30,7 +30,7 @@ from scipy.special import gammaln | |
| 30 30 | 
             
            from sklearn.metrics import mean_absolute_error as MAE
         | 
| 31 31 | 
             
            from sklearn.metrics import mean_squared_error as MSPE
         | 
| 32 32 | 
             
            from statsmodels.tools.numdiff import approx_fprime, approx_hess
         | 
| 33 | 
            -
            from sklearn.preprocessing import StandardScaler
         | 
| 33 | 
            +
            from sklearn.preprocessing import StandardScaler, MinMaxScaler
         | 
| 34 34 | 
             
            from texttable import Texttable
         | 
| 35 35 |  | 
| 36 36 | 
             
            try:
         | 
| @@ -42,7 +42,7 @@ except ImportError: | |
| 42 42 | 
             
                from pareto_file import Pareto, Solution
         | 
| 43 43 | 
             
                from data_split_helper import DataProcessor
         | 
| 44 44 |  | 
| 45 | 
            -
             | 
| 45 | 
            +
            from scipy import stats
         | 
| 46 46 | 
             
            np.seterr(divide='ignore', invalid='ignore')
         | 
| 47 47 | 
             
            warnings.simplefilter("ignore")
         | 
| 48 48 |  | 
| @@ -124,10 +124,11 @@ class ObjectiveFunction(object): | |
| 124 124 |  | 
| 125 125 | 
             
                    self.reg_penalty = 0
         | 
| 126 126 | 
             
                    self.power_up_ll = False
         | 
| 127 | 
            -
             | 
| 127 | 
            +
                    self.nb_parma = 1
         | 
| 128 128 | 
             
                    self.bic = None
         | 
| 129 129 | 
             
                    self.other_bic = False
         | 
| 130 130 | 
             
                    self.test_flag = 1
         | 
| 131 | 
            +
                    self.no_extra_param =1 #if true, fix dispersion. w
         | 
| 131 132 | 
             
                    if self.other_bic:
         | 
| 132 133 | 
             
                        print('change this to false latter ')
         | 
| 133 134 |  | 
| @@ -138,7 +139,7 @@ class ObjectiveFunction(object): | |
| 138 139 | 
             
                    self.verbose_safe = True
         | 
| 139 140 | 
             
                    self.please_print = kwargs.get('please_print', 0)
         | 
| 140 141 | 
             
                    self.group_halton = None
         | 
| 141 | 
            -
                    self.grad_yes =  | 
| 142 | 
            +
                    self.grad_yes = True
         | 
| 142 143 | 
             
                    self.hess_yes = False
         | 
| 143 144 | 
             
                    self.group_halton_test = None
         | 
| 144 145 | 
             
                    self.panels = None
         | 
| @@ -174,8 +175,10 @@ class ObjectiveFunction(object): | |
| 174 175 | 
             
                    self._panels = None
         | 
| 175 176 | 
             
                    self.is_multi = True
         | 
| 176 177 | 
             
                    self.method_ll = 'Nelder-Mead-BFGS'
         | 
| 178 | 
            +
             | 
| 177 179 | 
             
                    self.method_ll = 'L-BFGS-B'  # alternatives 'BFGS_2', 'BFGS
         | 
| 178 180 | 
             
                    self.method_ll = 'BFGS_2'
         | 
| 181 | 
            +
                    #self.method_ll = 'Nelder-Mead-BFGS'
         | 
| 179 182 | 
             
                    self.Keep_Fit = 2
         | 
| 180 183 | 
             
                    self.MP = 0
         | 
| 181 184 | 
             
                    # Nelder-Mead-BFGS
         | 
| @@ -214,6 +217,11 @@ class ObjectiveFunction(object): | |
| 214 217 | 
             
                    self._maximize = False  # do we maximize or minimize?
         | 
| 215 218 |  | 
| 216 219 | 
             
                    x_data = sm.add_constant(x_data)
         | 
| 220 | 
            +
                    standardize_the_data = 0
         | 
| 221 | 
            +
                    if standardize_the_data:
         | 
| 222 | 
            +
                        print('we are standardize the data')
         | 
| 223 | 
            +
                        x_data = self.self_standardize_positive(x_data)
         | 
| 224 | 
            +
             | 
| 217 225 | 
             
                    self._input_data(x_data, y_data)
         | 
| 218 226 |  | 
| 219 227 |  | 
| @@ -232,7 +240,7 @@ class ObjectiveFunction(object): | |
| 232 240 | 
             
                        if self.test_percentage == 0:
         | 
| 233 241 | 
             
                            self.is_multi = False
         | 
| 234 242 |  | 
| 235 | 
            -
                        if 'panels' in kwargs and not  | 
| 243 | 
            +
                        if 'panels' in kwargs and not (kwargs.get('panels') == None):
         | 
| 236 244 | 
             
                            self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)
         | 
| 237 245 |  | 
| 238 246 | 
             
                            x_data[kwargs['group']] = x_data[kwargs['group']].astype(
         | 
| @@ -275,11 +283,11 @@ class ObjectiveFunction(object): | |
| 275 283 |  | 
| 276 284 | 
             
                    #self.n_obs = N
         | 
| 277 285 | 
             
                    self._characteristics_names = list(self._x_data.columns)
         | 
| 278 | 
            -
                    self._max_group_all_means =  | 
| 286 | 
            +
                    self._max_group_all_means = 2
         | 
| 279 287 |  | 
| 280 288 | 
             
                    exclude_this_test = [4]
         | 
| 281 289 |  | 
| 282 | 
            -
                    if 'panels' in kwargs and not  | 
| 290 | 
            +
                    if 'panels' in kwargs and not (kwargs.get('panels') == None):
         | 
| 283 291 | 
             
                        self.panels = np.asarray(df_train[kwargs['panels']])
         | 
| 284 292 | 
             
                        self.panels_test = np.asarray(df_test[kwargs['panels']])
         | 
| 285 293 | 
             
                        self.ids = np.asarray(
         | 
| @@ -295,6 +303,8 @@ class ObjectiveFunction(object): | |
| 295 303 | 
             
                        self.group_halton = group.copy()
         | 
| 296 304 | 
             
                        self.group_dummies = pd.get_dummies(group)
         | 
| 297 305 | 
             
                        Xnew, Ynew, panel_info = self._balance_panels(X, Y, panel)
         | 
| 306 | 
            +
             | 
| 307 | 
            +
                        Xnew = pd.DataFrame(Xnew, columns=X.columns)
         | 
| 298 308 | 
             
                        self.panel_info = panel_info
         | 
| 299 309 | 
             
                        self.N, self.P = panel_info.shape
         | 
| 300 310 | 
             
                        Xnew.drop(kwargs['panels'], axis=1, inplace=True)
         | 
| @@ -385,7 +395,7 @@ class ObjectiveFunction(object): | |
| 385 395 |  | 
| 386 396 |  | 
| 387 397 |  | 
| 388 | 
            -
                    self.Ndraws =  | 
| 398 | 
            +
                    self.Ndraws = 1400  # todo: change back
         | 
| 389 399 | 
             
                    self.draws1 = None
         | 
| 390 400 | 
             
                    self.initial_sig = 1  # pass the test of a single model
         | 
| 391 401 | 
             
                    self.pvalue_sig_value = .1
         | 
| @@ -408,7 +418,7 @@ class ObjectiveFunction(object): | |
| 408 418 | 
             
                    # self._transformations = ["no", "sqrt", "log", "exp", "fact", "arcsinh", 2, 3]
         | 
| 409 419 | 
             
                    self._transformations = ["no", "sqrt", "log", "arcsinh"]
         | 
| 410 420 | 
             
                    self._transformations = kwargs.get('_transformation', ["no", "sqrt", "log", 'arcsinh'])
         | 
| 411 | 
            -
             | 
| 421 | 
            +
                    self._transformations = kwargs.get('_transformation', ["no", "log", "sqrt", "arcsinh"])
         | 
| 412 422 | 
             
                    # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']
         | 
| 413 423 |  | 
| 414 424 | 
             
                    self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])
         | 
| @@ -766,6 +776,8 @@ class ObjectiveFunction(object): | |
| 766 776 | 
             
                    if dispersion == 0:
         | 
| 767 777 | 
             
                        return None, None
         | 
| 768 778 | 
             
                    elif dispersion == 2 or dispersion == 1:
         | 
| 779 | 
            +
                        if self.no_extra_param:
         | 
| 780 | 
            +
                            return self.nb_parma, None
         | 
| 769 781 | 
             
                        return betas[-1], None
         | 
| 770 782 |  | 
| 771 783 | 
             
                    elif dispersion == 3:
         | 
| @@ -817,6 +829,8 @@ class ObjectiveFunction(object): | |
| 817 829 | 
             
                                distro = ast.literal_eval(extra.iloc[matched_index, 7].values.tolist()[0])
         | 
| 818 830 | 
             
                                distro = self.rename_distro(distro)
         | 
| 819 831 | 
             
                                set_alpha = set_alpha+[distro]
         | 
| 832 | 
            +
                            elif col == 'const':
         | 
| 833 | 
            +
                                set_alpha = set_alpha +[['normal']]
         | 
| 820 834 | 
             
                        return set_alpha
         | 
| 821 835 | 
             
                    return  [[x for x in self._distribution]] * self._characteristics
         | 
| 822 836 |  | 
| @@ -897,10 +911,12 @@ class ObjectiveFunction(object): | |
| 897 911 | 
             
                        return ([self._model_type_codes[dispersion]])
         | 
| 898 912 |  | 
| 899 913 | 
             
                def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
         | 
| 900 | 
            -
                     | 
| 914 | 
            +
                    '''
         | 
| 901 915 | 
             
                    setup for naming of the model summary
         | 
| 902 916 | 
             
                    '''
         | 
| 917 | 
            +
                    if self.no_extra_param and dispersion ==1:
         | 
| 903 918 |  | 
| 919 | 
            +
                        betas = np.append(betas, self.nb_parma)
         | 
| 904 920 |  | 
| 905 921 | 
             
                    self.name_deleter = []
         | 
| 906 922 | 
             
                    group_rpm = None
         | 
| @@ -1025,7 +1041,7 @@ class ObjectiveFunction(object): | |
| 1025 1041 | 
             
                        try:
         | 
| 1026 1042 | 
             
                            if len(betas) != len(names):
         | 
| 1027 1043 | 
             
                                print('no draws is', no_draws)
         | 
| 1028 | 
            -
             | 
| 1044 | 
            +
             | 
| 1029 1045 | 
             
                        except Exception as e:
         | 
| 1030 1046 | 
             
                            print(e)
         | 
| 1031 1047 |  | 
| @@ -1052,7 +1068,8 @@ class ObjectiveFunction(object): | |
| 1052 1068 | 
             
                    if not isinstance(self.pvalues, np.ndarray):
         | 
| 1053 1069 | 
             
                        raise Exception
         | 
| 1054 1070 |  | 
| 1055 | 
            -
             | 
| 1071 | 
            +
                    if 'nb' in self.coeff_names and self.no_extra_param:
         | 
| 1072 | 
            +
                        self.pvalues = np.append(self.pvalues,0)
         | 
| 1056 1073 |  | 
| 1057 1074 | 
             
                    if self.please_print or save_state:
         | 
| 1058 1075 |  | 
| @@ -1068,17 +1085,22 @@ class ObjectiveFunction(object): | |
| 1068 1085 |  | 
| 1069 1086 | 
             
                            if solution is not None:
         | 
| 1070 1087 | 
             
                                print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")
         | 
| 1071 | 
            -
             | 
| 1088 | 
            +
                        
         | 
| 1072 1089 | 
             
                        self.pvalues = [self.round_with_padding(
         | 
| 1073 1090 | 
             
                            x, 2) for x in self.pvalues]
         | 
| 1074 1091 | 
             
                        signif_list = self.pvalue_asterix_add(self.pvalues)
         | 
| 1075 1092 | 
             
                        if model == 1:
         | 
| 1076 1093 |  | 
| 1077 | 
            -
                            self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
         | 
| 1078 | 
            -
                            if self. | 
| 1094 | 
            +
                            #self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
         | 
| 1095 | 
            +
                            if self.no_extra_param:
         | 
| 1096 | 
            +
                                self.coeff_ = np.append(self.coeff_, self.nb_parma)
         | 
| 1097 | 
            +
                                self.stderr = np.append(self.stderr, 0.00001)
         | 
| 1098 | 
            +
                                self.zvalues = np.append(self.zvalues, 50)
         | 
| 1099 | 
            +
             | 
| 1100 | 
            +
                            elif self.coeff_[-1] < 0.25:
         | 
| 1079 1101 | 
             
                                print(self.coeff_[-1], 'Warning Check Dispersion')
         | 
| 1080 1102 | 
             
                                print(np.exp(self.coeff_[-1]))
         | 
| 1081 | 
            -
                                self.coeff_[-1] = np.exp(self.coeff_[-1])  # min possible value for negbinom
         | 
| 1103 | 
            +
                                #self.coeff_[-1] = np.exp(self.coeff_[-1])  # min possible value for negbinom
         | 
| 1082 1104 |  | 
| 1083 1105 | 
             
                        self.coeff_ = [self.round_with_padding(x, 2) for x in self.coeff_]
         | 
| 1084 1106 |  | 
| @@ -1301,6 +1323,7 @@ class ObjectiveFunction(object): | |
| 1301 1323 |  | 
| 1302 1324 | 
             
                        if 'AADT' in self._characteristics_names[col]:
         | 
| 1303 1325 | 
             
                            new_transform = [['log']]
         | 
| 1326 | 
            +
                            #new_transform = [['no']]
         | 
| 1304 1327 | 
             
                            transform_set = transform_set + new_transform
         | 
| 1305 1328 |  | 
| 1306 1329 | 
             
                        elif all(x_data[col] <= 5):
         | 
| @@ -1340,6 +1363,18 @@ class ObjectiveFunction(object): | |
| 1340 1363 |  | 
| 1341 1364 | 
             
                    return transform_set
         | 
| 1342 1365 |  | 
| 1366 | 
            +
                def poisson_mean_get_dispersion(self, betas, X, y):
         | 
| 1367 | 
            +
                    eVy = self._loglik_gradient(betas, X, y, None, X, None, None, False, False, dispersion=0,
         | 
| 1368 | 
            +
                                                return_EV=True,
         | 
| 1369 | 
            +
                                                zi_list=None, draws_grouped=None, Xgroup=None)
         | 
| 1370 | 
            +
                    
         | 
| 1371 | 
            +
                    ab = ((y - eVy)**2 - eVy)/eVy
         | 
| 1372 | 
            +
                    bb = eVy -1
         | 
| 1373 | 
            +
                    disp = sm.OLS(ab.ravel(), bb.ravel()).fit()
         | 
| 1374 | 
            +
                    gamma = disp.params[0]
         | 
| 1375 | 
            +
                    print(f'dispersion is {gamma}')
         | 
| 1376 | 
            +
                    return gamma
         | 
| 1377 | 
            +
             | 
| 1343 1378 | 
             
                def validation(self, betas, y, X, Xr=None, dispersion=0, rdm_cor_fit=None, zi_list=None, exog_infl=None,
         | 
| 1344 1379 | 
             
                               model_nature=None, halton=1, testing=1, validation=0):
         | 
| 1345 1380 | 
             
                    'validation if mu needs to be calculated'
         | 
| @@ -1373,7 +1408,7 @@ class ObjectiveFunction(object): | |
| 1373 1408 | 
             
                                    XG = model_nature.get('XGtest')[:total_percent, :, :]
         | 
| 1374 1409 | 
             
                                else:
         | 
| 1375 1410 | 
             
                                    XG = model_nature.get('XGtest')[total_percent:, :, :]
         | 
| 1376 | 
            -
             | 
| 1411 | 
            +
             | 
| 1377 1412 | 
             
                        else:
         | 
| 1378 1413 | 
             
                            if 'XG' in model_nature:
         | 
| 1379 1414 | 
             
                                XG = model_nature.get('XG')
         | 
| @@ -1495,7 +1530,7 @@ class ObjectiveFunction(object): | |
| 1495 1530 | 
             
                    5: herogeneity_in _means
         | 
| 1496 1531 |  | 
| 1497 1532 |  | 
| 1498 | 
            -
                    a: how to  | 
| 1533 | 
            +
                    a: how to transform the original data
         | 
| 1499 1534 | 
             
                    b: grab dispersion '''
         | 
| 1500 1535 |  | 
| 1501 1536 | 
             
                    # todo: better way
         | 
| @@ -1843,7 +1878,10 @@ class ObjectiveFunction(object): | |
| 1843 1878 | 
             
                    elif dispersion == 4:
         | 
| 1844 1879 | 
             
                        return 2
         | 
| 1845 1880 | 
             
                    else:
         | 
| 1846 | 
            -
                         | 
| 1881 | 
            +
                        if self.no_extra_param:
         | 
| 1882 | 
            +
                            return 0
         | 
| 1883 | 
            +
                        else:
         | 
| 1884 | 
            +
                            return 1
         | 
| 1847 1885 |  | 
| 1848 1886 | 
             
                def get_pvalue_info_alt(self, pvalues, names, sig_value=0.05, dispersion=0, is_halton=1, delete=0,
         | 
| 1849 1887 | 
             
                                        return_violated_terms=0):
         | 
| @@ -1858,6 +1896,7 @@ class ObjectiveFunction(object): | |
| 1858 1896 |  | 
| 1859 1897 | 
             
                    else:
         | 
| 1860 1898 | 
             
                        slice_this_amount = self.num_dispersion_params(dispersion)
         | 
| 1899 | 
            +
                        slice_this_amount = 1 #TODO handle this
         | 
| 1861 1900 | 
             
                        if pvalues[-1] > sig_value:
         | 
| 1862 1901 | 
             
                            vio_counts += 1
         | 
| 1863 1902 | 
             
                        subpvalues = pvalues[:-slice_this_amount].copy()
         | 
| @@ -3502,21 +3541,38 @@ class ObjectiveFunction(object): | |
| 3502 3541 | 
             
                    # if gamma <= 0.01: #min defined value for stable nb
         | 
| 3503 3542 | 
             
                    #  gamma = 0.01
         | 
| 3504 3543 |  | 
| 3544 | 
            +
                    #g = stats.gamma.rvs(gamma, scale = lam/gamma, size = 1.0 / gamma * lam ** Q )
         | 
| 3505 3545 |  | 
| 3546 | 
            +
                    #gg = stats.poisson.rvs(g)
         | 
| 3506 3547 |  | 
| 3548 | 
            +
                    
         | 
| 3507 3549 |  | 
| 3550 | 
            +
                    
         | 
| 3508 3551 | 
             
                    endog = y
         | 
| 3509 3552 | 
             
                    mu = lam
         | 
| 3553 | 
            +
                    ''''
         | 
| 3554 | 
            +
                    mu = lam*np.exp(gamma) #TODO check that this does not need to be multiplied
         | 
| 3510 3555 | 
             
                    alpha = np.exp(gamma)
         | 
| 3511 | 
            -
                     | 
| 3512 | 
            -
                     | 
| 3513 | 
            -
                     | 
| 3514 | 
            -
                     | 
| 3515 | 
            -
             | 
| 3556 | 
            +
                    
         | 
| 3557 | 
            +
                    '''
         | 
| 3558 | 
            +
                    alpha = gamma
         | 
| 3559 | 
            +
                    size = 1.0 / alpha * mu ** Q
         | 
| 3560 | 
            +
             | 
| 3561 | 
            +
                    prob = size/(size+mu)
         | 
| 3562 | 
            +
             | 
| 3563 | 
            +
             | 
| 3516 3564 |  | 
| 3517 3565 | 
             
                    '''test'''
         | 
| 3518 3566 |  | 
| 3519 3567 |  | 
| 3568 | 
            +
                    '''
         | 
| 3569 | 
            +
                    size = 1 / np.exp(gamma) * mu ** 0
         | 
| 3570 | 
            +
                    prob = size / (size + mu)
         | 
| 3571 | 
            +
                    coeff = (gammaln(size + y) - gammaln(y + 1) -
         | 
| 3572 | 
            +
                         gammaln(size)) 
         | 
| 3573 | 
            +
                    llf = coeff + size * np.log(prob) + y * np.log(1 - prob)
         | 
| 3574 | 
            +
                    '''
         | 
| 3575 | 
            +
             | 
| 3520 3576 | 
             
                    try:
         | 
| 3521 3577 | 
             
                        # print(np.shape(y),np.shape(size), np.shape(prob))
         | 
| 3522 3578 | 
             
                        #gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
         | 
| @@ -3528,22 +3584,28 @@ class ObjectiveFunction(object): | |
| 3528 3584 | 
             
                        #start_time = time.time()
         | 
| 3529 3585 | 
             
                        #for _ in range(10000):
         | 
| 3530 3586 |  | 
| 3531 | 
            -
             | 
| 3587 | 
            +
             | 
| 3532 3588 | 
             
                        #end_time = time.time()
         | 
| 3533 3589 | 
             
                        #print("Custom functieon time:", end_time - start_time)
         | 
| 3534 3590 | 
             
                        #start_time = time.time()
         | 
| 3535 3591 | 
             
                        #for _ in range(10000):
         | 
| 3592 | 
            +
                        '''
         | 
| 3536 3593 | 
             
                        gg = np.exp(
         | 
| 3537 3594 | 
             
                            gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
         | 
| 3538 3595 | 
             
                                    y + alpha) * np.log(mu + alpha))
         | 
| 3539 3596 | 
             
                        gg[np.isnan(gg)] = 1
         | 
| 3597 | 
            +
                        '''
         | 
| 3598 | 
            +
                        gg_alt = nbinom.pmf(y ,1/alpha, prob)
         | 
| 3599 | 
            +
                        #gg_alt_2 = (gammaln(size + y) - gammaln(y + 1) -
         | 
| 3600 | 
            +
                         #gammaln(size)) + size * np.log(prob) + y * np.log(1 - prob)
         | 
| 3601 | 
            +
                        #print('check theses')
         | 
| 3540 3602 | 
             
                        #gg = nbinom.pmf(y ,alpha, prob)
         | 
| 3541 3603 | 
             
                        #end_time = time.time()
         | 
| 3542 3604 | 
             
                        #print("Custom functieon time:", end_time - start_time)
         | 
| 3543 3605 |  | 
| 3544 3606 | 
             
                    except Exception as e:
         | 
| 3545 3607 | 
             
                        print(e)
         | 
| 3546 | 
            -
                    return  | 
| 3608 | 
            +
                    return gg_alt
         | 
| 3547 3609 |  | 
| 3548 3610 | 
             
                def lindley_pmf(self, x, r, theta, k=50):
         | 
| 3549 3611 | 
             
                    """
         | 
| @@ -3690,8 +3752,8 @@ class ObjectiveFunction(object): | |
| 3690 3752 |  | 
| 3691 3753 | 
             
                    if dispersion == 1 or dispersion == 4:  # nb
         | 
| 3692 3754 | 
             
                        # if model_nature is not None and 'dispersion_penalty' in model_nature:
         | 
| 3693 | 
            -
             | 
| 3694 | 
            -
             | 
| 3755 | 
            +
                        #b_gam = 1/np.exp(b_gam)
         | 
| 3756 | 
            +
                        #print(b_gam)
         | 
| 3695 3757 | 
             
                        if b_gam <= 0:
         | 
| 3696 3758 | 
             
                            #penalty += 100
         | 
| 3697 3759 | 
             
                            #penalty += abs(b_gam)
         | 
| @@ -3699,9 +3761,9 @@ class ObjectiveFunction(object): | |
| 3699 3761 | 
             
                            #b_gam = 1
         | 
| 3700 3762 |  | 
| 3701 3763 | 
             
                            # if b_gam < 0.03:
         | 
| 3702 | 
            -
                            penalty += min(1, np.abs(b_gam))
         | 
| 3764 | 
            +
                            penalty += min(1, np.abs(b_gam), 0)
         | 
| 3703 3765 |  | 
| 3704 | 
            -
                            b_gam = 0.001
         | 
| 3766 | 
            +
                            #b_gam = 0.001
         | 
| 3705 3767 | 
             
                            #
         | 
| 3706 3768 |  | 
| 3707 3769 | 
             
                        #if b_gam >= 10:
         | 
| @@ -3733,8 +3795,15 @@ class ObjectiveFunction(object): | |
| 3733 3795 | 
             
                def eXB_calc(self, params_main, Xd, offset, dispersion, b_gam=None):
         | 
| 3734 3796 |  | 
| 3735 3797 | 
             
                    # print('this was 0')
         | 
| 3736 | 
            -
                     | 
| 3798 | 
            +
                    if dispersion:
         | 
| 3799 | 
            +
                        eta=  np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
         | 
| 3800 | 
            +
             | 
| 3801 | 
            +
                        #eta=  np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])+dispersion
         | 
| 3802 | 
            +
                        #print('check if this holds size')
         | 
| 3803 | 
            +
                    else:
         | 
| 3804 | 
            +
                        eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
         | 
| 3737 3805 | 
             
                    eta = np.array(eta)
         | 
| 3806 | 
            +
             | 
| 3738 3807 | 
             
                    # eta  = np.float64(eta)
         | 
| 3739 3808 | 
             
                    # eta = np.dot(Xd, params_main)+offset[:,:,0]
         | 
| 3740 3809 | 
             
                    # eta2 = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
         | 
| @@ -3907,6 +3976,8 @@ class ObjectiveFunction(object): | |
| 3907 3976 | 
             
                    if dispersion == 0 or dispersion == 3:
         | 
| 3908 3977 | 
             
                        return 0
         | 
| 3909 3978 | 
             
                    else:
         | 
| 3979 | 
            +
                        
         | 
| 3980 | 
            +
             | 
| 3910 3981 | 
             
                        return 1
         | 
| 3911 3982 |  | 
| 3912 3983 | 
             
                def _prob_product_across_panels(self, pch, panel_info):
         | 
| @@ -3962,7 +4033,7 @@ class ObjectiveFunction(object): | |
| 3962 4033 | 
             
                            if y[i] == 0:
         | 
| 3963 4034 | 
             
                                gr_e[i] = 0
         | 
| 3964 4035 |  | 
| 3965 | 
            -
                    if self.is_dispersion(dispersion):
         | 
| 4036 | 
            +
                    if self.is_dispersion(dispersion) and not self.no_extra_param:
         | 
| 3966 4037 | 
             
                        gr_d = np.zeros((N, 1))
         | 
| 3967 4038 | 
             
                        if dispersion == 1:
         | 
| 3968 4039 | 
             
                            # trying alt
         | 
| @@ -4067,9 +4138,9 @@ class ObjectiveFunction(object): | |
| 4067 4138 | 
             
                    dprod_r = dev.np.einsum("njk,njr -> nkr", Xdr,
         | 
| 4068 4139 | 
             
                                            einsum_model_form, dtype=np.float64)  # (N,K,R)
         | 
| 4069 4140 | 
             
                    der_prod_r = dprod_r * der * proba_n[:, None, :]  # (N,K,R)
         | 
| 4070 | 
            -
                    der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :]  # I think this is the case check
         | 
| 4141 | 
            +
                    #der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :]  # I think this is the case check
         | 
| 4071 4142 | 
             
                    der_prod_r = dprod_r[:, X_tril_idx, :] * der * proba_n[:, None, :]  # or this one
         | 
| 4072 | 
            -
                    print('which one of these')
         | 
| 4143 | 
            +
                    #print('which one of these')
         | 
| 4073 4144 | 
             
                    der_t = self._compute_derivatives(
         | 
| 4074 4145 | 
             
                        br, draws_[:, draws_tril_idx, :], brstd, self.dist_fit)  # (N,K,R)
         | 
| 4075 4146 | 
             
                    # er_t = self._compute_derivatives(br, draws_, brstd[:, draws_tril_idx,: ], self.dist_fit, draws_tril_idx)
         | 
| @@ -4132,8 +4203,12 @@ class ObjectiveFunction(object): | |
| 4132 4203 | 
             
                        grad_n = self._concat_gradients(
         | 
| 4133 4204 | 
             
                            (gr_f, gr_u, gr_s, gr_e)) / Rlik  # (N,K)
         | 
| 4134 4205 | 
             
                    else:
         | 
| 4135 | 
            -
                         | 
| 4136 | 
            -
                             | 
| 4206 | 
            +
                        if self.no_extra_param:
         | 
| 4207 | 
            +
                            grad_n = self._concat_gradients(
         | 
| 4208 | 
            +
                                (gr_f, gr_u, gr_s, gr_h, gr_hs)) / Rlik  # (N,K)
         | 
| 4209 | 
            +
                        else:    
         | 
| 4210 | 
            +
                            grad_n = self._concat_gradients(
         | 
| 4211 | 
            +
                                (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik  # (N,K)
         | 
| 4137 4212 | 
             
                    grad_n = np.nan_to_num(grad_n, nan=0, posinf=10000, neginf=-10000)
         | 
| 4138 4213 | 
             
                    grad_n = np.clip(grad_n, -1000, 1000)
         | 
| 4139 4214 | 
             
                    n = np.shape(grad_n)[0]
         | 
| @@ -4290,7 +4365,7 @@ class ObjectiveFunction(object): | |
| 4290 4365 | 
             
                        return proba_r.sum(axis=1), np.squeeze(proba_r)
         | 
| 4291 4366 |  | 
| 4292 4367 | 
             
                def _penalty_betas(self, betas, dispersion, penalty, penalty_ap=100.0):
         | 
| 4293 | 
            -
                    penalty_val = 0. | 
| 4368 | 
            +
                    penalty_val = 0.1
         | 
| 4294 4369 | 
             
                    penalty_val_max = 130
         | 
| 4295 4370 |  | 
| 4296 4371 | 
             
                    # print('change_later')
         | 
| @@ -4306,8 +4381,8 @@ class ObjectiveFunction(object): | |
| 4306 4381 | 
             
                        if abs(i) > penalty_val_max:
         | 
| 4307 4382 | 
             
                            penalty += abs(i)
         | 
| 4308 4383 |  | 
| 4309 | 
            -
                    # | 
| 4310 | 
            -
                    # | 
| 4384 | 
            +
                    #if abs(i) < penalty_val:
         | 
| 4385 | 
            +
                    #    penalty += 5
         | 
| 4311 4386 |  | 
| 4312 4387 | 
             
                    # penalty = 0
         | 
| 4313 4388 | 
             
                    return penalty
         | 
| @@ -4414,8 +4489,7 @@ class ObjectiveFunction(object): | |
| 4414 4489 | 
             
                            index += 1
         | 
| 4415 4490 |  | 
| 4416 4491 | 
             
                    brstd = br_std
         | 
| 4417 | 
            -
             | 
| 4418 | 
            -
                    print(brstd)
         | 
| 4492 | 
            +
             | 
| 4419 4493 |  | 
| 4420 4494 |  | 
| 4421 4495 | 
             
                def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
         | 
| @@ -4447,7 +4521,7 @@ class ObjectiveFunction(object): | |
| 4447 4521 | 
             
                        penalty = self._penalty_betas(
         | 
| 4448 4522 | 
             
                            betas, dispersion, penalty, float(len(y) / 10.0))
         | 
| 4449 4523 | 
             
                        self.n_obs = len(y)  # feeds into gradient
         | 
| 4450 | 
            -
                        if draws is None and draws_grouped is None and (
         | 
| 4524 | 
            +
                        if draws is None and draws_grouped is None and (model_nature is None or
         | 
| 4451 4525 | 
             
                                'draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
         | 
| 4452 4526 | 
             
                            #TODO do i shuffle the draws
         | 
| 4453 4527 | 
             
                            if type(Xd) == dict:
         | 
| @@ -4594,7 +4668,9 @@ class ObjectiveFunction(object): | |
| 4594 4668 | 
             
                            Kf = 0
         | 
| 4595 4669 | 
             
                        else:
         | 
| 4596 4670 | 
             
                            if n_coeff != len(betas):
         | 
| 4597 | 
            -
                                raise Exception
         | 
| 4671 | 
            +
                                raise Exception(
         | 
| 4672 | 
            +
             | 
| 4673 | 
            +
                                )
         | 
| 4598 4674 | 
             
                            Bf = betas[0:Kf]  # Fixed betas
         | 
| 4599 4675 |  | 
| 4600 4676 |  | 
| @@ -4696,7 +4772,8 @@ class ObjectiveFunction(object): | |
| 4696 4772 | 
             
                            eVd = self.lam_transform(eVd, dispersion, betas[-1])
         | 
| 4697 4773 |  | 
| 4698 4774 | 
             
                        if self.is_dispersion(dispersion):
         | 
| 4699 | 
            -
                             | 
| 4775 | 
            +
                            if not self.no_extra_param:
         | 
| 4776 | 
            +
                                penalty, betas[-1] = self._penalty_dispersion(
         | 
| 4700 4777 | 
             
                                dispersion, betas[-1], eVd, y, penalty, model_nature)
         | 
| 4701 4778 |  | 
| 4702 4779 | 
             
                        ''' 
         | 
| @@ -5341,7 +5418,7 @@ class ObjectiveFunction(object): | |
| 5341 5418 | 
             
                    return a
         | 
| 5342 5419 |  | 
| 5343 5420 | 
             
                def fitRegression(self, mod,
         | 
| 5344 | 
            -
                                  dispersion=0, maxiter=2000, batch_size=None, num_hess=False):
         | 
| 5421 | 
            +
                                  dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):
         | 
| 5345 5422 |  | 
| 5346 5423 | 
             
                    """
         | 
| 5347 5424 | 
             
                    Fits a poisson regression given data and outcomes if dispersion is not declared
         | 
| @@ -5387,6 +5464,8 @@ class ObjectiveFunction(object): | |
| 5387 5464 | 
             
                                _g, pg, kg = 0, 0, 0
         | 
| 5388 5465 |  | 
| 5389 5466 | 
             
                            dispersion_param_num = self.is_dispersion(dispersion)
         | 
| 5467 | 
            +
                            if self.no_extra_param:
         | 
| 5468 | 
            +
                                dispersion_param_num =0
         | 
| 5390 5469 |  | 
| 5391 5470 | 
             
                            #paramNum = self.get_param_num(dispersion)
         | 
| 5392 5471 | 
             
                            self.no_random_paramaters = 0
         | 
| @@ -5441,17 +5520,26 @@ class ObjectiveFunction(object): | |
| 5441 5520 | 
             
                            else:
         | 
| 5442 5521 | 
             
                                bb[0] = self.constant_value
         | 
| 5443 5522 | 
             
                                if dispersion == 1:
         | 
| 5444 | 
            -
                                     | 
| 5523 | 
            +
                                    if not self.no_extra_param:
         | 
| 5524 | 
            +
                                        bb[-1] = self.negative_binomial_value
         | 
| 5445 5525 | 
             
                                bounds = None
         | 
| 5446 5526 |  | 
| 5527 | 
            +
             | 
| 5528 | 
            +
             | 
| 5447 5529 | 
             
                            # intial_beta = minimize(self._loglik_gradient, bb, args =(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method = 'nelder-mead', options={'gtol': 1e-7*len(XX)})
         | 
| 5448 5530 | 
             
                            hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
         | 
| 5449 | 
            -
                             | 
| 5531 | 
            +
                            
         | 
| 5532 | 
            +
                            if self.no_extra_param:
         | 
| 5533 | 
            +
                                dispersion_poisson = 0
         | 
| 5534 | 
            +
                                initial_beta = self._minimize(self._loglik_gradient, bb,
         | 
| 5450 5535 | 
             
                                                          args=(XX, y, None, None, None, None, calc_gradient, hess_est,
         | 
| 5451 | 
            -
                                                                 | 
| 5536 | 
            +
                                                                dispersion_poisson, 0, False, 0, None, None, None, None, None,
         | 
| 5452 5537 | 
             
                                                                mod),
         | 
| 5453 5538 | 
             
                                                          method=method2, tol=1e-5, options={'gtol': tol['gtol']},
         | 
| 5454 5539 | 
             
                                                          bounds=bounds)
         | 
| 5540 | 
            +
                                if dispersion:
         | 
| 5541 | 
            +
                                    nb_parma = self.poisson_mean_get_dispersion(initial_beta.x, XX, y)
         | 
| 5542 | 
            +
                                
         | 
| 5455 5543 |  | 
| 5456 5544 |  | 
| 5457 5545 |  | 
| @@ -5551,7 +5639,7 @@ class ObjectiveFunction(object): | |
| 5551 5639 |  | 
| 5552 5640 | 
             
                                    b = [b[i] if i > len(self.none_handler(self.fixed_fit)) + len(
         | 
| 5553 5641 | 
             
                                        self.none_handler(self.rdm_fit)) + len(
         | 
| 5554 | 
            -
                                        self.none_handler(self.rdm_cor_fit)) else b[i] / 1 | 
| 5642 | 
            +
                                        self.none_handler(self.rdm_cor_fit)) else b[i] / 1 for i in range(len(b))]
         | 
| 5555 5643 | 
             
                                else:
         | 
| 5556 5644 | 
             
                                    b = bb
         | 
| 5557 5645 |  | 
| @@ -5561,9 +5649,10 @@ class ObjectiveFunction(object): | |
| 5561 5649 | 
             
                                    else:
         | 
| 5562 5650 | 
             
                                        b = np.insert(b, -1, np.random.uniform(0.05, 0.1))
         | 
| 5563 5651 | 
             
                                if dispersion == 1:
         | 
| 5564 | 
            -
                                     | 
| 5565 | 
            -
             | 
| 5566 | 
            -
                                        b[-1]  | 
| 5652 | 
            +
                                    if not self.no_extra_param:
         | 
| 5653 | 
            +
                                        b[-1] = np.abs(b[-1])
         | 
| 5654 | 
            +
                                        if b[-1] > 10:
         | 
| 5655 | 
            +
                                            b[-1] = 5
         | 
| 5567 5656 | 
             
                                elif dispersion == 2:
         | 
| 5568 5657 | 
             
                                    b[-1] = .5
         | 
| 5569 5658 | 
             
                                if method == 'L-BFGS-B' or method2 == 'L-BFGS-B':
         | 
| @@ -5689,13 +5778,30 @@ class ObjectiveFunction(object): | |
| 5689 5778 |  | 
| 5690 5779 | 
             
                                if draws is None and draws_hetro is not None:
         | 
| 5691 5780 | 
             
                                    print('hold')
         | 
| 5692 | 
            -
                                 | 
| 5693 | 
            -
             | 
| 5694 | 
            -
                                    self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
         | 
| 5695 | 
            -
                                                           method=method2, tol=tol['ftol'],
         | 
| 5696 | 
            -
                                                           options={'gtol': tol['gtol']}, bounds=bounds,
         | 
| 5697 | 
            -
                                                           hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
         | 
| 5781 | 
            +
                                #self.grad_yes = True
         | 
| 5782 | 
            +
                                #self.hess_yes = True
         | 
| 5698 5783 |  | 
| 5784 | 
            +
                                if self.no_extra_param:
         | 
| 5785 | 
            +
                                    dispersion_poisson = 0
         | 
| 5786 | 
            +
                                    betas_est = self._minimize(self._loglik_gradient, b, args=(
         | 
| 5787 | 
            +
                                        X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion_poisson, 0, False, 0,
         | 
| 5788 | 
            +
                                        self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
         | 
| 5789 | 
            +
                                                            method=method2, tol=tol['ftol'],
         | 
| 5790 | 
            +
                                                            options={'gtol': tol['gtol']}, bounds=bounds,
         | 
| 5791 | 
            +
                                                            hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
         | 
| 5792 | 
            +
                                    if dispersion:
         | 
| 5793 | 
            +
                                        initial_fit_beta = betas_est.x
         | 
| 5794 | 
            +
                                        parmas = np.append(initial_fit_beta, nb_parma)
         | 
| 5795 | 
            +
                                        self.nb_parma = nb_parma
         | 
| 5796 | 
            +
                                        print(f'neg binomi,{self.nb_parma}')
         | 
| 5797 | 
            +
                                        betas_est = self._minimize(self._loglik_gradient, initial_fit_beta, args=(
         | 
| 5798 | 
            +
                                        X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
         | 
| 5799 | 
            +
                                        self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
         | 
| 5800 | 
            +
                                                            method=method2, tol=tol['ftol'],
         | 
| 5801 | 
            +
                                                            options={'gtol': tol['gtol']}, bounds=bounds,
         | 
| 5802 | 
            +
                                                            hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
         | 
| 5803 | 
            +
                                        
         | 
| 5804 | 
            +
                                        print('refit with estimation of NB')
         | 
| 5699 5805 | 
             
                                # self.numerical_hessian_calc = True
         | 
| 5700 5806 | 
             
                                if self.numerical_hessian_calc:
         | 
| 5701 5807 | 
             
                                    try:
         | 
| @@ -5994,7 +6100,7 @@ class ObjectiveFunction(object): | |
| 5994 6100 | 
             
                    return delim + self._model_type_codes[dispersion]
         | 
| 5995 6101 |  | 
| 5996 6102 | 
             
                def self_standardize_positive(self, X):
         | 
| 5997 | 
            -
                    scaler =  | 
| 6103 | 
            +
                    scaler = MinMaxScaler()
         | 
| 5998 6104 | 
             
                    if type(X) == list:
         | 
| 5999 6105 | 
             
                        return X
         | 
| 6000 6106 |  | 
| @@ -6004,12 +6110,26 @@ class ObjectiveFunction(object): | |
| 6004 6110 | 
             
                        # Reshaping to 2D - combining the last two dimensions
         | 
| 6005 6111 | 
             
                        df_tf_reshaped = X.reshape(original_shape[0], -1)
         | 
| 6006 6112 | 
             
                        df_tf_scaled = scaler.fit_transform(df_tf_reshaped)
         | 
| 6007 | 
            -
                        df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
         | 
| 6113 | 
            +
                        #df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
         | 
| 6008 6114 | 
             
                        # Reshape back to original 3D shape if necessary
         | 
| 6009 6115 | 
             
                        df_tf = df_tf_scaled.reshape(original_shape)
         | 
| 6010 6116 | 
             
                        return df_tf
         | 
| 6011 6117 | 
             
                    else:
         | 
| 6012 | 
            -
                         | 
| 6118 | 
            +
                        # Initialize the MinMaxScaler
         | 
| 6119 | 
            +
                        scaler = MinMaxScaler()
         | 
| 6120 | 
            +
                        float_columns = X.select_dtypes(include=['float64', 'float32', 'int']).columns.difference(['const', 'offset, "EXPOSE', 'Constant', 'constant'])
         | 
| 6121 | 
            +
                        non_numeric_columns = X.select_dtypes(exclude=['float64', 'float32', 'int']).columns
         | 
| 6122 | 
            +
             | 
| 6123 | 
            +
                        # Fit the scaler to the float columns and transform them
         | 
| 6124 | 
            +
                        X[float_columns] = scaler.fit_transform(X[float_columns])
         | 
| 6125 | 
            +
                        # Fit the scaler to the data and transform it
         | 
| 6126 | 
            +
                        #scaled_data = scaler.fit_transform(X)
         | 
| 6127 | 
            +
             | 
| 6128 | 
            +
                        # Convert the result back to a DataFrame
         | 
| 6129 | 
            +
                        #scaled_df = pd.DataFrame(scaled_data, columns=X.columns)
         | 
| 6130 | 
            +
             | 
| 6131 | 
            +
             | 
| 6132 | 
            +
                        return X
         | 
| 6013 6133 |  | 
| 6014 6134 | 
             
                def make_regression_from_terms(self, fixed=None, rdm=None, rdm_cor_fit=None, distribution=None, dispersion=None,
         | 
| 6015 6135 | 
             
                                               *args, **kwargs):
         | 
| @@ -6065,7 +6185,7 @@ class ObjectiveFunction(object): | |
| 6065 6185 | 
             
                                t, idx, df_test[:, :, idx])
         | 
| 6066 6186 | 
             
                        if np.max(df_tf[:, :, idx]) >= 77000:
         | 
| 6067 6187 |  | 
| 6068 | 
            -
                             | 
| 6188 | 
            +
                            print('should not be possible')
         | 
| 6069 6189 |  | 
| 6070 6190 | 
             
                    self.define_selfs_fixed_rdm_cor(model_nature)
         | 
| 6071 6191 | 
             
                    indices = self.get_named_indices(self.fixed_fit)
         | 
| @@ -1,6 +1,6 @@ | |
| 1 | 
            -
            Metadata-Version: 2. | 
| 1 | 
            +
            Metadata-Version: 2.2
         | 
| 2 2 | 
             
            Name: metacountregressor
         | 
| 3 | 
            -
            Version: 0.1. | 
| 3 | 
            +
            Version: 0.1.119
         | 
| 4 4 | 
             
            Summary: Extensions for a Python package for estimation of count models.
         | 
| 5 5 | 
             
            Home-page: https://github.com/zahern/CountDataEstimation
         | 
| 6 6 | 
             
            Author: Zeke Ahern
         | 
| @@ -9,9 +9,18 @@ License: QUT | |
| 9 9 | 
             
            Requires-Python: >=3.10
         | 
| 10 10 | 
             
            Description-Content-Type: text/markdown
         | 
| 11 11 | 
             
            License-File: LICENSE.txt
         | 
| 12 | 
            -
            Requires-Dist: numpy | 
| 13 | 
            -
            Requires-Dist: scipy | 
| 12 | 
            +
            Requires-Dist: numpy>=1.13.1
         | 
| 13 | 
            +
            Requires-Dist: scipy>=1.0.0
         | 
| 14 14 | 
             
            Requires-Dist: requests
         | 
| 15 | 
            +
            Dynamic: author
         | 
| 16 | 
            +
            Dynamic: author-email
         | 
| 17 | 
            +
            Dynamic: description
         | 
| 18 | 
            +
            Dynamic: description-content-type
         | 
| 19 | 
            +
            Dynamic: home-page
         | 
| 20 | 
            +
            Dynamic: license
         | 
| 21 | 
            +
            Dynamic: requires-dist
         | 
| 22 | 
            +
            Dynamic: requires-python
         | 
| 23 | 
            +
            Dynamic: summary
         | 
| 15 24 |  | 
| 16 25 | 
             
            <div style="display: flex; align-items: center;">
         | 
| 17 26 | 
             
                <img src="https://github.com/zahern/data/raw/main/m.png" alt="My Image" style="width: 100px; margin-right: 20px;">
         | 
| @@ -23,10 +32,24 @@ Requires-Dist: requests | |
| 23 32 |  | 
| 24 33 | 
             
            The tutorial provides more extensive examples on how to run the code and perform experiments. Further documentation is currently in development.
         | 
| 25 34 |  | 
| 26 | 
            -
             | 
| 35 | 
            +
            # For an Application Setup, Download the Following GUI
         | 
| 36 | 
            +
            [Download Application](https://github.com/zahern/MetaCount/tree/master/metacountregressor/application_gui/dist/meta_app)
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            The application involves setting up a problem instance to run the models.
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            ### Entire [Git Repository](https://github.com/zahern/MetaCount.git) is available to clone.
         | 
| 41 | 
            +
            #### Steps
         | 
| 42 | 
            +
            1. Clone Project
         | 
| 43 | 
            +
            2. Navigate to "metacountregressor/application_gui/dist/meta_app"
         | 
| 44 | 
            +
            3. Run meta_app.exe
         | 
| 45 | 
            +
            4. Navigate to metacountregressor/app_main.py
         | 
| 46 | 
            +
            5. Run app_main.py
         | 
| 47 | 
            +
             | 
| 48 | 
            +
             | 
| 49 | 
            +
            ## Setup For Python Package Approach
         | 
| 27 50 | 
             
            The code below demonstrates how to set up automatic optimization assisted by the harmony search algorithm. References to Differential Evolution and Simulated Annealing are also included (change accordingly).
         | 
| 28 51 |  | 
| 29 | 
            -
            ##  | 
| 52 | 
            +
            ## Install: Requires Python 3.10 or later
         | 
| 30 53 |  | 
| 31 54 | 
             
            Install `metacountregressor` using pip as follows:
         | 
| 32 55 |  | 
| @@ -1,20 +1,20 @@ | |
| 1 1 | 
             
            metacountregressor/__init__.py,sha256=UM4zaqoAcZVWyx3SeL9bRS8xpQ_iLZU9fIIARWmfjis,2937
         | 
| 2 2 | 
             
            metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0PltKc,2047
         | 
| 3 | 
            -
            metacountregressor/app_main.py,sha256= | 
| 3 | 
            +
            metacountregressor/app_main.py,sha256=vY3GczTbGbBRalbzMkl_9jVW7RMgEOc6z2Dr1IZJv9c,10014
         | 
| 4 4 | 
             
            metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
         | 
| 5 5 | 
             
            metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
         | 
| 6 6 | 
             
            metacountregressor/helperprocess.py,sha256=Sc5gJ7ffFlkya5B5KQwE33xxXuIQyF6OaYtSikLa3pQ,12968
         | 
| 7 | 
            -
            metacountregressor/main.py,sha256= | 
| 7 | 
            +
            metacountregressor/main.py,sha256=lHrj2Hvj2hTGi-2mLSbuGEHDDILl4V-ml9e9Y_Hjpts,20560
         | 
| 8 8 | 
             
            metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
         | 
| 9 9 | 
             
            metacountregressor/metaheuristics.py,sha256=UyUmHyuQd5vZ2wdaVL0dWpJfBOBdtCAqcA0GlFzouH8,105849
         | 
| 10 10 | 
             
            metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiLur0k,23096
         | 
| 11 11 | 
             
            metacountregressor/pareto_logger__plot.py,sha256=mEU2QN4wmsM7t39GJ_XhJ_jjsdl09JOmG0U2jICrAkI,30037
         | 
| 12 12 | 
             
            metacountregressor/setup.py,sha256=8w6IqX0tJsbYrOI1BJLIJCIvOnunKli5I9fsF5PhHv4,919
         | 
| 13 13 | 
             
            metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
         | 
| 14 | 
            -
            metacountregressor/solution.py,sha256= | 
| 14 | 
            +
            metacountregressor/solution.py,sha256=4pfpDcH38eTIGV7DIDL5ZOyB_ND-iDzod964-RBhC5o,274195
         | 
| 15 15 | 
             
            metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
         | 
| 16 | 
            -
            metacountregressor-0.1. | 
| 17 | 
            -
            metacountregressor-0.1. | 
| 18 | 
            -
            metacountregressor-0.1. | 
| 19 | 
            -
            metacountregressor-0.1. | 
| 20 | 
            -
            metacountregressor-0.1. | 
| 16 | 
            +
            metacountregressor-0.1.119.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
         | 
| 17 | 
            +
            metacountregressor-0.1.119.dist-info/METADATA,sha256=Kya6i22baCjmfCkzW-1VnaAjzinJS7iI15R05qWr5Z8,23415
         | 
| 18 | 
            +
            metacountregressor-0.1.119.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
         | 
| 19 | 
            +
            metacountregressor-0.1.119.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
         | 
| 20 | 
            +
            metacountregressor-0.1.119.dist-info/RECORD,,
         | 
| 
            File without changes
         | 
| 
            File without changes
         |