PyPI - metacountregressor - Versions diffs - 0.1.176__tar.gz → 0.1.203__tar.gz - Mend

metacountregressor 0.1.176.tar.gz → 0.1.203.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

{metacountregressor-0.1.176 → metacountregressor-0.1.203}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: metacountregressor
-Version: 0.1.176
+Version: 0.1.203
 Summary: Extensive Testing for Estimation of Data Count Models
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
@@ -16,7 +16,7 @@ Requires-Dist: latextable
 Requires-Dist: pandas
 Requires-Dist: scikit_learn>=1.4.1.post1
 Requires-Dist: statsmodels
-Requires-Dist: psustil
+Requires-Dist: psutil
 Dynamic: author
 Dynamic: author-email
 Dynamic: description

{metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/helperprocess.py RENAMED Viewed

@@ -271,7 +271,17 @@ def guess_low_medium_high(column_name, series):
     # Compute the tertiles (33rd and 66th percentiles)
     #print('did it make it...')
     #mode_value = st.mode(series)  # Get the most frequent value
-    #print('good')
+    #i dont think this works cayse its not a seriers any other way
+    is_binary = series.isin([0, 1]).all()
+    if is_binary:
+        return {
+            'type': 'binary',
+            'bins': [0,1],
+            'labels': ['Off', 'On'],
+            'prefix': f'{column_name}'
+        }
    # series = pd.to_numeric(series, errors='coerce').fillna(mode_value)
     low_threshold = np.quantile(series, 0.33)
     high_threshold = np.quantile(series,0.66)

{metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor/solution.py RENAMED Viewed

@@ -159,7 +159,7 @@ class ObjectiveFunction(object):
         self.full_model = None
         self.GP_parameter = 0
         self.is_multi = kwargs.get('is_multi', False)
-        self.complexity_level = 6
+        self.complexity_level = kwargs.get('complexity_level', 6)
         self._max_iterations_improvement = 10000
         self.generated_sln = set()
         self.ave_mae = 0
@@ -256,10 +256,11 @@ class ObjectiveFunction(object):
                 self.is_multi = False
             if 'panels' in kwargs and not (kwargs.get('panels') == None):
-                self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)
+                if kwargs.get('group') is not None:
+                    self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)
-                x_data[kwargs['group']] = x_data[kwargs['group']].astype(
-                    'category').cat.codes
+                    x_data[kwargs['group']] = x_data[kwargs['group']].astype(
+                        'category').cat.codes
                 self.complexity_level = 6
                 # create test dataset
@@ -309,10 +310,13 @@ class ObjectiveFunction(object):
                 df_train[kwargs['panels']]) if kwargs['panels'] is not None else None
             self.ids_test = np.asarray(
                 df_test[kwargs['panels']]) if kwargs['panels'] is not None else None
-            groupll = np.asarray(df_train[kwargs['group']].astype(
-                'category').cat.codes)
-            group_test = np.asarray(df_test[kwargs['group']].astype(
-                'category').cat.codes)
+            if kwargs.get('group') is not None:
+                groupll = np.asarray(df_train[kwargs['group']].astype(
+                    'category').cat.codes)
+                group_test = np.asarray(df_test[kwargs['group']].astype(
+                    'category').cat.codes)
+            else:
+                groupll = None
             X, Y, panel, group = self._arrange_long_format(
                 df_train, y_train, self.ids, self.ids, groupll)
             self.group_halton = group.copy()
@@ -501,7 +505,7 @@ class ObjectiveFunction(object):
         self._max_hurdle = 4
         #Manually fit from analyst specification
-        manual_fit = kwargs.get('Manual_Fit')
+        manual_fit = kwargs.get('Manual_Fit', None)
         if manual_fit is not None:
             print('fitting manual')
             self.process_manual_fit(manual_fit)
@@ -538,7 +542,7 @@ class ObjectiveFunction(object):
                 if self.is_multi:
                     self._offsets_test = self._x_data_test[:, :, val_od]
                     self._x_data_test = self.remove_offset(self._x_data_test, val_od)
-                print(self._offsets)
+                #print(self._offsets)
             else:
                 self.initialize_empty_offsets()
@@ -1712,6 +1716,11 @@ class ObjectiveFunction(object):
                         vector[get_rdm_i] -= 1
                         only_ints_vals[get_rdm_i] -= 1
+                    elif vector[get_rdm_i] == 1:
+                        vector[get_rdm_i] -= 1
+                        only_ints_vals[get_rdm_i] -= 1
                     if vector.count(5) == 1:
                         idx = vector.index(5)
                         vector[idx] = 0
@@ -2361,7 +2370,7 @@ class ObjectiveFunction(object):
             sorted(my_dict, key=lambda x: x[0]['pval_percentage'])
     def get_fitness(self, vector, multi=False, verbose=False, max_routine=3):
-        obj_1 = 10.0 ** 5
+        obj_1 = 10.0 ** 4
         obj_best = None
         sub_slns = list()
@@ -2369,12 +2378,14 @@ class ObjectiveFunction(object):
             vector)  # just added to grab the fixed fit TODO: Clean up
         dispersion = model_nature.get('dispersion')
         self.define_selfs_fixed_rdm_cor(model_nature)
         try:
             self.repair(vector)
         except Exception as e:
-            print('prolem repairing here')
+            print('problem repairing here')
             print(vector)
             print(e)
         layout = vector.copy()
         trial_run = 0
         max_trial = 0
@@ -2453,10 +2464,10 @@ class ObjectiveFunction(object):
             if not self.is_quanitifiable_num(obj_1[self._obj_1]):
-                obj_1[self._obj_1] = 10 ** 9
+                obj_1[self._obj_1] = 10 ** 5
             else:
                 if obj_1[self._obj_1] <= 0:
-                    obj_1[self._obj_1] = 10 ** 9
+                    obj_1[self._obj_1] = 10 ** 5
         if multi:
@@ -2487,10 +2498,10 @@ class ObjectiveFunction(object):
         self.reset_sln()
         if not self.is_quanitifiable_num(obj_1[self._obj_1]):
-            obj_1[self._obj_1] = 10 ** 9
+            obj_1[self._obj_1] = 10 ** 5
         else:
             if obj_1[self._obj_1] == 0:
-                obj_1[self._obj_1] = 10 ** 9
+                obj_1[self._obj_1] = 10 **5
         if verbose:
             print('The best solution iteratively is of objective value:', obj_1)
@@ -3029,6 +3040,39 @@ class ObjectiveFunction(object):
         # print('log_lik poisson', log_lik)
         return -log_lik
+    def extract_parameters(self, betas, Kf, Kr, Kchol_a, Krb_a):
+        """
+        Extracts parameters from the `betas` array based on the given sizes.
+        Parameters:
+            betas (numpy.ndarray): The array of betas.
+            Kf (int): Size of Bf (first Kf elements of betas).
+            Kr (int): Size of Br.
+            Kchol_a (int): Part of the size for brstd.
+            Krb_a (int): Part of the size for brstd.
+        Returns:
+            tuple: A tuple containing:
+                - Bf (numpy.ndarray): The first Kf elements of betas.
+                - Br (numpy.ndarray): The next Kr elements of betas after Bf.
+                - brstd (numpy.ndarray): The next Kchol_a + Krb_a elements of betas after Br.
+                - remaining_betas (numpy.ndarray): Any remaining elements in betas after brstd.
+        """
+        # Step 1: Extract Bf
+        Bf = betas[:Kf]  # First Kf elements
+        # Step 2: Extract Br
+        Br = betas[Kf:Kf + Kr]  # Next Kr elements after Bf
+        # Step 3: Extract brstd
+        brstd_size = Kchol_a + Krb_a  # Total size of brstd
+        brstd = betas[Kf + Kr:Kf + Kr + brstd_size]  # Next brstd_size elements after Br
+        # Step 4: Extract remaining betas
+        remaining_betas = betas[Kf + Kr + brstd_size:]  # Remaining elements in betas
+        return Bf, Br, brstd, remaining_betas
     def convert_nbinom_params(self, mu, theta):
         """
             Convert mean/dispersion parameterization of a negative binomial to the ones scipy supports
@@ -3561,8 +3605,11 @@ class ObjectiveFunction(object):
         # Compute: betas = mean + sd*draws
         if len(br_sd) != draws.shape[1]:
             #get the same size as the mean
-            betas_random = self.Br.copy()
+            #if hasattr(self.Br):
+            #    betas_random = self.Br.copy()
+            #else:
+            idx = self.get_X_draw_tril()
+            betas_random = br_mean[None, :, None] + draws[:,idx, :] * br_sd[None, :, None]
             '''
             c = self.get_num_params()[3:5]
@@ -4716,10 +4763,10 @@ class ObjectiveFunction(object):
                 n_coeff = self.get_param_num(dispersion)
                 Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
                 if Kchol_a != Kchol:
-                    print('hold')
+                    print('hold qhy')
                 if Kr_b != Kr_b_a:
-                    print('hold')
+                    print('hold qhy')
@@ -4735,13 +4782,32 @@ class ObjectiveFunction(object):
                 Bf = betas[0:Kf]  # Fixed betas
+           # Bf_new, Br_new, Br_std_new, Br_rema = self.extract_parameters(betas, Kf, Kr, Kchol_a, Kr_b_a)
             Vdf = dev.np.einsum('njk,k -> nj', Xdf, Bf, dtype=np.float64)  # (N, P)
             br = betas[Kf:Kf + Kr]
+            #i have an array of betas, Kf represents the first kf of the betas array
+            # now return Bf where size of bf = kf
+            # size of br needs to be Kr
+            #Kr
+            #now extract from betas, after all the Bf
+            # cakk
+            #the next array is brstd
+            # size of brstd needs to be
+            # Kchol_a + Krb_a
+            #its grabbing from the
             brstd = betas[Kf + Kr:Kf + Kr + Kr_b + Kchol]
             # initialises size matrix
             proba = []  # Temp batching storage
@@ -4755,6 +4821,8 @@ class ObjectiveFunction(object):
                 if len(self.none_handler(self.rdm_cor_fit)) == 0:
                     # Br = self._transform_rand_betas(br, np.abs(
                     #     brstd), draws_)  # Get random coefficients, old method
+                    #TODO
                     Br = self._transform_rand_betas(br,
                                                     brstd, draws_)  # Get random coefficients
                     self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
@@ -5844,6 +5912,11 @@ class ObjectiveFunction(object):
                     if self.no_extra_param:
                         dispersion_poisson = 0
+                        print('b :', len(b))
+                        print(self.get_param_num())
+                        baby = self.get_param_num()
+                        if len(b) != baby:
+                            print('modify')
                         betas_est = self._minimize(self._loglik_gradient, b, args=(
                             X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion_poisson, 0, False, 0,
                             self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
@@ -6205,8 +6278,9 @@ class ObjectiveFunction(object):
                             transform, distribution, None, dispersion=dispersion)
     def get_named_indices(self, names):
-        indices = [i for i, name in enumerate(self._characteristics_names) if name in names]
+        # Change substrings issue
+        indices = [i for i, name in enumerate(self._characteristics_names) if name == names]
+        indices = [i for i, name in enumerate(self._characteristics_names) if name in names and isinstance(name, str)]
         return indices
     """
@@ -6482,10 +6556,7 @@ class ObjectiveFunction(object):
             else:
                 rv_indices.append(rv_count_all - 1)
-        # for s.d.: gr_w = (Obs prob. minus predicted probability) * obs. var * random draw
-        draws_tril_idx = np.array([corr_indices[j]
-                                   for i in range(len(self.none_handler(self.rdm_cor_fit)))
-                                   for j in range(i + 1)])  # varnames pos.
         X_tril_idx = np.array([corr_indices[i]
                                for i in range(len(self.none_handler(self.rdm_cor_fit)))
                                for j in range(i + 1)])
@@ -6494,12 +6565,55 @@ class ObjectiveFunction(object):
         range_var = [x for x in
                      range(len(self.none_handler(var_uncor)))]
         range_var = sorted(range_var)
-        draws_tril_idx = np.array(np.concatenate((range_var, draws_tril_idx)))
         X_tril_idx = np.array(np.concatenate((range_var, X_tril_idx)))
-        draws_tril_idx = draws_tril_idx.astype(int)
         X_tril_idx = X_tril_idx.astype(int)
         return  X_tril_idx
+    def get_X_draw_tril(self):
+        '''For correlations find the repeating terms'''
+        varnames = self.none_join([self.rdm_grouped_fit, self.rdm_fit, self.rdm_cor_fit])
+        rv_count_all = 0
+        chol_count = 0
+        rv_count = 0
+        corr_indices = []
+        rv_indices = []
+        for ii, var in enumerate(varnames):  # TODO: BUGFIXf
+            if var in self.none_handler(self.rdm_cor_fit):
+                is_correlated = True
+            else:
+                is_correlated = False
+            rv_count_all += 1
+            if is_correlated:
+                chol_count += 1
+            else:
+                rv_count += 1
+            if var in self.none_handler(self.rdm_cor_fit):
+                corr_indices.append(rv_count_all - 1)  # TODO: what does tis do
+            else:
+                rv_indices.append(rv_count_all - 1)
+        # for s.d.: gr_w = (Obs prob. minus predicted probability) * obs. var * random draw
+        draws_tril_idx = np.array([corr_indices[j]
+                                   for i in range(len(self.none_handler(self.rdm_cor_fit)))
+                                   for j in range(i + 1)])  # varnames pos.
+        # Find the s.d. for random variables that are not correlated
+        var_uncor = self.none_join([self.rdm_grouped_fit, self.rdm_fit])
+        range_var = [x for x in
+                     range(len(self.none_handler(var_uncor)))]
+        range_var = sorted(range_var)
+        draws_tril_idx = np.array(np.concatenate((range_var, draws_tril_idx)))
+        draws_tril_idx = draws_tril_idx.astype(int)
+        return draws_tril_idx
     def modifyn(self, data):

{metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: metacountregressor
-Version: 0.1.176
+Version: 0.1.203
 Summary: Extensive Testing for Estimation of Data Count Models
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
@@ -16,7 +16,7 @@ Requires-Dist: latextable
 Requires-Dist: pandas
 Requires-Dist: scikit_learn>=1.4.1.post1
 Requires-Dist: statsmodels
-Requires-Dist: psustil
+Requires-Dist: psutil
 Dynamic: author
 Dynamic: author-email
 Dynamic: description

{metacountregressor-0.1.176 → metacountregressor-0.1.203}/metacountregressor.egg-info/requires.txt RENAMED Viewed

@@ -5,4 +5,4 @@ latextable
 pandas
 scikit_learn>=1.4.1.post1
 statsmodels
-psustil
+psutil

{metacountregressor-0.1.176 → metacountregressor-0.1.203}/setup.py RENAMED Viewed

@@ -72,6 +72,6 @@ setuptools.setup(
         'pandas',
         'scikit_learn>=1.4.1.post1',
         'statsmodels',
-        'psustil'
+        'psutil'
     ]
 )