PyPI - metacountregressor - Versions diffs - 0.1.122__tar.gz → 0.1.125__tar.gz - Mend

metacountregressor 0.1.122__tar.gz → 0.1.125__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

{metacountregressor-0.1.122 → metacountregressor-0.1.125}/PKG-INFO RENAMED Viewed

@@ -1,10 +1,10 @@
 Metadata-Version: 2.2
 Name: metacountregressor
-Version: 0.1.122
+Version: 0.1.125
 Summary: Extensions for a Python package for estimation of count models.
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
-Author-email: zeke.ahern@hdr.qut.edu.au
+Author-email: z.ahern@qut.edu.au
 License: QUT
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown

{metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/helperprocess.py RENAMED Viewed

@@ -180,6 +180,23 @@ config = {
 '''
+# Function to guess Low, Medium, High ranges
+def guess_low_medium_high(column_name, series):
+    # Compute the tertiles (33rd and 66th percentiles)
+    low_threshold = series.quantile(0.33)
+    high_threshold = series.quantile(0.66)
+    # Define the bins and labels
+    bins = [series.min() - 1, low_threshold, high_threshold, series.max()]
+    labels = ['Low', 'Medium', 'High']
+    return {
+        'type': 'bin',
+        'bins': bins,
+        'labels': labels,
+        'prefix': f'{column_name}_Binned'
+    }
 def transform_dataframe(df, config):
     output_df = pd.DataFrame()
@@ -226,12 +243,13 @@ def guess_column_type(column_name, series):
         return {'type': 'one-hot', 'prefix': column_name}
     elif pd.api.types.is_numeric_dtype(series):
         unique_values = series.nunique()
-        if unique_values < 10:
+        if unique_values < 5:
+            return {'type': 'one-hot', 'prefix': column_name}
+        elif series.range() > 20:
             # If there are few unique values, assume binning with default bins
-            min_val, max_val = series.min(), series.max()
-            bins = np.linspace(min_val, max_val, num=unique_values + 1)
-            labels = [f'Bin_{i}' for i in range(1, len(bins))]
-            return {'type': 'bin', 'bins': bins, 'labels': labels, 'prefix': f'{column_name}_Binned'}
+            guess_low_medium_high(column_name,series)
         else:
            # # Otherwise, assume continuous data with normalization
             # Otherwise, fallback to continuous standardization
@@ -293,14 +311,26 @@ def interactions(df, keep=None, drop_this_perc=0.6, interact = False):
         df = pd.concat([df, df_interactions], axis=1, sort=False)
     # second
-    corr_matrix = df.corr().abs()
+    # Remove `keep` columns from the correlation matrix
+    if keep is not None:
+        df_corr = df.drop(columns=keep, errors='ignore')  # Exclude `keep` columns
+    else:
+        df_corr = df
+    # Compute the absolute correlation matrix
+    corr_matrix = df_corr.corr().abs()
+    # Keep only the upper triangle of the correlation matrix
     upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
-    # Find features with correlation greater than 0.6
+    # Find features with correlation greater than the threshold
     to_drop = [column for column in upper.columns if any(upper[column] > drop_this_perc)]
+    # Ensure `keep` columns are not dropped
     if keep is not None:
         to_drop = [column for column in to_drop if column not in keep]
-    # Drop features
+    # Drop the identified features
     df.drop(to_drop, axis=1, inplace=True)
     return df

{metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/solution.py RENAMED Viewed

@@ -2983,7 +2983,7 @@ class ObjectiveFunction(object):
         argument = prob.mean(axis=1)
         # if less than 0 penalise
         if np.min(argument) < 0:
-            print('what the fuck')
+            print('Error with args..')
         if np.min(argument) < limit:
             # add a penalty for too small argument of log
             log_lik += -np.sum(np.minimum(0.0, argument - limit)) / limit
@@ -3473,8 +3473,7 @@ class ObjectiveFunction(object):
             corr_pairs = list(itertools.combinations(self.Kr, 2))
         else:
             corr_pairs = list(itertools.combinations(corr_indices, 2))
-            if len(corr_pairs) >0:
-                print('maybe get the terms here')
         for ii, corr_pair in enumerate(corr_pairs):
             # lower cholesky matrix
@@ -3635,7 +3634,7 @@ class ObjectiveFunction(object):
             #print("Custom functieon time:", end_time - start_time)
         except Exception as e:
-            print(e)
+            print("Neg Binom error.")
         return gg_alt
     def lindley_pmf(self, x, r, theta, k=50):
@@ -3969,7 +3968,7 @@ class ObjectiveFunction(object):
         # proba_d = self.dnegbimonli(y, eVd, b_gam )
-        # print('fuck if this actually works')
         elif dispersion == 2:

{metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor.egg-info/PKG-INFO RENAMED Viewed

@@ -1,10 +1,10 @@
 Metadata-Version: 2.2
 Name: metacountregressor
-Version: 0.1.122
+Version: 0.1.125
 Summary: Extensions for a Python package for estimation of count models.
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
-Author-email: zeke.ahern@hdr.qut.edu.au
+Author-email: z.ahern@qut.edu.au
 License: QUT
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown

{metacountregressor-0.1.122 → metacountregressor-0.1.125}/setup.py RENAMED Viewed

@@ -53,7 +53,7 @@ setuptools.setup(
     long_description_content_type='text/markdown',  # Specify the content type as Markdown
     url='https://github.com/zahern/CountDataEstimation',
     author='Zeke Ahern',
-    author_email='zeke.ahern@hdr.qut.edu.au',
+    author_email='z.ahern@qut.edu.au',
     license='QUT',
     packages=['metacountregressor'],
     zip_safe=False,
@@ -61,6 +61,7 @@ setuptools.setup(
     install_requires=[
         'numpy>=1.13.1',
         'scipy>=1.0.0',
-        'requests','latextable'
+        'requests',
+        'latextable'
     ]
 )