PyPI - metacountregressor - Versions diffs - 0.1.124__py3-none-any.whl → 0.1.127__py3-none-any.whl - Mend

metacountregressor 0.1.124__py3-none-any.whl → 0.1.127__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

metacountregressor/helperprocess.py CHANGED Viewed

@@ -180,6 +180,23 @@ config = {
 '''
+# Function to guess Low, Medium, High ranges
+def guess_low_medium_high(column_name, series):
+    # Compute the tertiles (33rd and 66th percentiles)
+    low_threshold = series.quantile(0.33)
+    high_threshold = series.quantile(0.66)
+    # Define the bins and labels
+    bins = [series.min() - 1, low_threshold, high_threshold, series.max()]
+    labels = ['Low', 'Medium', 'High']
+    return {
+        'type': 'bin',
+        'bins': bins,
+        'labels': labels,
+        'prefix': f'{column_name}_Binned'
+    }
 def transform_dataframe(df, config):
     output_df = pd.DataFrame()
@@ -226,12 +243,13 @@ def guess_column_type(column_name, series):
         return {'type': 'one-hot', 'prefix': column_name}
     elif pd.api.types.is_numeric_dtype(series):
         unique_values = series.nunique()
-        if unique_values < 10:
+        if unique_values < 5:
+            return {'type': 'one-hot', 'prefix': column_name}
+        elif series.range() > 20:
             # If there are few unique values, assume binning with default bins
-            min_val, max_val = series.min(), series.max()
-            bins = np.linspace(min_val, max_val, num=unique_values + 1)
-            labels = [f'Bin_{i}' for i in range(1, len(bins))]
-            return {'type': 'bin', 'bins': bins, 'labels': labels, 'prefix': f'{column_name}_Binned'}
+            guess_low_medium_high(column_name,series)
         else:
            # # Otherwise, assume continuous data with normalization
             # Otherwise, fallback to continuous standardization
@@ -293,14 +311,26 @@ def interactions(df, keep=None, drop_this_perc=0.6, interact = False):
         df = pd.concat([df, df_interactions], axis=1, sort=False)
     # second
-    corr_matrix = df.corr().abs()
+    # Remove `keep` columns from the correlation matrix
+    if keep is not None:
+        df_corr = df.drop(columns=keep, errors='ignore')  # Exclude `keep` columns
+    else:
+        df_corr = df
+    # Compute the absolute correlation matrix
+    corr_matrix = df_corr.corr().abs()
+    # Keep only the upper triangle of the correlation matrix
     upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
-    # Find features with correlation greater than 0.6
+    # Find features with correlation greater than the threshold
     to_drop = [column for column in upper.columns if any(upper[column] > drop_this_perc)]
+    # Ensure `keep` columns are not dropped
     if keep is not None:
         to_drop = [column for column in to_drop if column not in keep]
-    # Drop features
+    # Drop the identified features
     df.drop(to_drop, axis=1, inplace=True)
     return df

metacountregressor/solution.py CHANGED Viewed

@@ -2983,7 +2983,7 @@ class ObjectiveFunction(object):
         argument = prob.mean(axis=1)
         # if less than 0 penalise
         if np.min(argument) < 0:
-            print('what the fuck')
+            print('Error with args..')
         if np.min(argument) < limit:
             # add a penalty for too small argument of log
             log_lik += -np.sum(np.minimum(0.0, argument - limit)) / limit
@@ -3473,8 +3473,7 @@ class ObjectiveFunction(object):
             corr_pairs = list(itertools.combinations(self.Kr, 2))
         else:
             corr_pairs = list(itertools.combinations(corr_indices, 2))
-            if len(corr_pairs) >0:
-                print('maybe get the terms here')
         for ii, corr_pair in enumerate(corr_pairs):
             # lower cholesky matrix
@@ -3635,7 +3634,7 @@ class ObjectiveFunction(object):
             #print("Custom functieon time:", end_time - start_time)
         except Exception as e:
-            print(e)
+            print("Neg Binom error.")
         return gg_alt
     def lindley_pmf(self, x, r, theta, k=50):
@@ -3969,7 +3968,7 @@ class ObjectiveFunction(object):
         # proba_d = self.dnegbimonli(y, eVd, b_gam )
-        # print('fuck if this actually works')
         elif dispersion == 2:

{metacountregressor-0.1.124.dist-info → metacountregressor-0.1.127.dist-info}/METADATA RENAMED Viewed

@@ -1,10 +1,10 @@
 Metadata-Version: 2.2
 Name: metacountregressor
-Version: 0.1.124
+Version: 0.1.127
 Summary: Extensions for a Python package for estimation of count models.
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
-Author-email: zeke.ahern@hdr.qut.edu.au
+Author-email: z.ahern@qut.edu.au
 License: QUT
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown

{metacountregressor-0.1.124.dist-info → metacountregressor-0.1.127.dist-info}/RECORD RENAMED Viewed

@@ -3,7 +3,7 @@ metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0
 metacountregressor/app_main.py,sha256=vY3GczTbGbBRalbzMkl_9jVW7RMgEOc6z2Dr1IZJv9c,10014
 metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
 metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
-metacountregressor/helperprocess.py,sha256=4aSoyKP1GfzjwCzZ_dXlTbokOiMt_8sbzB6_tu0GPDg,16290
+metacountregressor/helperprocess.py,sha256=gzZ6g_uzctQeuyzqj3mV1n1Y4Kw8CLYK02Dkoz_fV9w,17041
 metacountregressor/main.py,sha256=A3XGwbwhhKVgMxnEgbAmMpgYaWkS8Rk30-cYs3FxvEk,22713
 metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
 metacountregressor/metaheuristics.py,sha256=Kkx1Jfox6NBlm5zVrI26Vc_NI7NFQSS9dinrZU9SpV8,105871
@@ -11,10 +11,10 @@ metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiL
 metacountregressor/pareto_logger__plot.py,sha256=mEU2QN4wmsM7t39GJ_XhJ_jjsdl09JOmG0U2jICrAkI,30037
 metacountregressor/setup.py,sha256=5UcQCCLR8Fm5odA3MX78WwahavxFq4mVD6oq0IuQvAY,936
 metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
-metacountregressor/solution.py,sha256=OJqB00cvGMLFei6RsjphPamOdLm3EWOOzK7k-uVbvFY,277671
+metacountregressor/solution.py,sha256=_InL6f_DHPuP518NfYXhQJQwDejR5CVEXdWyU1kKY7E,277562
 metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
-metacountregressor-0.1.124.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-metacountregressor-0.1.124.dist-info/METADATA,sha256=8d5RG71XKfReAVDxLlVwVbDMJXZqnCEElx7AUwrY_2E,23441
-metacountregressor-0.1.124.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-metacountregressor-0.1.124.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
-metacountregressor-0.1.124.dist-info/RECORD,,
+metacountregressor-0.1.127.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+metacountregressor-0.1.127.dist-info/METADATA,sha256=oQWg23dV1Dww0iSx684RxlhJyAKwn0Ud6PVNcdKt6LQ,23434
+metacountregressor-0.1.127.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+metacountregressor-0.1.127.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
+metacountregressor-0.1.127.dist-info/RECORD,,

{metacountregressor-0.1.124.dist-info → metacountregressor-0.1.127.dist-info}/LICENSE.txt RENAMED Viewed

File without changes

{metacountregressor-0.1.124.dist-info → metacountregressor-0.1.127.dist-info}/WHEEL RENAMED Viewed

File without changes

{metacountregressor-0.1.124.dist-info → metacountregressor-0.1.127.dist-info}/top_level.txt RENAMED Viewed

File without changes

metacountregressor 0.1.124__py3-none-any.whl → 0.1.127__py3-none-any.whl

metacountregressor 0.1.124__py3-none-any.whl → 0.1.127__py3-none-any.whl