metacountregressor-0.1.124-py3-none-any.whl → metacountregressor-0.1.127-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -180,6 +180,23 @@ config = {
 '''


+# Function to guess Low, Medium, High ranges
+def guess_low_medium_high(column_name, series):
+    # Compute the tertiles (33rd and 66th percentiles)
+    low_threshold = series.quantile(0.33)
+    high_threshold = series.quantile(0.66)
+
+    # Define the bins and labels
+    bins = [series.min() - 1, low_threshold, high_threshold, series.max()]
+    labels = ['Low', 'Medium', 'High']
+
+    return {
+        'type': 'bin',
+        'bins': bins,
+        'labels': labels,
+        'prefix': f'{column_name}_Binned'
+    }
+
 def transform_dataframe(df, config):
     output_df = pd.DataFrame()

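
The dict returned by guess_low_medium_high appears to match the 'bin' entries that transform_dataframe consumes from config. A minimal sketch of how such an entry could be applied with pandas.cut, assuming the function above is in scope (the sample series and the direct pd.cut call are illustrative, not package code):

    import pandas as pd

    series = pd.Series([1, 2, 3, 7, 9, 15, 22, 40], name='AADT')  # illustrative data
    cfg = guess_low_medium_high(series.name, series)

    # Apply the suggested tertile bins and Low/Medium/High labels
    binned = pd.cut(series, bins=cfg['bins'], labels=cfg['labels'])
    binned.name = cfg['prefix']
    print(binned.value_counts())
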
@@ -226,12 +243,13 @@ def guess_column_type(column_name, series):
         return {'type': 'one-hot', 'prefix': column_name}
     elif pd.api.types.is_numeric_dtype(series):
         unique_values = series.nunique()
-        if unique_values < 10:
+        if unique_values < 5:
+            return {'type': 'one-hot', 'prefix': column_name}
+
+
+        elif series.range() > 20:
             # If there are few unique values, assume binning with default bins
-            min_val, max_val = series.min(), series.max()
-            bins = np.linspace(min_val, max_val, num=unique_values + 1)
-            labels = [f'Bin_{i}' for i in range(1, len(bins))]
-            return {'type': 'bin', 'bins': bins, 'labels': labels, 'prefix': f'{column_name}_Binned'}
+            guess_low_medium_high(column_name,series)
         else:
             # # Otherwise, assume continuous data with normalization
             # Otherwise, fallback to continuous standardization
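
For orientation, the revised heuristic is: fewer than 5 distinct values → one-hot; a wide numeric span → Low/Medium/High binning; otherwise → continuous standardization. A self-contained sketch under two assumptions: pandas Series have no range() method, so series.max() - series.min() is used as the presumed intent of series.range(), and the binning branch is meant to return the guess_low_medium_high config (the 'standardize' fallback entry is hypothetical, since it is not shown in this hunk):

    import pandas as pd

    def guess_numeric_config(column_name, series):
        # Sketch of the post-change heuristic, not the package code
        if series.nunique() < 5:
            return {'type': 'one-hot', 'prefix': column_name}
        elif (series.max() - series.min()) > 20:   # presumed intent of series.range()
            return guess_low_medium_high(column_name, series)
        else:
            return {'type': 'standardize', 'prefix': column_name}  # hypothetical fallback entry
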
@@ -293,14 +311,26 @@ def interactions(df, keep=None, drop_this_perc=0.6, interact = False):
    df = pd.concat([df, df_interactions], axis=1, sort=False)

    # second
-    corr_matrix = df.corr().abs()
+    # Remove `keep` columns from the correlation matrix
+    if keep is not None:
+        df_corr = df.drop(columns=keep, errors='ignore')  # Exclude `keep` columns
+    else:
+        df_corr = df
+
+    # Compute the absolute correlation matrix
+    corr_matrix = df_corr.corr().abs()
+
+    # Keep only the upper triangle of the correlation matrix
     upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))

-    # Find features with correlation greater than 0.6
+    # Find features with correlation greater than the threshold
     to_drop = [column for column in upper.columns if any(upper[column] > drop_this_perc)]
+
+    # Ensure `keep` columns are not dropped
     if keep is not None:
         to_drop = [column for column in to_drop if column not in keep]
-    # Drop features
+
+    # Drop the identified features
    df.drop(to_drop, axis=1, inplace=True)

    return df
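
For context, a small usage sketch of the revised pruning behaviour (the frame, column names, and the direct call are illustrative; it assumes interactions from helperprocess is in scope and is called with the default interact=False):

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(0)
    x1 = rng.normal(size=200)
    df = pd.DataFrame({
        'X1': x1,
        'X2': x1 + rng.normal(scale=0.01, size=200),  # nearly collinear with X1
        'X3': rng.normal(size=200),
        'Offset': rng.normal(size=200),
    })

    # X2 should be dropped (|corr| with X1 > 0.6); 'Offset' is protected by `keep`
    # and, after this change, also excluded from the correlation matrix itself.
    pruned = interactions(df, keep=['Offset'], drop_this_perc=0.6)
    print(pruned.columns.tolist())
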
@@ -2983,7 +2983,7 @@ class ObjectiveFunction(object):
         argument = prob.mean(axis=1)
         # if less than 0 penalise
         if np.min(argument) < 0:
-            print('what the fuck')
+            print('Error with args..')
         if np.min(argument) < limit:
             # add a penalty for too small argument of log
             log_lik += -np.sum(np.minimum(0.0, argument - limit)) / limit
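
For intuition, the penalty line above contributes nothing for arguments at or above limit and grows linearly (scaled by 1/limit) for arguments below it. A tiny numeric illustration; the value of limit is an assumption, not taken from the package:

    import numpy as np

    limit = 1e-5                                # assumed small positive floor
    argument = np.array([0.2, 1e-6, -1e-4])     # illustrative mixture-probability means

    # Same expression as in the hunk above
    penalty = -np.sum(np.minimum(0.0, argument - limit)) / limit
    print(penalty)   # (9e-6 + 1.1e-4) / 1e-5 ≈ 11.9
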
@@ -3473,8 +3473,7 @@ class ObjectiveFunction(object):
            corr_pairs = list(itertools.combinations(self.Kr, 2))
        else:
            corr_pairs = list(itertools.combinations(corr_indices, 2))
-        if len(corr_pairs) >0:
-            print('maybe get the terms here')
+

        for ii, corr_pair in enumerate(corr_pairs):
            # lower cholesky matrix
@@ -3635,7 +3634,7 @@ class ObjectiveFunction(object):
            #print("Custom functieon time:", end_time - start_time)

        except Exception as e:
-            print(e)
+            print("Neg Binom error.")
        return gg_alt

    def lindley_pmf(self, x, r, theta, k=50):
@@ -3969,7 +3968,7 @@ class ObjectiveFunction(object):


            # proba_d = self.dnegbimonli(y, eVd, b_gam )
-            # print('fuck if this actually works')
+

        elif dispersion == 2:

@@ -1,10 +1,10 @@
 Metadata-Version: 2.2
 Name: metacountregressor
-Version: 0.1.124
+Version: 0.1.127
 Summary: Extensions for a Python package for estimation of count models.
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
-Author-email: zeke.ahern@hdr.qut.edu.au
+Author-email: z.ahern@qut.edu.au
 License: QUT
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -3,7 +3,7 @@ metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0
 metacountregressor/app_main.py,sha256=vY3GczTbGbBRalbzMkl_9jVW7RMgEOc6z2Dr1IZJv9c,10014
 metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
 metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
-metacountregressor/helperprocess.py,sha256=4aSoyKP1GfzjwCzZ_dXlTbokOiMt_8sbzB6_tu0GPDg,16290
+metacountregressor/helperprocess.py,sha256=gzZ6g_uzctQeuyzqj3mV1n1Y4Kw8CLYK02Dkoz_fV9w,17041
 metacountregressor/main.py,sha256=A3XGwbwhhKVgMxnEgbAmMpgYaWkS8Rk30-cYs3FxvEk,22713
 metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
 metacountregressor/metaheuristics.py,sha256=Kkx1Jfox6NBlm5zVrI26Vc_NI7NFQSS9dinrZU9SpV8,105871
@@ -11,10 +11,10 @@ metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiL
 metacountregressor/pareto_logger__plot.py,sha256=mEU2QN4wmsM7t39GJ_XhJ_jjsdl09JOmG0U2jICrAkI,30037
 metacountregressor/setup.py,sha256=5UcQCCLR8Fm5odA3MX78WwahavxFq4mVD6oq0IuQvAY,936
 metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
-metacountregressor/solution.py,sha256=OJqB00cvGMLFei6RsjphPamOdLm3EWOOzK7k-uVbvFY,277671
+metacountregressor/solution.py,sha256=_InL6f_DHPuP518NfYXhQJQwDejR5CVEXdWyU1kKY7E,277562
 metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
-metacountregressor-0.1.124.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-metacountregressor-0.1.124.dist-info/METADATA,sha256=8d5RG71XKfReAVDxLlVwVbDMJXZqnCEElx7AUwrY_2E,23441
-metacountregressor-0.1.124.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-metacountregressor-0.1.124.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
-metacountregressor-0.1.124.dist-info/RECORD,,
+metacountregressor-0.1.127.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+metacountregressor-0.1.127.dist-info/METADATA,sha256=oQWg23dV1Dww0iSx684RxlhJyAKwn0Ud6PVNcdKt6LQ,23434
+metacountregressor-0.1.127.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+metacountregressor-0.1.127.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
+metacountregressor-0.1.127.dist-info/RECORD,,