metacountregressor 0.1.122__tar.gz → 0.1.125__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (27)
  1. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/PKG-INFO +2 -2
  2. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/helperprocess.py +38 -8
  3. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/solution.py +4 -5
  4. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor.egg-info/PKG-INFO +2 -2
  5. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/setup.py +3 -2
  6. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/LICENSE.txt +0 -0
  7. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/README.rst +0 -0
  8. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/__init__.py +0 -0
  9. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/_device_cust.py +0 -0
  10. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/app_main.py +0 -0
  11. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/data_split_helper.py +0 -0
  12. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/halton.py +0 -0
  13. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/main.py +0 -0
  14. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/main_old.py +0 -0
  15. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/metaheuristics.py +0 -0
  16. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/pareto_file.py +0 -0
  17. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/pareto_logger__plot.py +0 -0
  18. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/setup.py +0 -0
  19. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/single_objective_finder.py +0 -0
  20. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor/test_generated_paper2.py +0 -0
  21. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor.egg-info/SOURCES.txt +0 -0
  22. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor.egg-info/dependency_links.txt +0 -0
  23. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor.egg-info/not-zip-safe +0 -0
  24. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor.egg-info/requires.txt +0 -0
  25. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/metacountregressor.egg-info/top_level.txt +0 -0
  26. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/setup.cfg +0 -0
  27. {metacountregressor-0.1.122 → metacountregressor-0.1.125}/tests/test.py +0 -0
@@ -1,10 +1,10 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: metacountregressor
3
- Version: 0.1.122
3
+ Version: 0.1.125
4
4
  Summary: Extensions for a Python package for estimation of count models.
5
5
  Home-page: https://github.com/zahern/CountDataEstimation
6
6
  Author: Zeke Ahern
7
- Author-email: zeke.ahern@hdr.qut.edu.au
7
+ Author-email: z.ahern@qut.edu.au
8
8
  License: QUT
9
9
  Requires-Python: >=3.10
10
10
  Description-Content-Type: text/markdown
@@ -180,6 +180,23 @@ config = {
180
180
  '''
181
181
 
182
182
 
183
+ # Function to guess Low, Medium, High ranges
184
+ def guess_low_medium_high(column_name, series):
185
+ # Compute the tertiles (33rd and 66th percentiles)
186
+ low_threshold = series.quantile(0.33)
187
+ high_threshold = series.quantile(0.66)
188
+
189
+ # Define the bins and labels
190
+ bins = [series.min() - 1, low_threshold, high_threshold, series.max()]
191
+ labels = ['Low', 'Medium', 'High']
192
+
193
+ return {
194
+ 'type': 'bin',
195
+ 'bins': bins,
196
+ 'labels': labels,
197
+ 'prefix': f'{column_name}_Binned'
198
+ }
199
+
183
200
  def transform_dataframe(df, config):
184
201
  output_df = pd.DataFrame()
185
202
 
@@ -226,12 +243,13 @@ def guess_column_type(column_name, series):
226
243
  return {'type': 'one-hot', 'prefix': column_name}
227
244
  elif pd.api.types.is_numeric_dtype(series):
228
245
  unique_values = series.nunique()
229
- if unique_values < 10:
246
+ if unique_values < 5:
247
+ return {'type': 'one-hot', 'prefix': column_name}
248
+
249
+
250
+ elif series.range() > 20:
230
251
  # If there are few unique values, assume binning with default bins
231
- min_val, max_val = series.min(), series.max()
232
- bins = np.linspace(min_val, max_val, num=unique_values + 1)
233
- labels = [f'Bin_{i}' for i in range(1, len(bins))]
234
- return {'type': 'bin', 'bins': bins, 'labels': labels, 'prefix': f'{column_name}_Binned'}
252
+ guess_low_medium_high(column_name,series)
235
253
  else:
236
254
  # # Otherwise, assume continuous data with normalization
237
255
  # Otherwise, fallback to continuous standardization
@@ -293,14 +311,26 @@ def interactions(df, keep=None, drop_this_perc=0.6, interact = False):
293
311
  df = pd.concat([df, df_interactions], axis=1, sort=False)
294
312
 
295
313
  # second
296
- corr_matrix = df.corr().abs()
314
+ # Remove `keep` columns from the correlation matrix
315
+ if keep is not None:
316
+ df_corr = df.drop(columns=keep, errors='ignore') # Exclude `keep` columns
317
+ else:
318
+ df_corr = df
319
+
320
+ # Compute the absolute correlation matrix
321
+ corr_matrix = df_corr.corr().abs()
322
+
323
+ # Keep only the upper triangle of the correlation matrix
297
324
  upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
298
325
 
299
- # Find features with correlation greater than 0.6
326
+ # Find features with correlation greater than the threshold
300
327
  to_drop = [column for column in upper.columns if any(upper[column] > drop_this_perc)]
328
+
329
+ # Ensure `keep` columns are not dropped
301
330
  if keep is not None:
302
331
  to_drop = [column for column in to_drop if column not in keep]
303
- # Drop features
332
+
333
+ # Drop the identified features
304
334
  df.drop(to_drop, axis=1, inplace=True)
305
335
 
306
336
  return df
@@ -2983,7 +2983,7 @@ class ObjectiveFunction(object):
2983
2983
  argument = prob.mean(axis=1)
2984
2984
  # if less than 0 penalise
2985
2985
  if np.min(argument) < 0:
2986
- print('what the fuck')
2986
+ print('Error with args..')
2987
2987
  if np.min(argument) < limit:
2988
2988
  # add a penalty for too small argument of log
2989
2989
  log_lik += -np.sum(np.minimum(0.0, argument - limit)) / limit
@@ -3473,8 +3473,7 @@ class ObjectiveFunction(object):
3473
3473
  corr_pairs = list(itertools.combinations(self.Kr, 2))
3474
3474
  else:
3475
3475
  corr_pairs = list(itertools.combinations(corr_indices, 2))
3476
- if len(corr_pairs) >0:
3477
- print('maybe get the terms here')
3476
+
3478
3477
 
3479
3478
  for ii, corr_pair in enumerate(corr_pairs):
3480
3479
  # lower cholesky matrix
@@ -3635,7 +3634,7 @@ class ObjectiveFunction(object):
3635
3634
  #print("Custom functieon time:", end_time - start_time)
3636
3635
 
3637
3636
  except Exception as e:
3638
- print(e)
3637
+ print("Neg Binom error.")
3639
3638
  return gg_alt
3640
3639
 
3641
3640
  def lindley_pmf(self, x, r, theta, k=50):
@@ -3969,7 +3968,7 @@ class ObjectiveFunction(object):
3969
3968
 
3970
3969
 
3971
3970
  # proba_d = self.dnegbimonli(y, eVd, b_gam )
3972
- # print('fuck if this actually works')
3971
+
3973
3972
 
3974
3973
  elif dispersion == 2:
3975
3974
 
@@ -1,10 +1,10 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: metacountregressor
3
- Version: 0.1.122
3
+ Version: 0.1.125
4
4
  Summary: Extensions for a Python package for estimation of count models.
5
5
  Home-page: https://github.com/zahern/CountDataEstimation
6
6
  Author: Zeke Ahern
7
- Author-email: zeke.ahern@hdr.qut.edu.au
7
+ Author-email: z.ahern@qut.edu.au
8
8
  License: QUT
9
9
  Requires-Python: >=3.10
10
10
  Description-Content-Type: text/markdown
@@ -53,7 +53,7 @@ setuptools.setup(
53
53
  long_description_content_type='text/markdown', # Specify the content type as Markdown
54
54
  url='https://github.com/zahern/CountDataEstimation',
55
55
  author='Zeke Ahern',
56
- author_email='zeke.ahern@hdr.qut.edu.au',
56
+ author_email='z.ahern@qut.edu.au',
57
57
  license='QUT',
58
58
  packages=['metacountregressor'],
59
59
  zip_safe=False,
@@ -61,6 +61,7 @@ setuptools.setup(
61
61
  install_requires=[
62
62
  'numpy>=1.13.1',
63
63
  'scipy>=1.0.0',
64
- 'requests','latextable'
64
+ 'requests',
65
+ 'latextable'
65
66
  ]
66
67
  )