metacountregressor 0.1.132__py3-none-any.whl → 0.1.135__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -183,15 +183,28 @@ config = {
183
183
  # Function to guess Low, Medium, High ranges
184
184
  def guess_low_medium_high(column_name, series):
185
185
  # Compute the tertiles (33rd and 66th percentiles)
186
- print('why')
186
+ print('did it make it...')
187
187
  mode_value = np.mode(series) # Get the most frequent value
188
+ print('good')
188
189
  series = pd.to_numeric(series, errors='coerce').fillna(mode_value)
189
190
  low_threshold = np.quantile(series, 0.33)
190
191
  high_threshold = np.quantile(series,0.66)
191
192
 
192
193
  # Define the bins and labels
193
194
  bins = [np.min(series) - 1, low_threshold, high_threshold, np.max(series)]
194
- labels = ['Low', 'Medium', 'High']
195
+ # Handle duplicate bins by adjusting labels
196
+ if len(set(bins)) < len(bins): # Check for duplicate bin edges
197
+ if low_threshold == high_threshold:
198
+ # Collapse to two bins (Low and High)
199
+ bins = [np.min(series) - 1, low_threshold, np.max(series)]
200
+ labels = ['Low', 'High']
201
+ else:
202
+ # Drop duplicate bin edges; one label is generated per remaining interval
203
+ bins = sorted(set(bins)) # Remove duplicate edges
204
+ labels = [f'Bin {i + 1}' for i in range(len(bins) - 1)]
205
+ else:
206
+ # Standard case: Low, Medium, High
207
+ labels = ['Low', 'Medium', 'High']
195
208
 
196
209
  return {
197
210
  'type': 'bin',
@@ -210,7 +223,8 @@ def transform_dataframe(df, config):
210
223
  df[column],
211
224
  bins=settings['bins'],
212
225
  labels=settings['labels'],
213
- right=False
226
+ right=False,
227
+
214
228
  )
215
229
  # One-hot encode the binned column
216
230
  binned_dummies = pd.get_dummies(binned, prefix=settings['prefix'])
@@ -250,12 +264,14 @@ def guess_column_type(column_name, series):
250
264
  return {'type': 'one-hot', 'prefix': column_name}
251
265
  elif pd.api.types.is_numeric_dtype(series):
252
266
  unique_values = series.nunique()
267
+
253
268
  if unique_values < 5:
254
269
  return {'type': 'one-hot', 'prefix': column_name}
255
270
 
256
271
  elif np.max(series) - np.min(series) > 20:
272
+ print('made it through here')
257
273
  # If the value range is wide (max - min > 20), bin the column by quantile thresholds
258
- guess_low_medium_high(column_name,series)
274
+ return guess_low_medium_high(column_name,series)
259
275
  else:
260
276
  # # Otherwise, assume continuous data with normalization
261
277
  # Otherwise, fallback to continuous standardization
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: metacountregressor
3
- Version: 0.1.132
3
+ Version: 0.1.135
4
4
  Summary: Extensions for a Python package for estimation of count models.
5
5
  Home-page: https://github.com/zahern/CountDataEstimation
6
6
  Author: Zeke Ahern
@@ -3,7 +3,7 @@ metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0
3
3
  metacountregressor/app_main.py,sha256=vY3GczTbGbBRalbzMkl_9jVW7RMgEOc6z2Dr1IZJv9c,10014
4
4
  metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
5
5
  metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
6
- metacountregressor/helperprocess.py,sha256=A_DMW2uQdEpMvjI40gsGcfwDw5TD5wbET0pghbsLAAQ,17316
6
+ metacountregressor/helperprocess.py,sha256=rzqmb9qs9xHAjMbUf7iX0OSW3qdPqWrBtZCbsyqsMwQ,17964
7
7
  metacountregressor/main.py,sha256=_MVROd1y8qIhvGnG1iFzHw4_2e6-8INjXHDnYlDSLy8,22714
8
8
  metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
9
9
  metacountregressor/metaheuristics.py,sha256=Kkx1Jfox6NBlm5zVrI26Vc_NI7NFQSS9dinrZU9SpV8,105871
@@ -13,8 +13,8 @@ metacountregressor/setup.py,sha256=5UcQCCLR8Fm5odA3MX78WwahavxFq4mVD6oq0IuQvAY,9
13
13
  metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
14
14
  metacountregressor/solution.py,sha256=6qAtCSKNvmrCpIpBCzp2Zt8wWKZBHRQtTriyiMHUo54,277519
15
15
  metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
16
- metacountregressor-0.1.132.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
17
- metacountregressor-0.1.132.dist-info/METADATA,sha256=UfcorXlvrb1fAVeFjcInPbvt1FvgWNG8HorEMSYaJec,23434
18
- metacountregressor-0.1.132.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
19
- metacountregressor-0.1.132.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
20
- metacountregressor-0.1.132.dist-info/RECORD,,
16
+ metacountregressor-0.1.135.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
17
+ metacountregressor-0.1.135.dist-info/METADATA,sha256=Ga9IAdM6l7jn7lDJ7xPXNeCZQMpHKhmJwoaReS5Ditg,23434
18
+ metacountregressor-0.1.135.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
19
+ metacountregressor-0.1.135.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
20
+ metacountregressor-0.1.135.dist-info/RECORD,,