metacountregressor 0.1.132__py3-none-any.whl → 0.1.135__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -183,15 +183,28 @@ config = {
183
183
  # Function to guess Low, Medium, High ranges
184
184
  def guess_low_medium_high(column_name, series):
185
185
  # Compute the tertiles (33rd and 66th percentiles)
186
- print('why')
186
+ print('did it make it...')
187
187
  mode_value = np.mode(series) # Get the most frequent value
188
+ print('good')
188
189
  series = pd.to_numeric(series, errors='coerce').fillna(mode_value)
189
190
  low_threshold = np.quantile(series, 0.33)
190
191
  high_threshold = np.quantile(series,0.66)
191
192
 
192
193
  # Define the bins and labels
193
194
  bins = [np.min(series) - 1, low_threshold, high_threshold, np.max(series)]
194
- labels = ['Low', 'Medium', 'High']
195
+ # Handle duplicate bin edges by adjusting both the bins and the labels
196
+ if len(set(bins)) < len(bins): # Check for duplicate bin edges
197
+ if low_threshold == high_threshold:
198
+ # Collapse to two bins (Low and High)
199
+ bins = [np.min(series) - 1, low_threshold, np.max(series)]
200
+ labels = ['Low', 'High']
201
+ else:
202
+ # Collapse to the unique bin edges (yields fewer than three bins)
203
+ bins = sorted(set(bins)) # Remove duplicate edges
204
+ labels = [f'Bin {i + 1}' for i in range(len(bins) - 1)]
205
+ else:
206
+ # Standard case: Low, Medium, High
207
+ labels = ['Low', 'Medium', 'High']
195
208
 
196
209
  return {
197
210
  'type': 'bin',
@@ -210,7 +223,8 @@ def transform_dataframe(df, config):
210
223
  df[column],
211
224
  bins=settings['bins'],
212
225
  labels=settings['labels'],
213
- right=False
226
+ right=False,
227
+
214
228
  )
215
229
  # One-hot encode the binned column
216
230
  binned_dummies = pd.get_dummies(binned, prefix=settings['prefix'])
@@ -250,12 +264,14 @@ def guess_column_type(column_name, series):
250
264
  return {'type': 'one-hot', 'prefix': column_name}
251
265
  elif pd.api.types.is_numeric_dtype(series):
252
266
  unique_values = series.nunique()
267
+
253
268
  if unique_values < 5:
254
269
  return {'type': 'one-hot', 'prefix': column_name}
255
270
 
256
271
  elif np.max(series) - np.min(series) > 20:
272
+ print('made it through here')
257
273
  # If the value range exceeds 20, bin the column into low/medium/high ranges
258
- guess_low_medium_high(column_name,series)
274
+ return guess_low_medium_high(column_name,series)
259
275
  else:
260
276
  # # Otherwise, assume continuous data with normalization
261
277
  # Otherwise, fallback to continuous standardization
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: metacountregressor
3
- Version: 0.1.132
3
+ Version: 0.1.135
4
4
  Summary: Extensions for a Python package for estimation of count models.
5
5
  Home-page: https://github.com/zahern/CountDataEstimation
6
6
  Author: Zeke Ahern
@@ -3,7 +3,7 @@ metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0
3
3
  metacountregressor/app_main.py,sha256=vY3GczTbGbBRalbzMkl_9jVW7RMgEOc6z2Dr1IZJv9c,10014
4
4
  metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
5
5
  metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
6
- metacountregressor/helperprocess.py,sha256=A_DMW2uQdEpMvjI40gsGcfwDw5TD5wbET0pghbsLAAQ,17316
6
+ metacountregressor/helperprocess.py,sha256=rzqmb9qs9xHAjMbUf7iX0OSW3qdPqWrBtZCbsyqsMwQ,17964
7
7
  metacountregressor/main.py,sha256=_MVROd1y8qIhvGnG1iFzHw4_2e6-8INjXHDnYlDSLy8,22714
8
8
  metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
9
9
  metacountregressor/metaheuristics.py,sha256=Kkx1Jfox6NBlm5zVrI26Vc_NI7NFQSS9dinrZU9SpV8,105871
@@ -13,8 +13,8 @@ metacountregressor/setup.py,sha256=5UcQCCLR8Fm5odA3MX78WwahavxFq4mVD6oq0IuQvAY,9
13
13
  metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
14
14
  metacountregressor/solution.py,sha256=6qAtCSKNvmrCpIpBCzp2Zt8wWKZBHRQtTriyiMHUo54,277519
15
15
  metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
16
- metacountregressor-0.1.132.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
17
- metacountregressor-0.1.132.dist-info/METADATA,sha256=UfcorXlvrb1fAVeFjcInPbvt1FvgWNG8HorEMSYaJec,23434
18
- metacountregressor-0.1.132.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
19
- metacountregressor-0.1.132.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
20
- metacountregressor-0.1.132.dist-info/RECORD,,
16
+ metacountregressor-0.1.135.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
17
+ metacountregressor-0.1.135.dist-info/METADATA,sha256=Ga9IAdM6l7jn7lDJ7xPXNeCZQMpHKhmJwoaReS5Ditg,23434
18
+ metacountregressor-0.1.135.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
19
+ metacountregressor-0.1.135.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
20
+ metacountregressor-0.1.135.dist-info/RECORD,,