metacountregressor 0.1.132__py3-none-any.whl → 0.1.135__py3-none-any.whl
This diff shows the differences between the contents of two publicly available package versions, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions exactly as they appear in their respective public registries.
- metacountregressor/helperprocess.py +20 -4
- {metacountregressor-0.1.132.dist-info → metacountregressor-0.1.135.dist-info}/METADATA +1 -1
- {metacountregressor-0.1.132.dist-info → metacountregressor-0.1.135.dist-info}/RECORD +6 -6
- {metacountregressor-0.1.132.dist-info → metacountregressor-0.1.135.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.132.dist-info → metacountregressor-0.1.135.dist-info}/WHEEL +0 -0
- {metacountregressor-0.1.132.dist-info → metacountregressor-0.1.135.dist-info}/top_level.txt +0 -0
@@ -183,15 +183,28 @@ config = {
|
|
183
183
|
# Function to guess Low, Medium, High ranges
|
184
184
|
def guess_low_medium_high(column_name, series):
|
185
185
|
# Compute the tertiles (33rd and 66th percentiles)
|
186
|
-
print('
|
186
|
+
print('did it make it...')
|
187
187
|
mode_value = np.mode(series) # Get the most frequent value
|
188
|
+
print('good')
|
188
189
|
series = pd.to_numeric(series, errors='coerce').fillna(mode_value)
|
189
190
|
low_threshold = np.quantile(series, 0.33)
|
190
191
|
high_threshold = np.quantile(series,0.66)
|
191
192
|
|
192
193
|
# Define the bins and labels
|
193
194
|
bins = [np.min(series) - 1, low_threshold, high_threshold, np.max(series)]
|
194
|
-
|
195
|
+
# Handle duplicate bins by adjusting labels
|
196
|
+
if len(set(bins)) < len(bins): # Check for duplicate bin edges
|
197
|
+
if low_threshold == high_threshold:
|
198
|
+
# Collapse to two bins (Low and High)
|
199
|
+
bins = [np.min(series) - 1, low_threshold, np.max(series)]
|
200
|
+
labels = ['Low', 'High']
|
201
|
+
else:
|
202
|
+
# Collapse to three unique bins
|
203
|
+
bins = sorted(set(bins)) # Remove duplicate edges
|
204
|
+
labels = [f'Bin {i + 1}' for i in range(len(bins) - 1)]
|
205
|
+
else:
|
206
|
+
# Standard case: Low, Medium, High
|
207
|
+
labels = ['Low', 'Medium', 'High']
|
195
208
|
|
196
209
|
return {
|
197
210
|
'type': 'bin',
|
@@ -210,7 +223,8 @@ def transform_dataframe(df, config):
|
|
210
223
|
df[column],
|
211
224
|
bins=settings['bins'],
|
212
225
|
labels=settings['labels'],
|
213
|
-
right=False
|
226
|
+
right=False,
|
227
|
+
|
214
228
|
)
|
215
229
|
# One-hot encode the binned column
|
216
230
|
binned_dummies = pd.get_dummies(binned, prefix=settings['prefix'])
|
@@ -250,12 +264,14 @@ def guess_column_type(column_name, series):
|
|
250
264
|
return {'type': 'one-hot', 'prefix': column_name}
|
251
265
|
elif pd.api.types.is_numeric_dtype(series):
|
252
266
|
unique_values = series.nunique()
|
267
|
+
|
253
268
|
if unique_values < 5:
|
254
269
|
return {'type': 'one-hot', 'prefix': column_name}
|
255
270
|
|
256
271
|
elif np.max(series) - np.min(series) > 20:
|
272
|
+
print('made it through here')
|
257
273
|
# If there are few unique values, assume binning with default bins
|
258
|
-
guess_low_medium_high(column_name,series)
|
274
|
+
return guess_low_medium_high(column_name,series)
|
259
275
|
else:
|
260
276
|
# # Otherwise, assume continuous data with normalization
|
261
277
|
# Otherwise, fallback to continuous standardization
|
@@ -3,7 +3,7 @@ metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0
|
|
3
3
|
metacountregressor/app_main.py,sha256=vY3GczTbGbBRalbzMkl_9jVW7RMgEOc6z2Dr1IZJv9c,10014
|
4
4
|
metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
|
5
5
|
metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
|
6
|
-
metacountregressor/helperprocess.py,sha256=
|
6
|
+
metacountregressor/helperprocess.py,sha256=rzqmb9qs9xHAjMbUf7iX0OSW3qdPqWrBtZCbsyqsMwQ,17964
|
7
7
|
metacountregressor/main.py,sha256=_MVROd1y8qIhvGnG1iFzHw4_2e6-8INjXHDnYlDSLy8,22714
|
8
8
|
metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
|
9
9
|
metacountregressor/metaheuristics.py,sha256=Kkx1Jfox6NBlm5zVrI26Vc_NI7NFQSS9dinrZU9SpV8,105871
|
@@ -13,8 +13,8 @@ metacountregressor/setup.py,sha256=5UcQCCLR8Fm5odA3MX78WwahavxFq4mVD6oq0IuQvAY,9
|
|
13
13
|
metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
|
14
14
|
metacountregressor/solution.py,sha256=6qAtCSKNvmrCpIpBCzp2Zt8wWKZBHRQtTriyiMHUo54,277519
|
15
15
|
metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
|
16
|
-
metacountregressor-0.1.
|
17
|
-
metacountregressor-0.1.
|
18
|
-
metacountregressor-0.1.
|
19
|
-
metacountregressor-0.1.
|
20
|
-
metacountregressor-0.1.
|
16
|
+
metacountregressor-0.1.135.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
17
|
+
metacountregressor-0.1.135.dist-info/METADATA,sha256=Ga9IAdM6l7jn7lDJ7xPXNeCZQMpHKhmJwoaReS5Ditg,23434
|
18
|
+
metacountregressor-0.1.135.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
19
|
+
metacountregressor-0.1.135.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
|
20
|
+
metacountregressor-0.1.135.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|