metacountregressor 0.1.121__py3-none-any.whl → 0.1.123__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacountregressor/helperprocess.py +96 -0
- metacountregressor/main.py +64 -8
- {metacountregressor-0.1.121.dist-info → metacountregressor-0.1.123.dist-info}/METADATA +1 -1
- {metacountregressor-0.1.121.dist-info → metacountregressor-0.1.123.dist-info}/RECORD +7 -7
- {metacountregressor-0.1.121.dist-info → metacountregressor-0.1.123.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.121.dist-info → metacountregressor-0.1.123.dist-info}/WHEEL +0 -0
- {metacountregressor-0.1.121.dist-info → metacountregressor-0.1.123.dist-info}/top_level.txt +0 -0
@@ -2,6 +2,7 @@ import numpy as np
|
|
2
2
|
import pandas as pd
|
3
3
|
import csv
|
4
4
|
import matplotlib.pyplot as plt
|
5
|
+
from sklearn.preprocessing import StandardScaler
|
5
6
|
|
6
7
|
plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
|
7
8
|
|
@@ -151,6 +152,99 @@ def remove_files(yes=1):
|
|
151
152
|
os.remove('pop_log.csv')
|
152
153
|
|
153
154
|
|
155
|
+
# Function to process the DataFrame
|
156
|
+
'''
|
157
|
+
Example usage
|
158
|
+
# Configuration dictionary
|
159
|
+
config = {
|
160
|
+
'Age': {
|
161
|
+
'type': 'bin',
|
162
|
+
'bins': [0, 18, 35, 50, 100],
|
163
|
+
'labels': ['Child', 'YoungAdult', 'MiddleAged', 'Senior'],
|
164
|
+
'prefix': 'Age_Binned'
|
165
|
+
},
|
166
|
+
'Income': {
|
167
|
+
'type': 'bin',
|
168
|
+
'bins': [0, 2000, 5000, 10000],
|
169
|
+
'labels': ['Low', 'Medium', 'High'],
|
170
|
+
'prefix': 'Income_Binned'
|
171
|
+
},
|
172
|
+
'Gender': {
|
173
|
+
'type': 'one-hot',
|
174
|
+
'prefix': 'Gender'
|
175
|
+
},
|
176
|
+
'Score': {
|
177
|
+
'type': 'none'
|
178
|
+
}
|
179
|
+
}
|
180
|
+
'''
|
181
|
+
|
182
|
+
|
183
|
+
def transform_dataframe(df, config):
    """Build a model-ready DataFrame from ``df`` according to ``config``.

    ``config`` maps a column name to a settings dict whose ``'type'`` key
    selects the treatment:

    * ``'bin'`` - cut the column with ``settings['bins']`` and
      ``settings['labels']`` (left-closed intervals, ``right=False``) and
      one-hot encode the result using ``settings['prefix']``.
    * ``'one-hot'`` / ``'categorical'`` - one-hot encode the raw column;
      the prefix defaults to the column name.
    * ``'continuous'`` - optionally keep only values inside the inclusive
      ``settings['bounds']`` pair, then optionally map every remaining
      value through ``settings['apply_func']``. Rows removed by the bounds
      filter stay in the output as NaN.
    * ``'none'`` - copy the column unchanged.

    Columns are emitted in ``config`` order. Columns of ``df`` that are not
    named in ``config`` are not included.

    Returns:
        A new DataFrame; an empty DataFrame when ``config`` is empty.

    Raises:
        KeyError: a configured column is missing from ``df`` or a required
            settings key is missing.
        ValueError: ``settings['type']`` is not one of the supported values.
    """
    pieces = []

    for column, settings in config.items():
        kind = settings['type']

        if kind == 'bin':
            binned = pd.cut(
                df[column],
                bins=settings['bins'],
                labels=settings['labels'],
                right=False
            )
            pieces.append(pd.get_dummies(binned, prefix=settings['prefix']))

        elif kind in ('one-hot', 'categorical'):
            # 'categorical' is accepted as an alias of 'one-hot' so that such
            # columns are encoded instead of silently disappearing.
            pieces.append(
                pd.get_dummies(df[column], prefix=settings.get('prefix', column))
            )

        elif kind == 'continuous':
            data = df[column]
            if 'bounds' in settings:
                lower, upper = settings['bounds']
                data = data[(data >= lower) & (data <= upper)]
            if 'apply_func' in settings:
                # Applied only to the values that survived the bounds filter.
                data = data.apply(settings['apply_func'])
            if not data.index.equals(df.index):
                # Restore the dropped rows as NaN so every piece shares the
                # input's row index regardless of the order of `config`.
                data = data.reindex(df.index)
            pieces.append(data.rename(column))

        elif kind == 'none':
            pieces.append(df[[column]])

        else:
            raise ValueError(
                f"Unsupported column type {kind!r} for column {column!r}"
            )

    if not pieces:
        return pd.DataFrame()

    # Single concat instead of growing the frame inside the loop.
    return pd.concat(pieces, axis=1)
|
221
|
+
|
222
|
+
# Helper function to guess column type and update `config`
|
223
|
+
def guess_column_type(column_name, series):
    """Guess a ``transform_dataframe`` settings dict for a single column.

    Rules, in order:

    * object, string or category dtype -> one-hot encoding prefixed with
      ``column_name``.
    * numeric dtype with fewer than 2 distinct values -> ``{'type': 'none'}``
      (no non-degenerate bin edges can be built from it).
    * numeric dtype with 2-9 distinct values -> equal-width binning with one
      bin per distinct value, labelled ``Bin_1`` .. ``Bin_k`` and prefixed
      ``'<column_name>_Binned'``.
    * numeric dtype with 10 or more distinct values -> z-score
      standardisation through ``apply_func``.
    * anything else -> ``{'type': 'none'}``.

    Args:
        column_name: Name used to build the one-hot / binning prefix.
        series: The pandas Series holding the column's values.

    Returns:
        A settings dict with at least a ``'type'`` key.
    """
    dtype = series.dtype
    if (
        dtype == 'object'
        or dtype.name == 'category'
        or pd.api.types.is_string_dtype(series)
    ):
        # Categorical data (e.g. strings): one-hot encode.
        return {'type': 'one-hot', 'prefix': column_name}

    if not pd.api.types.is_numeric_dtype(series):
        # Default fallback (leave the column unchanged).
        return {'type': 'none'}

    unique_values = series.nunique()
    if unique_values < 2:
        # Constant or empty column: min == max (or both NaN), so the bin
        # edges would be degenerate; leave the column as it is.
        return {'type': 'none'}

    if unique_values < 10:
        min_val, max_val = series.min(), series.max()
        bins = np.linspace(min_val, max_val, num=unique_values + 1)
        # transform_dataframe cuts with right=False (left-closed bins), so
        # nudge the last edge up by one ulp to keep the maximum in the
        # final bin instead of turning it into NaN.
        bins[-1] = np.nextafter(bins[-1], np.inf)
        labels = [f'Bin_{i}' for i in range(1, len(bins))]
        return {
            'type': 'bin',
            'bins': bins,
            'labels': labels,
            'prefix': f'{column_name}_Binned',
        }

    # Compute the statistics once; the callable is invoked per element, so
    # recomputing them inside it would cost a full pass per value.
    mean = series.mean()
    std = series.std()
    return {
        'type': 'continuous',
        'apply_func': (lambda x: (x - mean) / std)  # Z-Score Standardization
    }
|
245
|
+
|
246
|
+
|
247
|
+
|
154
248
|
def as_wide_factor(x_df, yes=1, min_factor=2, max_factor=8, keep_original=0, exclude=[]):
|
155
249
|
if not yes:
|
156
250
|
return x_df
|
@@ -330,3 +424,5 @@ def entries_to_remove(entries, the_dict):
|
|
330
424
|
for key in entries:
|
331
425
|
if key in the_dict:
|
332
426
|
del the_dict[key]
|
427
|
+
|
428
|
+
|
metacountregressor/main.py
CHANGED
@@ -28,12 +28,60 @@ def convert_df_columns_to_binary_and_wide(df):
|
|
28
28
|
return df
|
29
29
|
|
30
30
|
|
31
|
-
def process_arguments():
|
31
|
+
def process_arguments(**kwargs):
|
32
32
|
'''
|
33
33
|
TRYING TO TURN THE CSV FILES INTO RELEVANT ARGS
|
34
34
|
'''
|
35
|
-
|
36
|
-
|
35
|
+
#dataset
|
36
|
+
if kwargs.get('dataset_file', False
|
37
|
+
):
|
38
|
+
dataset = pd.read_csv(kwargs.get('dataset_file'))
|
39
|
+
named_data_headers = dataset.columns.tolist()
|
40
|
+
decision_constants = {name: list(range(7)) for name in named_data_headers}
|
41
|
+
data_info = {
|
42
|
+
|
43
|
+
|
44
|
+
'AADT': {
|
45
|
+
'type': 'continuous',
|
46
|
+
'bounds': [0.0, np.infty],
|
47
|
+
'discrete': False,
|
48
|
+
'apply_func': (lambda x: np.log(x + 1)),
|
49
|
+
},
|
50
|
+
'SPEED': {
|
51
|
+
'type': 'continuous',
|
52
|
+
'bounds': [0, 100],
|
53
|
+
'enforce_bounds': True,
|
54
|
+
'discrete': True
|
55
|
+
},
|
56
|
+
'TIME': {
|
57
|
+
'type': 'continuous',
|
58
|
+
'bounds': [0, 23.999],
|
59
|
+
'discrete': False
|
60
|
+
}
|
61
|
+
}
|
62
|
+
#remove ID CoLUMNS from dataset
|
63
|
+
dataset = dataset.drop(columns = [
|
64
|
+
'ID'
|
65
|
+
])
|
66
|
+
for c in dataset.columns:
|
67
|
+
if c not in data_info.keys():
|
68
|
+
data_info[c] = {'type': 'categorical'}
|
69
|
+
|
70
|
+
data_new =helperprocess.transform_dataframe(dataset,data_info)
|
71
|
+
|
72
|
+
update_constant = kwargs.get('analyst_constraints')
|
73
|
+
#update the decision_constraints
|
74
|
+
|
75
|
+
data_characteristic = pd.read_csv(kwargs.get('problem_data', 'problem_data.csv'))
|
76
|
+
# Extract the column as a list of characteristic names
|
77
|
+
name_data_characteristics = data_characteristic.columns.tolist()
|
78
|
+
|
79
|
+
# Create the dictionary
|
80
|
+
decision_constraints = {name: list(range(7)) for name in name_data_characteristics}
|
81
|
+
|
82
|
+
print('this gets all the features, I need to remove...')
|
83
|
+
|
84
|
+
analyst_d = pd.read_csv(kwargs.get('decison_constraints', 'decisions.csv'))
|
37
85
|
hyper = pd.read_csv('setup_hyper.csv')
|
38
86
|
|
39
87
|
new_data = {'data': data_characteristic,
|
@@ -41,7 +89,7 @@ def process_arguments():
|
|
41
89
|
'hyper': hyper}
|
42
90
|
return new_data
|
43
91
|
|
44
|
-
def
|
92
|
+
def process_package_arguments():
|
45
93
|
|
46
94
|
new_data = {}
|
47
95
|
pass
|
@@ -319,8 +367,8 @@ def main(args, **kwargs):
|
|
319
367
|
x_df = helperprocess.interactions(x_df, keep)
|
320
368
|
|
321
369
|
|
322
|
-
|
323
|
-
data_info = process_arguments()
|
370
|
+
elif dataset ==10: # the dataset has been selected in the program as something else
|
371
|
+
data_info = process_arguments(**args)
|
324
372
|
data_info['hyper']
|
325
373
|
data_info['analyst']
|
326
374
|
data_info['data']['Y']
|
@@ -339,6 +387,10 @@ def main(args, **kwargs):
|
|
339
387
|
y_df = df[[data_info['data']['Y'][0]]]
|
340
388
|
y_df.rename(columns={data_info['data']['Y'][0]: "Y"}, inplace=True)
|
341
389
|
print('test') #FIXME
|
390
|
+
else:
|
391
|
+
print('PROCESS THE PACKAGE ARGUMENTS SIMULIAR TO HOW ONE WOULD DEFINE THE ENVIRONMENT')
|
392
|
+
data_info =process_package_arguments()
|
393
|
+
|
342
394
|
|
343
395
|
if args['Keep_Fit'] == str(2) or args['Keep_Fit'] == 2:
|
344
396
|
if manual_fit_spec is None:
|
@@ -449,6 +501,8 @@ if __name__ == '__main__':
|
|
449
501
|
BATCH_JOB = True
|
450
502
|
|
451
503
|
if BATCH_JOB:
|
504
|
+
parser.add_argument('-dataset_file', default='data/Ex-16-3.csv', help='supply the path to the dataset')
|
505
|
+
|
452
506
|
parser.add_argument('-line', type=int, default=1,
|
453
507
|
help='line to read in csv to pass in argument')
|
454
508
|
|
@@ -463,6 +517,7 @@ if __name__ == '__main__':
|
|
463
517
|
line_number_obs += 1
|
464
518
|
args = dict(args)
|
465
519
|
|
520
|
+
|
466
521
|
for key, value in args.items():
|
467
522
|
try:
|
468
523
|
# Attempt to parse the string value to a Python literal if value is a string.
|
@@ -479,7 +534,7 @@ if __name__ == '__main__':
|
|
479
534
|
if "-algorithm" in action.option_strings:
|
480
535
|
parser._optionals._actions[i].help = "optimization algorithm"
|
481
536
|
|
482
|
-
override =
|
537
|
+
override = True
|
483
538
|
if override:
|
484
539
|
print('WARNING: TESTING ENVIRONMENT, TURN OFF FOR RELEASE')
|
485
540
|
parser.add_argument('-problem_number', default='10')
|
@@ -494,9 +549,10 @@ if __name__ == '__main__':
|
|
494
549
|
parser.add_argument('-seperate_out_factors', action='store_false', default=False,
|
495
550
|
help='Trie of wanting to split data that is potentially categorical as binary'
|
496
551
|
' we want to split the data for processing')
|
497
|
-
parser.add_argument('-supply_csv', type = str, help = 'enter the name of the csv, please include it as a full
|
552
|
+
parser.add_argument('-supply_csv', type = str, help = 'enter the name of the csv, please include it as a full directories')
|
498
553
|
|
499
554
|
else: # DIDN"T SPECIFY LINES TRY EACH ONE MANNUALY
|
555
|
+
print("RUNNING WITH ARGS")
|
500
556
|
parser.add_argument('-com', type=str, default='MetaCode',
|
501
557
|
help='line to read csv')
|
502
558
|
|
@@ -3,8 +3,8 @@ metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0
|
|
3
3
|
metacountregressor/app_main.py,sha256=vY3GczTbGbBRalbzMkl_9jVW7RMgEOc6z2Dr1IZJv9c,10014
|
4
4
|
metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
|
5
5
|
metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
|
6
|
-
metacountregressor/helperprocess.py,sha256=
|
7
|
-
metacountregressor/main.py,sha256=
|
6
|
+
metacountregressor/helperprocess.py,sha256=4aSoyKP1GfzjwCzZ_dXlTbokOiMt_8sbzB6_tu0GPDg,16290
|
7
|
+
metacountregressor/main.py,sha256=A3XGwbwhhKVgMxnEgbAmMpgYaWkS8Rk30-cYs3FxvEk,22713
|
8
8
|
metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
|
9
9
|
metacountregressor/metaheuristics.py,sha256=Kkx1Jfox6NBlm5zVrI26Vc_NI7NFQSS9dinrZU9SpV8,105871
|
10
10
|
metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiLur0k,23096
|
@@ -13,8 +13,8 @@ metacountregressor/setup.py,sha256=8w6IqX0tJsbYrOI1BJLIJCIvOnunKli5I9fsF5PhHv4,9
|
|
13
13
|
metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
|
14
14
|
metacountregressor/solution.py,sha256=OJqB00cvGMLFei6RsjphPamOdLm3EWOOzK7k-uVbvFY,277671
|
15
15
|
metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
|
16
|
-
metacountregressor-0.1.
|
17
|
-
metacountregressor-0.1.
|
18
|
-
metacountregressor-0.1.
|
19
|
-
metacountregressor-0.1.
|
20
|
-
metacountregressor-0.1.
|
16
|
+
metacountregressor-0.1.123.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
17
|
+
metacountregressor-0.1.123.dist-info/METADATA,sha256=e4jQ9vtFxhHtA98q1Vd8PJ9gJiIz91iSUKgGPt78kg8,23415
|
18
|
+
metacountregressor-0.1.123.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
19
|
+
metacountregressor-0.1.123.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
|
20
|
+
metacountregressor-0.1.123.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|