metacountregressor 0.1.119__tar.gz → 0.1.121__tar.gz
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/PKG-INFO +1 -1
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/helperprocess.py +96 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/main.py +64 -8
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor.egg-info/PKG-INFO +1 -1
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/LICENSE.txt +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/README.rst +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/__init__.py +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/_device_cust.py +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/app_main.py +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/data_split_helper.py +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/halton.py +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/main_old.py +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/metaheuristics.py +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/pareto_file.py +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/pareto_logger__plot.py +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/setup.py +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/single_objective_finder.py +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/solution.py +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/test_generated_paper2.py +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor.egg-info/SOURCES.txt +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor.egg-info/dependency_links.txt +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor.egg-info/not-zip-safe +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor.egg-info/requires.txt +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor.egg-info/top_level.txt +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/setup.cfg +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/setup.py +0 -0
- {metacountregressor-0.1.119 → metacountregressor-0.1.121}/tests/test.py +0 -0
{metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/helperprocess.py
RENAMED
@@ -2,6 +2,7 @@ import numpy as np
 import pandas as pd
 import csv
 import matplotlib.pyplot as plt
+from sklearn.preprocessing import StandardScaler
 
 plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
 
@@ -151,6 +152,99 @@ def remove_files(yes=1):
     os.remove('pop_log.csv')
 
 
+# Function to process the DataFrame
+'''
+Example usage
+# Configuration dictionary
+config = {
+    'Age': {
+        'type': 'bin',
+        'bins': [0, 18, 35, 50, 100],
+        'labels': ['Child', 'YoungAdult', 'MiddleAged', 'Senior'],
+        'prefix': 'Age_Binned'
+    },
+    'Income': {
+        'type': 'bin',
+        'bins': [0, 2000, 5000, 10000],
+        'labels': ['Low', 'Medium', 'High'],
+        'prefix': 'Income_Binned'
+    },
+    'Gender': {
+        'type': 'one-hot',
+        'prefix': 'Gender'
+    },
+    'Score': {
+        'type': 'none'
+    }
+}
+'''
+
+
+def transform_dataframe(df, config):
+    output_df = pd.DataFrame()
+
+    for column, settings in config.items():
+        if settings['type'] == 'bin':
+            # Apply binning
+            binned = pd.cut(
+                df[column],
+                bins=settings['bins'],
+                labels=settings['labels'],
+                right=False
+            )
+            # One-hot encode the binned column
+            binned_dummies = pd.get_dummies(binned, prefix=settings['prefix'])
+            output_df = pd.concat([output_df, binned_dummies], axis=1)
+
+        elif settings['type'] == 'one-hot':
+            # One-hot encode the column
+            one_hot_dummies = pd.get_dummies(df[column], prefix=settings.get('prefix', column))
+            output_df = pd.concat([output_df, one_hot_dummies], axis=1)
+
+        elif settings['type'] == 'continuous':
+            # Apply function to continuous data
+            data = df[column]
+            if 'bounds' in settings:
+                # Apply bounds filtering
+                lower, upper = settings['bounds']
+                data = data[(data >= lower) & (data <= upper)]
+            if 'apply_func' in settings:
+                # Apply custom function
+                data = data.apply(settings['apply_func'])
+            output_df[column] = data
+
+        elif settings['type'] == 'none':
+            # Leave the column unchanged
+            output_df = pd.concat([output_df, df[[column]]], axis=1)
+
+    return output_df
+
+# Helper function to guess column type and update `config`
+def guess_column_type(column_name, series):
+    if series.dtype == 'object' or series.dtype.name == 'category':
+        # If the column is categorical (e.g., strings), assume one-hot encoding
+        return {'type': 'one-hot', 'prefix': column_name}
+    elif pd.api.types.is_numeric_dtype(series):
+        unique_values = series.nunique()
+        if unique_values < 10:
+            # If there are few unique values, assume binning with default bins
+            min_val, max_val = series.min(), series.max()
+            bins = np.linspace(min_val, max_val, num=unique_values + 1)
+            labels = [f'Bin_{i}' for i in range(1, len(bins))]
+            return {'type': 'bin', 'bins': bins, 'labels': labels, 'prefix': f'{column_name}_Binned'}
+        else:
+            # Otherwise, fall back to continuous z-score standardization
+            return {
+                'type': 'continuous',
+                'apply_func': (lambda x: (x - series.mean()) / series.std())  # Z-score standardization
+            }
+    else:
+        # Default fallback (leave the column unchanged)
+        return {'type': 'none'}
+
+
+
 def as_wide_factor(x_df, yes=1, min_factor=2, max_factor=8, keep_original=0, exclude=[]):
     if not yes:
         return x_df
@@ -330,3 +424,5 @@ def entries_to_remove(entries, the_dict):
     for key in entries:
         if key in the_dict:
             del the_dict[key]
+
+
{metacountregressor-0.1.119 → metacountregressor-0.1.121}/metacountregressor/main.py
RENAMED
@@ -28,12 +28,60 @@ def convert_df_columns_to_binary_and_wide(df):
     return df
 
 
-def process_arguments():
+def process_arguments(**kwargs):
     '''
     TRYING TO TURN THE CSV FILES INTO RELEVANT ARGS
     '''
-
-
+    # dataset
+    if kwargs.get('dataset_file', False):
+        dataset = pd.read_csv(kwargs.get('dataset_file'))
+        named_data_headers = dataset.columns.tolist()
+        decision_constants = {name: list(range(7)) for name in named_data_headers}
+        data_info = {
+            'AADT': {
+                'type': 'continuous',
+                'bounds': [0.0, np.infty],
+                'discrete': False,
+                'apply_func': (lambda x: np.log(x + 1)),
+            },
+            'SPEED': {
+                'type': 'continuous',
+                'bounds': [0, 100],
+                'enforce_bounds': True,
+                'discrete': True
+            },
+            'TIME': {
+                'type': 'continuous',
+                'bounds': [0, 23.999],
+                'discrete': False
+            }
+        }
+        # remove ID columns from the dataset
+        dataset = dataset.drop(columns=['ID'])
+        for c in dataset.columns:
+            if c not in data_info.keys():
+                data_info[c] = {'type': 'categorical'}
+
+        data_new = helperprocess.transform_dataframe(dataset, data_info)
+
+        update_constant = kwargs.get('analyst_constraints')
+        # update the decision constraints
+
+    data_characteristic = pd.read_csv(kwargs.get('problem_data', 'problem_data.csv'))
+    # Extract the column as a list of characteristic names
+    name_data_characteristics = data_characteristic.columns.tolist()
+
+    # Create the dictionary
+    decision_constraints = {name: list(range(7)) for name in name_data_characteristics}
+
+    print('this gets all the features, I need to remove...')
+
+    analyst_d = pd.read_csv(kwargs.get('decison_constraints', 'decisions.csv'))
     hyper = pd.read_csv('setup_hyper.csv')
 
     new_data = {'data': data_characteristic,
@@ -41,7 +89,7 @@ def process_arguments():
                 'hyper': hyper}
     return new_data
 
-def
+def process_package_arguments():
 
     new_data = {}
     pass
@@ -319,8 +367,8 @@ def main(args, **kwargs):
         x_df = helperprocess.interactions(x_df, keep)
 
 
-
-    data_info = process_arguments()
+    elif dataset == 10:  # the dataset has been selected in the program as something else
+        data_info = process_arguments(**args)
     data_info['hyper']
     data_info['analyst']
     data_info['data']['Y']
@@ -339,6 +387,10 @@ def main(args, **kwargs):
         y_df = df[[data_info['data']['Y'][0]]]
         y_df.rename(columns={data_info['data']['Y'][0]: "Y"}, inplace=True)
         print('test')  # FIXME
+    else:
+        print('PROCESS THE PACKAGE ARGUMENTS SIMILAR TO HOW ONE WOULD DEFINE THE ENVIRONMENT')
+        data_info = process_package_arguments()
+
 
     if args['Keep_Fit'] == str(2) or args['Keep_Fit'] == 2:
         if manual_fit_spec is None:
@@ -449,6 +501,8 @@ if __name__ == '__main__':
     BATCH_JOB = True
 
     if BATCH_JOB:
+        parser.add_argument('-dataset_file', default='data/Ex-16-3.csv', help='supply the path to the dataset')
+
         parser.add_argument('-line', type=int, default=1,
                             help='line to read in csv to pass in argument')
 
@@ -463,6 +517,7 @@ if __name__ == '__main__':
             line_number_obs += 1
         args = dict(args)
 
+
        for key, value in args.items():
            try:
                # Attempt to parse the string value to a Python literal if value is a string.
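
The loop above coerces each batch argument from its CSV string form into a Python value. The diff only shows the comment, not the parser, so the following is a sketch of that pattern under the assumption that ast.literal_eval does the parsing:

    import ast

    args = {'line': '1', 'algorithm': "'hs'", 'problem_number': '10'}
    for key, value in args.items():
        try:
            # Parse the string value to a Python literal where possible.
            args[key] = ast.literal_eval(value) if isinstance(value, str) else value
        except (ValueError, SyntaxError):
            pass  # keep non-literal strings (e.g. bare words) as-is
    print(args)  # {'line': 1, 'algorithm': 'hs', 'problem_number': 10}
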
@@ -479,7 +534,7 @@ if __name__ == '__main__':
            if "-algorithm" in action.option_strings:
                parser._optionals._actions[i].help = "optimization algorithm"
 
-        override =
+        override = True
        if override:
            print('WARNING: TESTING ENVIRONMENT, TURN OFF FOR RELEASE')
            parser.add_argument('-problem_number', default='10')
@@ -494,9 +549,10 @@ if __name__ == '__main__':
        parser.add_argument('-seperate_out_factors', action='store_false', default=False,
                            help='True if wanting to split data that is potentially categorical as binary'
                                 ' we want to split the data for processing')
-        parser.add_argument('-supply_csv', type = str, help = 'enter the name of the csv, please include it as a full
+        parser.add_argument('-supply_csv', type=str, help='enter the name of the csv, please include the full directory path')
 
    else:  # DIDN'T SPECIFY LINES, TRY EACH ONE MANUALLY
+        print("RUNNING WITH ARGS")
        parser.add_argument('-com', type=str, default='MetaCode',
                            help='line to read csv')