metacountregressor 0.1.121__py3-none-any.whl → 0.1.123__py3-none-any.whl

metacountregressor/helperprocess.py
@@ -2,6 +2,7 @@ import numpy as np
  import pandas as pd
  import csv
  import matplotlib.pyplot as plt
+ from sklearn.preprocessing import StandardScaler
 
  plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
 
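The newly imported `StandardScaler` is not exercised in the hunks shown here, but it overlaps with the z-score lambda added to `guess_column_type` below. The two differ slightly: pandas `.std()` defaults to the sample estimate (ddof=1) while `StandardScaler` uses the population estimate (ddof=0). A small sketch of the comparison on an illustrative series:

    import pandas as pd
    from sklearn.preprocessing import StandardScaler

    s = pd.Series([1.0, 2.0, 3.0, 4.0])
    z_manual = (s - s.mean()) / s.std()                              # sample std, ddof=1
    z_scaler = StandardScaler().fit_transform(s.to_frame()).ravel()  # population std, ddof=0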
@@ -151,6 +152,99 @@ def remove_files(yes=1):
      os.remove('pop_log.csv')
 
 
+ # Function to process the DataFrame
+ '''
+ Example usage
+ # Configuration dictionary
+ config = {
+     'Age': {
+         'type': 'bin',
+         'bins': [0, 18, 35, 50, 100],
+         'labels': ['Child', 'YoungAdult', 'MiddleAged', 'Senior'],
+         'prefix': 'Age_Binned'
+     },
+     'Income': {
+         'type': 'bin',
+         'bins': [0, 2000, 5000, 10000],
+         'labels': ['Low', 'Medium', 'High'],
+         'prefix': 'Income_Binned'
+     },
+     'Gender': {
+         'type': 'one-hot',
+         'prefix': 'Gender'
+     },
+     'Score': {
+         'type': 'none'
+     }
+ }
+ '''
+
+
+ def transform_dataframe(df, config):
+     output_df = pd.DataFrame()
+
+     for column, settings in config.items():
+         if settings['type'] == 'bin':
+             # Apply binning
+             binned = pd.cut(
+                 df[column],
+                 bins=settings['bins'],
+                 labels=settings['labels'],
+                 right=False
+             )
+             # One-hot encode the binned column
+             binned_dummies = pd.get_dummies(binned, prefix=settings['prefix'])
+             output_df = pd.concat([output_df, binned_dummies], axis=1)
+
+         elif settings['type'] == 'one-hot':
+             # One-hot encode the column
+             one_hot_dummies = pd.get_dummies(df[column], prefix=settings.get('prefix', column))
+             output_df = pd.concat([output_df, one_hot_dummies], axis=1)
+
+         elif settings['type'] == 'continuous':
+             # Apply function to continuous data
+             data = df[column]
+             if 'bounds' in settings:
+                 # Apply bounds filtering
+                 lower, upper = settings['bounds']
+                 data = data[(data >= lower) & (data <= upper)]
+             if 'apply_func' in settings:
+                 # Apply custom function
+                 data = data.apply(settings['apply_func'])
+             output_df[column] = data
+
+         elif settings['type'] == 'none':
+             # Leave the column unchanged
+             output_df = pd.concat([output_df, df[[column]]], axis=1)
+
+     return output_df
+
+ # Helper function to guess column type and update `config`
+ def guess_column_type(column_name, series):
+     if series.dtype == 'object' or series.dtype.name == 'category':
+         # If the column is categorical (e.g., strings), assume one-hot encoding
+         return {'type': 'one-hot', 'prefix': column_name}
+     elif pd.api.types.is_numeric_dtype(series):
+         unique_values = series.nunique()
+         if unique_values < 10:
+             # If there are few unique values, assume binning with default bins
+             min_val, max_val = series.min(), series.max()
+             bins = np.linspace(min_val, max_val, num=unique_values + 1)
+             labels = [f'Bin_{i}' for i in range(1, len(bins))]
+             return {'type': 'bin', 'bins': bins, 'labels': labels, 'prefix': f'{column_name}_Binned'}
+         else:
+             # Otherwise, fall back to continuous z-score standardization
+             return {
+                 'type': 'continuous',
+                 'apply_func': (lambda x: (x - series.mean()) / series.std())  # Z-score standardization
+             }
+     else:
+         # Default fallback (leave the column unchanged)
+         return {'type': 'none'}
+
+
+
  def as_wide_factor(x_df, yes=1, min_factor=2, max_factor=8, keep_original=0, exclude=[]):
      if not yes:
          return x_df
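Taken together, `guess_column_type` can draft the per-column config that `transform_dataframe` consumes. A minimal usage sketch, assuming both helpers are imported from `metacountregressor.helperprocess`; the toy DataFrame and the `Score` override are illustrative, not part of the wheel:

    import pandas as pd
    from metacountregressor.helperprocess import guess_column_type, transform_dataframe

    # Toy data: a low-cardinality numeric column, a categorical column,
    # and a numeric column we explicitly leave untouched.
    df = pd.DataFrame({
        'Age': [12, 25, 40, 69],
        'Gender': ['M', 'F', 'F', 'M'],
        'Score': [0.2, 0.5, 0.9, 0.4],
    })

    config = {col: guess_column_type(col, df[col]) for col in df.columns}
    config['Score'] = {'type': 'none'}  # override: keep Score as-is instead of binning

    wide = transform_dataframe(df, config)
    print(wide.columns.tolist())

One caveat worth knowing: because `pd.cut` is called with `right=False`, the column maximum sits on the open right edge of the last bin, so that row comes back with all-zero dummies.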
@@ -330,3 +424,5 @@ def entries_to_remove(entries, the_dict):
      for key in entries:
          if key in the_dict:
              del the_dict[key]
+
+
metacountregressor/main.py
@@ -28,12 +28,60 @@ def convert_df_columns_to_binary_and_wide(df):
      return df
 
 
- def process_arguments():
+ def process_arguments(**kwargs):
      '''
      TRYING TO TURN THE CSV FILES INTO RELEVANT ARGS
      '''
-     data_characteristic = pd.read_csv('problem_data.csv')
-     analyst_d = pd.read_csv('decisions.csv')
+     # dataset
+     if kwargs.get('dataset_file', False):
+         dataset = pd.read_csv(kwargs.get('dataset_file'))
+         named_data_headers = dataset.columns.tolist()
+         decision_constants = {name: list(range(7)) for name in named_data_headers}
+         data_info = {
+             'AADT': {
+                 'type': 'continuous',
+                 'bounds': [0.0, np.inf],
+                 'discrete': False,
+                 'apply_func': (lambda x: np.log(x + 1)),
+             },
+             'SPEED': {
+                 'type': 'continuous',
+                 'bounds': [0, 100],
+                 'enforce_bounds': True,
+                 'discrete': True
+             },
+             'TIME': {
+                 'type': 'continuous',
+                 'bounds': [0, 23.999],
+                 'discrete': False
+             }
+         }
+         # remove ID columns from dataset
+         dataset = dataset.drop(columns=['ID'])
+         for c in dataset.columns:
+             if c not in data_info.keys():
+                 data_info[c] = {'type': 'categorical'}
+
+         data_new = helperprocess.transform_dataframe(dataset, data_info)
+
+     update_constant = kwargs.get('analyst_constraints')
+     # update the decision_constraints
+
+     data_characteristic = pd.read_csv(kwargs.get('problem_data', 'problem_data.csv'))
+     # Extract the column as a list of characteristic names
+     name_data_characteristics = data_characteristic.columns.tolist()
+
+     # Create the dictionary
+     decision_constraints = {name: list(range(7)) for name in name_data_characteristics}
+
+     print('this gets all the features, I need to remove...')
+
+     analyst_d = pd.read_csv(kwargs.get('decision_constraints', 'decisions.csv'))
      hyper = pd.read_csv('setup_hyper.csv')
 
      new_data = {'data': data_characteristic,
@@ -41,7 +89,7 @@ def process_arguments():
                  'hyper': hyper}
      return new_data
 
- def process_package_argumemnts():
+ def process_package_arguments():
 
      new_data = {}
      pass
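Because `process_arguments` now accepts `**kwargs`, callers can substitute their own CSV paths for the previously hard-coded filenames. A hedged sketch of a call (the paths are placeholders; the keys mirror the `kwargs.get` lookups above):

    call_args = {
        'dataset_file': 'data/Ex-16-3.csv',        # enables the dataset branch
        'problem_data': 'problem_data.csv',
        'decision_constraints': 'decisions.csv',
        'analyst_constraints': None,
    }
    data_info = process_arguments(**call_args)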
@@ -319,8 +367,8 @@ def main(args, **kwargs):
          x_df = helperprocess.interactions(x_df, keep)
 
 
-     else:  # the dataset has been selected in the program as something else
-         data_info = process_arguments()
+     elif dataset == 10:  # the dataset has been selected in the program as something else
+         data_info = process_arguments(**args)
          data_info['hyper']
          data_info['analyst']
          data_info['data']['Y']
@@ -339,6 +387,10 @@ def main(args, **kwargs):
          y_df = df[[data_info['data']['Y'][0]]]
          y_df.rename(columns={data_info['data']['Y'][0]: "Y"}, inplace=True)
          print('test')  # FIXME
+     else:
+         print('PROCESS THE PACKAGE ARGUMENTS SIMILAR TO HOW ONE WOULD DEFINE THE ENVIRONMENT')
+         data_info = process_package_arguments()
+
 
      if args['Keep_Fit'] == str(2) or args['Keep_Fit'] == 2:
          if manual_fit_spec is None:
@@ -449,6 +501,8 @@ if __name__ == '__main__':
      BATCH_JOB = True
 
      if BATCH_JOB:
+         parser.add_argument('-dataset_file', default='data/Ex-16-3.csv', help='supply the path to the dataset')
+
          parser.add_argument('-line', type=int, default=1,
                              help='line to read in csv to pass in argument')
 
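The new `-dataset_file` flag lets a batch job point at an arbitrary CSV. A small sketch of how the flag parses, with arguments supplied programmatically for illustration (only `-dataset_file` and `-line` come from the hunk above; the CSV path is a placeholder):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-dataset_file', default='data/Ex-16-3.csv',
                        help='supply the path to the dataset')
    parser.add_argument('-line', type=int, default=1,
                        help='line to read in csv to pass in argument')

    args = vars(parser.parse_args(['-dataset_file', 'data/my_counts.csv']))
    print(args['dataset_file'])  # -> data/my_counts.csv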
@@ -463,6 +517,7 @@ if __name__ == '__main__':
          line_number_obs += 1
      args = dict(args)
 
+
      for key, value in args.items():
          try:
              # Attempt to parse the string value to a Python literal if value is a string.
@@ -479,7 +534,7 @@ if __name__ == '__main__':
          if "-algorithm" in action.option_strings:
              parser._optionals._actions[i].help = "optimization algorithm"
 
-     override = False
+     override = True
      if override:
          print('WARNING: TESTING ENVIRONMENT, TURN OFF FOR RELEASE')
          parser.add_argument('-problem_number', default='10')
@@ -494,9 +549,10 @@ if __name__ == '__main__':
          parser.add_argument('-seperate_out_factors', action='store_false', default=False,
                              help='True if wanting to split data that is potentially categorical as binary'
                                   ' we want to split the data for processing')
-         parser.add_argument('-supply_csv', type = str, help = 'enter the name of the csv, please include it as a full directorys')
+         parser.add_argument('-supply_csv', type=str, help='enter the name of the csv; please include the full directory path')
 
      else:  # DIDN'T SPECIFY LINES, TRY EACH ONE MANUALLY
+         print("RUNNING WITH ARGS")
          parser.add_argument('-com', type=str, default='MetaCode',
                              help='line to read csv')
 
metacountregressor-0.1.121.dist-info/METADATA → metacountregressor-0.1.123.dist-info/METADATA
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: metacountregressor
- Version: 0.1.121
+ Version: 0.1.123
  Summary: Extensions for a Python package for estimation of count models.
  Home-page: https://github.com/zahern/CountDataEstimation
  Author: Zeke Ahern
metacountregressor-0.1.121.dist-info/RECORD → metacountregressor-0.1.123.dist-info/RECORD
@@ -3,8 +3,8 @@ metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0
  metacountregressor/app_main.py,sha256=vY3GczTbGbBRalbzMkl_9jVW7RMgEOc6z2Dr1IZJv9c,10014
  metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
  metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
- metacountregressor/helperprocess.py,sha256=Sc5gJ7ffFlkya5B5KQwE33xxXuIQyF6OaYtSikLa3pQ,12968
- metacountregressor/main.py,sha256=37yw2weAhaDR-wH83QC4Jy8SeUFIHpxqhO9YPwgmRi4,20764
+ metacountregressor/helperprocess.py,sha256=4aSoyKP1GfzjwCzZ_dXlTbokOiMt_8sbzB6_tu0GPDg,16290
+ metacountregressor/main.py,sha256=A3XGwbwhhKVgMxnEgbAmMpgYaWkS8Rk30-cYs3FxvEk,22713
  metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
  metacountregressor/metaheuristics.py,sha256=Kkx1Jfox6NBlm5zVrI26Vc_NI7NFQSS9dinrZU9SpV8,105871
  metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiLur0k,23096
@@ -13,8 +13,8 @@ metacountregressor/setup.py,sha256=8w6IqX0tJsbYrOI1BJLIJCIvOnunKli5I9fsF5PhHv4,9
  metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
  metacountregressor/solution.py,sha256=OJqB00cvGMLFei6RsjphPamOdLm3EWOOzK7k-uVbvFY,277671
  metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
- metacountregressor-0.1.121.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
- metacountregressor-0.1.121.dist-info/METADATA,sha256=c-c5mHUC6gdf2JEq-DWBuw0F1gAp-Cq0pQeYVLKG_y8,23415
- metacountregressor-0.1.121.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- metacountregressor-0.1.121.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
- metacountregressor-0.1.121.dist-info/RECORD,,
+ metacountregressor-0.1.123.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+ metacountregressor-0.1.123.dist-info/METADATA,sha256=e4jQ9vtFxhHtA98q1Vd8PJ9gJiIz91iSUKgGPt78kg8,23415
+ metacountregressor-0.1.123.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ metacountregressor-0.1.123.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
+ metacountregressor-0.1.123.dist-info/RECORD,,