metacountregressor 0.1.113__py3-none-any.whl → 0.1.117__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -69,15 +69,20 @@ def main(args, **kwargs):
  #data_info['data']['Group'][0]
  #data_info['data']['Panel'][0]
  args['decisions'] = data_info['analyst']
-
- if not np.isnan(data_info['data']['Grouped'][0]):
+ grouped_c = data_info['data']['Grouped'][0]
+ if isinstance(data_info['data']['Grouped'][0],str):
  args['group'] = data_info['data']['Grouped'][0]
- args['ID'] = data_info['data']['Grouped'][0]
- if not np.isnan(data_info['data']['Panel'][0]):
+ args['ID'] = data_info['data']['Panel'][0]
+ if isinstance(data_info['data']['Panel'][0],str):
  args['panels'] = data_info['data']['Panel'][0]

  df = pd.read_csv(str(data_info['data']['Problem'][0]))
  x_df = df.drop(columns=[data_info['data']['Y'][0]])
+ # drop the columns of x_df where column is string exclude the column stype args['group']
+ exclude_column = args['group']
+ columns_to_keep = x_df.dtypes != 'object'
+ columns_to_keep |= (x_df.columns == exclude_column)
+ x_df = x_df.loc[:, columns_to_keep]
  y_df = df[[data_info['data']['Y'][0]]]
  y_df.rename(columns={data_info['data']['Y'][0]: "Y"}, inplace=True)

@@ -1,10 +1,28 @@
+ from os.path import exists
  import numpy as np
  import pandas as pd
  import csv
  import matplotlib.pyplot as plt
+ from scipy import stats as st
+ from sklearn.preprocessing import StandardScaler
+

  plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')

+
+
+
+
+ from itertools import product
+
+ # Function to create a list of dictionaries from a parameter grid
+ def generate_param_combinations(param_grid):
+ keys = param_grid.keys()
+ values = param_grid.values()
+ combinations = [dict(zip(keys, v)) for v in product(*values)]
+ return combinations
+
+
  ##Select the best Features Based on RF
  def select_features(X_train, y_train, n_f=16):
  try:
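
The new generate_param_combinations helper expands a parameter grid into one dictionary per combination via itertools.product. A minimal usage sketch, assuming the module in this hunk is importable as metacountregressor.helperprocess; the grid keys and values below are illustrative only:

    from metacountregressor import helperprocess

    # Two algorithms x two test splits -> four candidate settings.
    grid = {'algorithm': ['hs', 'de'], 'test_percentage': [0.15, 0.20]}
    combos = helperprocess.generate_param_combinations(grid)
    # e.g. combos[0] == {'algorithm': 'hs', 'test_percentage': 0.15}
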
@@ -77,6 +95,7 @@ def findCorrelation(corr, cutoff=0.9, exact=None): """
  findCorrelation(R1, cutoff=0.6, exact=True) # ['x1', 'x5', 'x4']
  """

+
  def _findCorrelation_fast(corr, avg, cutoff):

  combsAboveCutoff = corr.where(lambda x: (np.tril(x) == 0) & (x > cutoff)).stack().index
@@ -151,6 +170,230 @@ def remove_files(yes=1):
  os.remove('pop_log.csv')


+ # Function to process the DataFrame
+ '''
+ Example usuage
+ # Configuration dictionary
+ config = {
+ 'Age': {
+ 'type': 'bin',
+ 'bins': [0, 18, 35, 50, 100],
+ 'labels': ['Child', 'YoungAdult', 'MiddleAged', 'Senior'],
+ 'prefix': 'Age_Binned'
+ },
+ 'Income': {
+ 'type': 'bin',
+ 'bins': [0, 2000, 5000, 10000],
+ 'labels': ['Low', 'Medium', 'High'],
+ 'prefix': 'Income_Binned'
+ },
+ 'Gender': {
+ 'type': 'one-hot',
+ 'prefix': 'Gender'
+ },
+ 'Score': {
+ 'type': 'none'
+ }
+ }
+ '''
+ def null_handler(vari):
+ if vari in locals():
+ return vari
+ else:
+ print(f'{vari} does not exist, setting None..')
+ return None
+
+
+ def set_up_analyst_constraints(data_characteristic, model_terms, variable_decisions_alt = None):
+
+
+ name_data_characteristics = data_characteristic.columns.tolist()
+ # Get non-None values as a list
+ non_none_terms = [value for value in model_terms.values() if value is not None]
+ # how to make name_data_characteristics - non_none_terms
+
+ result = [item for item in name_data_characteristics if item not in non_none_terms]
+ distu = ['normal', 'uniform', 'triangular']
+ tra = ['no', 'sqrt', 'arcsinh']
+ if model_terms.get('group') is None:
+ print('cant have grouped rpm, removing level 4 from every item')
+ MAKE_ALL_4_FALSE = True
+ else:
+ MAKE_ALL_4_FALSE = False
+
+ variable_decisions = {
+ name: {
+ 'levels': list(range(6)),
+ 'Distributions': distu,
+ 'Transformations': tra
+ }
+ for name in result
+ }
+ # Override elements in the original dictionary with the alt dictionary
+ if variable_decisions_alt is not None:
+ for key, alt_value in variable_decisions_alt.items():
+ if key in variable_decisions:
+ # Update the existing entry
+ variable_decisions[key].update(alt_value)
+ else:
+ # Add new entry if it doesn't exist
+ variable_decisions[key] = alt_value
+ # Prepare the data for the DataFrame
+ rows = []
+ for column_name, details in variable_decisions.items():
+ # Create a row dictionary
+ row = {'Column': column_name}
+
+ # Add levels as True/False for Level 0 through Level 5
+ for level in range(6): # Assuming Level 0 to Level 5
+
+ if level == 4 and MAKE_ALL_4_FALSE:
+ row[f'Level {level}'] = False
+ else:
+ row[f'Level {level}'] = level in details['levels']
+
+ # Add distributions and transformations directly
+
+ # Add distributions and transformations as comma-separated strings
+ row['Distributions'] = str(details['Distributions'])
+ row['Transformations'] = str(details['Transformations'])
+
+ rows.append(row)
+
+ # Create the DataFrame
+ df = pd.DataFrame(rows)
+
+ data_new = data_characteristic.rename(columns={v: k for k, v in model_terms.items() if v in data_characteristic.columns})
+ return df, data_new
+
+ # Function to guess Low, Medium, High ranges
+ def guess_low_medium_high(column_name, series):
+ # Compute the tertiles (33rd and 66th percentiles)
+ #print('did it make it...')
+ #mode_value = st.mode(series) # Get the most frequent value
+ #i dont think this works cayse its not a seriers any other way
+ is_binary = series.isin([0, 1]).all()
+ if is_binary:
+ return {
+ 'type': 'binary',
+ 'bins': [0,1],
+ 'labels': ['Off', 'On'],
+ 'prefix': f'{column_name}'
+
+ }
+
+ # series = pd.to_numeric(series, errors='coerce').fillna(mode_value)
+ low_threshold = np.quantile(series, 0.33)
+ high_threshold = np.quantile(series,0.66)
+
+ # Define the bins and labels
+ bins = [np.min(series) - 1, low_threshold, high_threshold, np.max(series)]
+ # Handle duplicate bins by adjusting labels
+ if len(set(bins)) < len(bins): # Check for duplicate bin edges
+ if low_threshold == high_threshold:
+ # Collapse to two bins (Low and High)
+ bins = [np.min(series) - 1, low_threshold, np.max(series)]
+ labels = ['Low', 'High']
+ else:
+ # Collapse to three unique bins
+ bins = sorted(set(bins)) # Remove duplicate edges
+ labels = [f'Bin {i + 1}' for i in range(len(bins) - 1)]
+ else:
+ # Standard case: Low, Medium, High
+ labels = ['Low', 'Medium', 'High']
+
+ return {
+ 'type': 'bin',
+ 'bins': bins,
+ 'labels': labels,
+ 'prefix': f'{column_name}'
+ }
+
+ def transform_dataframe(df, config):
+ output_df = pd.DataFrame()
+
+ for column, settings in config.items():
+ if settings['type'] == 'bin':
+ # Apply binning
+ # Get unique bins (remove duplicates)
+ unique_bins = sorted(set(settings['bins']))
+
+ # Adjust labels if necessary
+ if len(unique_bins) - 1 != len(settings['labels']):
+ print(f"Adjusting labels to match bins: {len(unique_bins) - 1} bins detected.")
+ labels = [f'Bin {i+1}' for i in range(len(unique_bins) - 1)]
+ else:
+ labels = settings['labels']
+
+ # Perform the binning
+ binned_d = pd.cut(
+ df[column],
+ bins=unique_bins, # Deduplicated bins
+ labels=labels, # Adjusted or original labels
+ right=False # Adjust based on whether to include the right edge
+ )
+ # One-hot encode the binned column
+ binned_dummies = pd.get_dummies(binned_d, prefix=settings['prefix'])
+ output_df = pd.concat([output_df, binned_dummies], axis=1)
+
+ elif settings['type'] == 'one-hot':
+ # One-hot encode the column
+ one_hot_dummies = pd.get_dummies(df[column], prefix=settings.get('prefix', column))
+ output_df = pd.concat([output_df, one_hot_dummies], axis=1)
+
+ elif settings['type'] == 'continuous':
+ # Apply function to continuous data
+ data = df[column]
+ if 'bounds' in settings:
+ # Apply bounds filtering
+ lower, upper = settings['bounds']
+ data = data[(data >= lower) & (data <= upper)]
+ if 'apply_func' in settings:
+ # Apply custom function
+ data = data.apply(settings['apply_func'])
+ output_df[column] = data
+
+ elif settings['type'] == 'none':
+ # Leave the column unchanged
+ if column in df.columns:
+
+ output_df = pd.concat([output_df, df[[column]]], axis=1)
+ else:
+ print(f'config variable {column} is not in the data. Ignoring ...')
+ return output_df
+
+ # Helper function to guess column type and update `config`
+ def guess_column_type(column_name, series):
+
+ if series.empty:
+ raise ValueError(f"The column {column_name} contains no numeric data.")
+
+ if series.dtype == 'object' or series.dtype.name == 'category':
+ # If the column is categorical (e.g., strings), assume one-hot encoding
+ return {'type': 'one-hot', 'prefix': column_name}
+ elif pd.api.types.is_numeric_dtype(series):
+ unique_values = series.nunique()
+
+ if unique_values < 5:
+ return {'type': 'one-hot', 'prefix': column_name}
+
+ elif np.max(series) - np.min(series) > 20:
+ print('made it through here')
+ # If there are few unique values, assume binning with default bins
+ return guess_low_medium_high(column_name,series)
+ else:
+ # # Otherwise, assume continuous data with normalization
+ # Otherwise, fallback to continuous standardization
+ return {
+ 'type': 'continuous',
+ 'apply_func': (lambda x: (x - series.mean()) / series.std()) # Z-Score Standardization
+ }
+ else:
+ # Default fallback (leave the column unchanged)
+ return {'type': 'none'}
+
+
+
  def as_wide_factor(x_df, yes=1, min_factor=2, max_factor=8, keep_original=0, exclude=[]):
  if not yes:
  return x_df
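
Taken together, the new helpers form a small preprocessing pipeline: guess_column_type proposes a per-column config ('one-hot', 'bin', 'continuous' or 'none'), guess_low_medium_high supplies tertile bin edges for wide-ranging numeric columns, and transform_dataframe applies the config. A sketch of the end-to-end use, assuming the module in this hunk is importable as metacountregressor.helperprocess; the toy columns are illustrative only:

    import pandas as pd
    from metacountregressor import helperprocess as hp

    toy = pd.DataFrame({
        'Gender': ['M', 'F', 'F', 'M', 'F', 'M'],      # categorical
        'AADT':   [120, 4500, 870, 20300, 60, 9100],   # wide-ranging numeric
        'Flag':   [0, 1, 1, 0, 1, 0],                  # low-cardinality numeric
    })
    config = {c: hp.guess_column_type(c, toy[c]) for c in toy.columns}
    wide = hp.transform_dataframe(toy, config)
    # 'Gender' and the low-cardinality 'Flag' are one-hot encoded; 'AADT'
    # (at least 5 unique values, range > 20) is binned into Low/Medium/High dummies.
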
@@ -173,7 +416,7 @@ def PCA_code(X, n_components=5):


  def interactions(df, keep=None, drop_this_perc=0.6, interact = False):
-
+ full_columns = df.columns
  if interact:
  interactions_list = []
  for i, var_i in enumerate(df.columns):
@@ -199,14 +442,31 @@ def interactions(df, keep=None, drop_this_perc=0.6, interact = False):
  df = pd.concat([df, df_interactions], axis=1, sort=False)

  # second
- corr_matrix = df.corr().abs()
+ # Remove `keep` columns from the correlation matrix
+ if keep is not None:
+ missing_columns = [col for col in keep if col not in df.columns]
+
+ if missing_columns:
+ print(f"The following columns are not in the DataFrame and will be ignored: {missing_columns}")
+ keep = [col for col in keep if col not in missing_columns]
+ df_corr = df.drop(columns=keep, errors='ignore', inplace=False) # Exclude `keep` columns
+ else:
+ df_corr = df
+
+ # Compute the absolute correlation matrix
+ corr_matrix = df_corr.corr().abs()
+
+ # Keep only the upper triangle of the correlation matrix
  upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))

- # Find features with correlation greater than 0.6
+ # Find features with correlation greater than the threshold
  to_drop = [column for column in upper.columns if any(upper[column] > drop_this_perc)]
+
+ # Ensure `keep` columns are not dropped
  if keep is not None:
- to_drop = [column for column in to_drop if column not in keep]
- # Drop features
+ to_drop = [column for column in to_drop if column not in full_columns]
+
+ # Drop the identified features
  df.drop(to_drop, axis=1, inplace=True)

  return df
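
The reworked pruning step computes absolute correlations with the protected keep columns excluded, scans only the upper triangle so each pair is tested once, and then drops one column from each highly correlated pair. The idea in isolation, as a self-contained sketch; the function name and threshold are illustrative, not part of the package:

    import numpy as np
    import pandas as pd

    def prune_correlated(df, keep=(), threshold=0.6):
        # Correlations among the non-protected columns only.
        corr = df.drop(columns=list(keep), errors='ignore').corr().abs()
        # Upper triangle (k=1) so each pair appears exactly once.
        upper = corr.where(np.triu(np.ones(corr.shape), k=1).astype(bool))
        # Drop one member of every pair whose correlation exceeds the threshold.
        to_drop = [c for c in upper.columns
                   if (upper[c] > threshold).any() and c not in keep]
        return df.drop(columns=to_drop)
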
@@ -330,3 +590,5 @@ def entries_to_remove(entries, the_dict):
  for key in entries:
  if key in the_dict:
  del the_dict[key]
+
+
@@ -28,12 +28,65 @@ def convert_df_columns_to_binary_and_wide(df):
  return df


- def process_arguments():
+
+
+
+
+ def process_arguments(**kwargs):
  '''
  TRYING TO TURN THE CSV FILES INTO RELEVANT ARGS
  '''
- data_characteristic = pd.read_csv('problem_data.csv')
- analyst_d = pd.read_csv('decisions.csv')
+ #dataset
+ '''
+ if kwargs.get('dataset_file', False
+ ):
+ dataset = pd.read_csv(kwargs.get('dataset_file'))
+ named_data_headers = dataset.columns.tolist()
+ decision_constants = {name: list(range(7)) for name in named_data_headers}
+ data_info = {
+
+
+ 'AADT': {
+ 'type': 'continuous',
+ 'bounds': [0.0, np.infty],
+ 'discrete': False,
+ 'apply_func': (lambda x: np.log(x + 1)),
+ },
+ 'SPEED': {
+ 'type': 'continuous',
+ 'bounds': [0, 100],
+ 'enforce_bounds': True,
+ 'discrete': True
+ },
+ 'TIME': {
+ 'type': 'continuous',
+ 'bounds': [0, 23.999],
+ 'discrete': False
+ }
+ }
+ #remove ID CoLUMNS from dataset
+ dataset = dataset.drop(columns = [
+ 'ID'
+ ])
+ for c in dataset.columns:
+ if c not in data_info.keys():
+ data_info[c] = {'type': 'categorical'}
+
+ data_new =helperprocess.transform_dataframe(dataset,data_info)
+
+ update_constant = kwargs.get('analyst_constraints')
+ #update the decision_constraints
+ '''
+ data_characteristic = pd.read_csv(kwargs.get('problem_data', 'problem_data.csv'))
+ # Extract the column as a list of characteristic names
+ #name_data_characteristics = data_characteristic.columns.tolist()
+
+ # Create the dictionary
+ #decision_constraints = {name: list(range(7)) for name in name_data_characteristics}
+
+ #print('this gets all the features, I need to remove...')
+
+ analyst_d = pd.read_csv(kwargs.get('decison_constraints', 'decisions.csv'))
  hyper = pd.read_csv('setup_hyper.csv')

  new_data = {'data': data_characteristic,
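
process_arguments now reads the problem and decision CSVs from keyword arguments, falling back to the previous hard-coded file names, while the hyper-parameter file is still read from 'setup_hyper.csv'. A hedged usage sketch; the paths are placeholders, and the keyword 'decison_constraints' is spelled exactly as the released code expects:

    data_info = process_arguments(problem_data='problem_data.csv',
                                  decison_constraints='decisions.csv')
    data_info['data']     # problem characteristics table
    data_info['analyst']  # analyst decision constraints
    data_info['hyper']    # contents of setup_hyper.csv
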
@@ -41,7 +94,14 @@ def process_arguments():
  'hyper': hyper}
  return new_data

+ def process_package_arguments():
+
+ new_data = {}
+ pass
+
+
  def main(args, **kwargs):
+
  '''METACOUNT REGRESSOR TESTING ENVIRONMENT'''

  '''
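
process_package_arguments is added here only as a stub (it builds an empty dict and falls through). A purely hypothetical sketch of what it might eventually return, mirroring the dictionary shape produced by process_arguments; none of this is in the released code:

    def process_package_arguments(data=None, analyst=None, hyper=None):
        # Hypothetical: accept in-memory DataFrames rather than CSV paths and
        # return the same {'data', 'analyst', 'hyper'} structure as process_arguments().
        return {'data': data, 'analyst': analyst, 'hyper': hyper}
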
@@ -114,13 +174,25 @@ def main(args, **kwargs):
  X = df
  y = df['FREQ'] # Frequency of crashes
  X['Offset'] = np.log(df['AADT']) # Explicitley define how to offset the data, no offset otherwise
+ df['Offset'] = np.log(df['AADT'])
  # Drop Y, selected offset term and ID as there are no panels
  X = df.drop(columns=['FREQ', 'ID', 'AADT'])
-
+ # Step 0: Process Data
+ model_terms = {
+ 'Y': 'FREQ', # Replace 'FREQ' with the name of your dependent variable
+ 'group': None, # Replace 'group_column' with the name of your grouping column (or None if not used)
+ 'panels': None, # Replace 'panel_column' with the name of your panel column (or None if not used)
+ 'Offset': 'Offset' # Replace None with the name of your offset column if using one
+ }
+ a_des, df = helperprocess.set_up_analyst_constraints(df, model_terms)
  # some example argument, these are defualt so the following line is just for claritity
  args = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number': 1,
- 'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 6}
+ 'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 6, 'desicions':a_des}
  # Fit the model with metacountregressor
+ # Step 5: Transform the dataset based on the configuration
+ #data_new = helperprocess.transform_dataframe(dataset, config)
+ y = df[['Y']]
+ X = df.drop(columns=['Y'])
  obj_fun = ObjectiveFunction(X, y, **args)
  # replace with other metaheuristics if desired
  results = harmony_search(obj_fun)
@@ -162,8 +234,8 @@ def main(args, **kwargs):
  'rdm_cor_terms': [],
  'grouped_terms': [],
  'hetro_in_means': [],
- 'transformations': ['no', 'log', 'log', 'no', 'no', 'no', 'no'],
- 'dispersion': 1
+ 'transformations': ['no', 'log', 'no', 'no', 'no', 'no', 'no'],
+ 'dispersion': 0
  }

  keep = ['Constant', 'US', 'RSMS', 'MCV', 'RSHS', 'AADT', 'Curve50', 'Offset']
@@ -172,13 +244,27 @@ def main(args, **kwargs):
  elif dataset == 4:
  manual_fit_spec = {
  'fixed_terms': ['const', 'LOWPRE', 'GBRPM', 'FRICTION'],
- 'rdm_terms': ['Expose:normal', 'INTPM:normal', 'CPM:normal', 'HISNOW:normal'],
+ 'rdm_terms': ['EXPOSE:normal', 'INTPM:normal', 'CPM:normal', 'HISNOW:normal'],
  'rdm_cor_terms': [],
  'grouped_terms': [],
  'hetro_in_means': [],
  'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
  'dispersion': 1
  }
+ '''
+ manual_fit_spec = {
+ 'fixed_terms': ['const', 'LOWPRE', 'GBRPM', 'FRICTION', 'EXPOSE', 'INTPM', 'CPM', 'HISNOW'],
+ 'rdm_terms': [],
+ 'rdm_cor_terms': [],
+ 'grouped_terms': [],
+ 'hetro_in_means': [],
+ 'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
+ 'dispersion': 1
+ }
+ '''
+
+
+ '''
  print('overriding this delete, just want to test the NB')
  manual_fit_spec = {
  'fixed_terms': ['const'],
@@ -189,7 +275,7 @@ def main(args, **kwargs):
  'transformations': ['no'],
  'dispersion': 1
  }
-
+ '''
  df = pd.read_csv('./data/Ex-16-3.csv') # read in the data
  y_df = df[['FREQ']].copy() # only consider crashes
  y_df.rename(columns={"FREQ": "Y"}, inplace=True)
@@ -262,6 +348,17 @@ def main(args, **kwargs):
  x_df = helperprocess.interactions(x_df, drop_this_perc=0.8)
  x_df['county'] = group_grab

+ print('benchmark specification')
+ manual_fit_spec = {
+ 'fixed_terms': ['const', 'monthly_AADT', 'segment_length', 'speed', 'paved_shoulder', 'curve'],
+ 'rdm_terms': [],
+ 'rdm_cor_terms': [],
+ 'grouped_terms': ['DP01:normal', 'DX32:normal'],
+ 'hetro_in_means': [],
+ 'transformations': ['no', 'no', 'no', 'no', 'no', 'no'],
+ 'dispersion': 0
+ }
+
  elif dataset == 9:
  df = pd.read_csv('panel_synth.csv') # read in the data
  y_df = df[['Y']].copy() # only consider crashes
@@ -286,19 +383,21 @@ def main(args, **kwargs):
  keep = ['group', 'constant', 'element_ID']

  x_df = helperprocess.interactions(x_df, keep)
- else: # the dataset has been selected in the program as something else
- data_info = process_arguments()
+
+
+ elif dataset ==10: # the dataset has been selected in the program as something else
+ data_info = process_arguments(**args)
  data_info['hyper']
  data_info['analyst']
  data_info['data']['Y']
  #data_info['data']['Group'][0]
  #data_info['data']['Panel'][0]
  args['decisions'] = data_info['analyst']
-
- if not np.isnan(data_info['data']['Grouped'][0]):
+ print('check the args of the decions')
+ if type(data_info['data']['Grouped'][0]) == str and len(data_info['data']['Grouped'][0]) >1:
  args['group'] = data_info['data']['Grouped'][0]
  args['ID'] = data_info['data']['Grouped'][0]
- if not np.isnan(data_info['data']['Panel'][0]):
+ if type(data_info['data']['Panel'][0]) == str and len(data_info['data']['Panel'][0])>1:
  args['panels'] = data_info['data']['Panel'][0]

  df = pd.read_csv(str(data_info['data']['Problem'][0]))
@@ -306,6 +405,10 @@ def main(args, **kwargs):
  y_df = df[[data_info['data']['Y'][0]]]
  y_df.rename(columns={data_info['data']['Y'][0]: "Y"}, inplace=True)
  print('test') #FIXME
+ else:
+ print('PROCESS THE PACKAGE ARGUMENTS SIMULIAR TO HOW ONE WOULD DEFINE THE ENVIRONMENT')
+ data_info =process_package_arguments()
+

  if args['Keep_Fit'] == str(2) or args['Keep_Fit'] == 2:
  if manual_fit_spec is None:
@@ -411,55 +514,63 @@ if __name__ == '__main__':
  parser = argparse.ArgumentParser(prog='main',
  epilog=main.__doc__,
  formatter_class=argparse.RawDescriptionHelpFormatter, conflict_handler='resolve')
-
- parser.add_argument('-line', type=int, default=1,
- help='line to read in csv to pass in argument')
-
- if vars(parser.parse_args())['line'] is not None:
- reader = csv.DictReader(open('set_data.csv', 'r'))
- args = list()
- line_number_obs = 0
- for dictionary in reader: # TODO find a way to handle multiple args
- args = dictionary
- if line_number_obs == int(vars(parser.parse_args())['line']):
- break
- line_number_obs += 1
- args = dict(args)
-
- for key, value in args.items():
- try:
- # Attempt to parse the string value to a Python literal if value is a string.
- if isinstance(value, str):
- value = ast.literal_eval(value)
- except (ValueError, SyntaxError):
- # If there's a parsing error, value remains as the original string.
- pass
-
- # Add the argument to the parser with the potentially updated value.
- parser.add_argument(f'-{key}', default=value)
-
- for i, action in enumerate(parser._optionals._actions):
- if "-algorithm" in action.option_strings:
- parser._optionals._actions[i].help = "optimization algorithm"
-
- override = True
- if override:
- print('todo turn off, in testing phase')
- parser.add_argument('-problem_number', default='10')
- print('did it make it')
- if 'algorithm' not in args:
- parser.add_argument('-algorithm', type=str, default='hs',
- help='optimization algorithm')
- elif 'Manual_Fit' not in args:
- parser.add_argument('-Manual_Fit', action='store_false', default=None,
- help='To fit a model manually if desired.')
-
- parser.add_argument('-seperate_out_factors', action='store_false', default=False,
- help='Trie of wanting to split data that is potentially categorical as binary'
- ' we want to split the data for processing')
- parser.add_argument('-supply_csv', type = str, help = 'enter the name of the csv, please include it as a full directorys')
+
+
+ BATCH_JOB = False
+
+ if BATCH_JOB:
+ parser.add_argument('-dataset_file', default='data/Ex-16-3.csv', help='supply the path to the dataset')
+
+ parser.add_argument('-line', type=int, default=1,
+ help='line to read in csv to pass in argument')
+
+ if vars(parser.parse_args())['line'] is not None:
+ reader = csv.DictReader(open('set_data.csv', 'r'))
+ args = list()
+ line_number_obs = 0
+ for dictionary in reader: # TODO find a way to handle multiple args
+ args = dictionary
+ if line_number_obs == int(vars(parser.parse_args())['line']):
+ break
+ line_number_obs += 1
+ args = dict(args)
+
+
+ for key, value in args.items():
+ try:
+ # Attempt to parse the string value to a Python literal if value is a string.
+ if isinstance(value, str):
+ value = ast.literal_eval(value)
+ except (ValueError, SyntaxError):
+ # If there's a parsing error, value remains as the original string.
+ pass
+
+ # Add the argument to the parser with the potentially updated value.
+ parser.add_argument(f'-{key}', default=value)
+
+ for i, action in enumerate(parser._optionals._actions):
+ if "-algorithm" in action.option_strings:
+ parser._optionals._actions[i].help = "optimization algorithm"
+
+ override = True
+ if override:
+ print('WARNING: TESTING ENVIRONMENT, TURN OFF FOR RELEASE')
+ parser.add_argument('-problem_number', default='10')
+
+ if 'algorithm' not in args:
+ parser.add_argument('-algorithm', type=str, default='hs',
+ help='optimization algorithm')
+ elif 'Manual_Fit' not in args:
+ parser.add_argument('-Manual_Fit', action='store_false', default=None,
+ help='To fit a model manually if desired.')
+
+ parser.add_argument('-seperate_out_factors', action='store_false', default=False,
+ help='Trie of wanting to split data that is potentially categorical as binary'
+ ' we want to split the data for processing')
+ parser.add_argument('-supply_csv', type = str, help = 'enter the name of the csv, please include it as a full directories')

  else: # DIDN"T SPECIFY LINES TRY EACH ONE MANNUALY
+ print("RUNNING WITH ARGS")
  parser.add_argument('-com', type=str, default='MetaCode',
  help='line to read csv')