metacountregressor 0.1.73__py3-none-any.whl → 0.1.83__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
- metacountregressor/app_main.py +258 -0
- metacountregressor/data_split_helper.py +90 -0
- metacountregressor/helperprocess.py +372 -5
- metacountregressor/main.py +297 -117
- metacountregressor/metaheuristics.py +43 -31
- metacountregressor/setup.py +3 -2
- metacountregressor/solution.py +734 -832
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.83.dist-info}/METADATA +256 -35
- metacountregressor-0.1.83.dist-info/RECORD +20 -0
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.83.dist-info}/WHEEL +1 -1
- metacountregressor-0.1.73.dist-info/RECORD +0 -18
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.83.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.83.dist-info}/top_level.txt +0 -0
metacountregressor/main.py (CHANGED)
```diff
@@ -9,14 +9,12 @@ import numpy as np
 import pandas as pd
 from pandas import DataFrame
 from pandas.io.parsers import TextFileReader
-
 import helperprocess
 from metaheuristics import (differential_evolution,
                             harmony_search,
                             simulated_annealing)
 from solution import ObjectiveFunction
 
-from test_motor import *
 
 warnings.simplefilter("ignore")
 
@@ -30,14 +28,144 @@ def convert_df_columns_to_binary_and_wide(df):
     return df
 
 
+
+
+
+
+def process_arguments(**kwargs):
+    '''
+    TRYING TO TURN THE CSV FILES INTO RELEVANT ARGS
+    '''
+    #dataset
+    '''
+    if kwargs.get('dataset_file', False
+                  ):
+        dataset = pd.read_csv(kwargs.get('dataset_file'))
+        named_data_headers = dataset.columns.tolist()
+        decision_constants = {name: list(range(7)) for name in named_data_headers}
+        data_info = {
+
+
+            'AADT': {
+                'type': 'continuous',
+                'bounds': [0.0, np.infty],
+                'discrete': False,
+                'apply_func': (lambda x: np.log(x + 1)),
+            },
+            'SPEED': {
+                'type': 'continuous',
+                'bounds': [0, 100],
+                'enforce_bounds': True,
+                'discrete': True
+            },
+            'TIME': {
+                'type': 'continuous',
+                'bounds': [0, 23.999],
+                'discrete': False
+            }
+        }
+        #remove ID CoLUMNS from dataset
+        dataset = dataset.drop(columns = [
+            'ID'
+        ])
+        for c in dataset.columns:
+            if c not in data_info.keys():
+                data_info[c] = {'type': 'categorical'}
+
+        data_new =helperprocess.transform_dataframe(dataset,data_info)
+
+        update_constant = kwargs.get('analyst_constraints')
+        #update the decision_constraints
+    '''
+    data_characteristic = pd.read_csv(kwargs.get('problem_data', 'problem_data.csv'))
+    # Extract the column as a list of characteristic names
+    #name_data_characteristics = data_characteristic.columns.tolist()
+
+    # Create the dictionary
+    #decision_constraints = {name: list(range(7)) for name in name_data_characteristics}
+
+    #print('this gets all the features, I need to remove...')
+
+    analyst_d = pd.read_csv(kwargs.get('decison_constraints', 'decisions.csv'))
+    hyper = pd.read_csv('setup_hyper.csv')
+
+    new_data = {'data': data_characteristic,
+                'analyst':analyst_d,
+                'hyper': hyper}
+    return new_data
+
+def process_package_arguments():
+
+    new_data = {}
+    pass
+
+
 def main(args, **kwargs):
+
+    '''METACOUNT REGRESSOR TESTING ENVIRONMENT'''
+
+    '''
+    TESTING_ENV = False
+    if TESTING_ENV:
+
+        import statsmodels.api as sm
+
+        data = sm.datasets.sunspots.load_pandas().data
+        # print(data.exog)
+        data_exog = data['YEAR']
+        data_exog = sm.add_constant(data_exog)
+        data_endog = data['SUNACTIVITY']
+
+        # Instantiate a gamma family model with the default link function.
+        import numpy as np
+
+        gamma_model = sm.NegativeBinomial(data_endog, data_exog)
+        gamma_results = gamma_model.fit()
+
+        print(gamma_results.summary())
+
+        # NOW LET's COMPARE THIS TO METACOUNT REGRESSOR
+        import metacountregressor
+        from importlib.metadata import version
+        print(version('metacountregressor'))
+        import pandas as pd
+        import numpy as np
+        from metacountregressor.solution import ObjectiveFunction
+        from metacountregressor.metaheuristics import (harmony_search,
+                                                       differential_evolution,
+                                                       simulated_annealing)
+
+        # Model Decisions,
+        manual_fit_spec = {
+
+            'fixed_terms': ['const', 'YEAR'],
+            'rdm_terms': [],
+            'rdm_cor_terms': [],
+            'grouped_terms': [],
+            'hetro_in_means': [],
+            'transformations': ['no', 'no'],
+            'dispersion': 1 # Negative Binomial
+        }
+
+        # Arguments
+        arguments = {
+            'algorithm': 'hs',
+            'test_percentage': 0,
+            'test_complexity': 6,
+            'instance_number': 'name',
+            'Manual_Fit': manual_fit_spec
+        }
+        obj_fun = ObjectiveFunction(data_exog, data_endog, **arguments)
+    '''
+
+
     print('the args is:', args)
     print('the kwargs is', kwargs)
 
     # removing junk files if specicified
     helperprocess.remove_files(args.get('removeFiles', True))
 
-    # do we want
+    # do we want to run a test
    if args.get('com', False) == 'MetaCode':
         print('Testing the Python Package') # TODO add in python package import
         # Read data from CSV file
```
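For orientation, the new `process_arguments` helper reads the run configuration from three CSV files (`problem_data.csv`, `decisions.csv`, and `setup_hyper.csv`) and bundles them into a dict. A minimal sketch of calling it follows; the keyword names, including the misspelled `decison_constraints`, are as they appear in the hunk above, while the import path and the CSV contents are assumptions (the file layouts are not shown in this diff):

```python
# Sketch only: exercises process_arguments() as added in the hunk above.
# The three CSV files must exist; their column layouts are not shown in this diff.
from metacountregressor.main import process_arguments  # assumed import path

data_info = process_arguments(
    problem_data='problem_data.csv',       # dataset characteristics
    decison_constraints='decisions.csv',   # analyst constraints (spelling as in source)
)

print(data_info['data'].head())     # problem/dataset description
print(data_info['analyst'].head())  # analyst decision constraints
print(data_info['hyper'].head())    # hyperparameters, read from setup_hyper.csv
```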
```diff
@@ -46,13 +174,25 @@ def main(args, **kwargs):
         X = df
         y = df['FREQ'] # Frequency of crashes
         X['Offset'] = np.log(df['AADT']) # Explicitley define how to offset the data, no offset otherwise
+        df['Offset'] = np.log(df['AADT'])
         # Drop Y, selected offset term and ID as there are no panels
         X = df.drop(columns=['FREQ', 'ID', 'AADT'])
-
+        # Step 0: Process Data
+        model_terms = {
+            'Y': 'FREQ',  # Replace 'FREQ' with the name of your dependent variable
+            'group': None,  # Replace 'group_column' with the name of your grouping column (or None if not used)
+            'panels': None,  # Replace 'panel_column' with the name of your panel column (or None if not used)
+            'Offset': 'Offset'  # Replace None with the name of your offset column if using one
+        }
+        a_des, df = helperprocess.set_up_analyst_constraints(df, model_terms)
         # some example argument, these are defualt so the following line is just for claritity
         args = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number': 1,
-                'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 6}
+                'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 6, 'desicions':a_des}
         # Fit the model with metacountregressor
+        # Step 5: Transform the dataset based on the configuration
+        #data_new = helperprocess.transform_dataframe(dataset, config)
+        y = df[['Y']]
+        X = df.drop(columns=['Y'])
         obj_fun = ObjectiveFunction(X, y, **args)
         # replace with other metaheuristics if desired
         results = harmony_search(obj_fun)
```
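The fitting flow itself keeps its shape: build an `ObjectiveFunction` from a design matrix and a count outcome, then hand it to a metaheuristic. A runnable sketch of that flow, mirroring the calls in the hunk above; the file path and column names come from the examples used elsewhere in this diff and are illustrative:

```python
import numpy as np
import pandas as pd

from metacountregressor.metaheuristics import harmony_search
from metacountregressor.solution import ObjectiveFunction

df = pd.read_csv('data/Ex-16-3.csv')   # example file referenced in this diff
y = df[['FREQ']].copy()                # crash counts as the outcome
df['Offset'] = np.log(df['AADT'])      # explicit exposure offset
X = df.drop(columns=['FREQ', 'AADT'])  # drop the outcome and the raw exposure

args = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6,
        'instance_number': 1, 'val_percentage': 0.15,
        'obj_1': 'bic', '_obj_2': 'RMSE_TEST', 'MAX_TIME': 6}

obj_fun = ObjectiveFunction(X, y, **args)
results = harmony_search(obj_fun)      # or differential_evolution / simulated_annealing
print(results)
```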
```diff
@@ -64,6 +204,7 @@ def main(args, **kwargs):
     print('the dataset is', dataset)
     manual_fit_spec = args.get('Manual_Fit', None)
     if dataset == 1:
+        print('Stage 5 A Short.')
         df = pd.read_csv('./data/1848.csv') # read in the data
         y_df = df[['FSI']] # only consider crashes
         y_df.rename(columns={"FSI": "Y"}, inplace=True)
@@ -71,6 +212,7 @@ def main(args, **kwargs):
         x_df = helperprocess.as_wide_factor(x_df)
 
     elif dataset == 3:
+        print('Stage 5 A Data Complete.')
         x_df = pd.read_csv('./data/Stage5A_1848_All_Initial_Columns.csv') # drop the ID columns
         drop_these = ['Id', 'ID', 'old', 'G_N']
         for i in drop_these:
@@ -92,8 +234,8 @@ def main(args, **kwargs):
             'rdm_cor_terms': [],
             'grouped_terms': [],
             'hetro_in_means': [],
-            'transformations': ['no', 'log', '
-            'dispersion':
+            'transformations': ['no', 'log', 'no', 'no', 'no', 'no', 'no'],
+            'dispersion': 0
         }
 
         keep = ['Constant', 'US', 'RSMS', 'MCV', 'RSHS', 'AADT', 'Curve50', 'Offset']
@@ -102,14 +244,38 @@ def main(args, **kwargs):
     elif dataset == 4:
         manual_fit_spec = {
             'fixed_terms': ['const', 'LOWPRE', 'GBRPM', 'FRICTION'],
-            'rdm_terms': ['
+            'rdm_terms': ['EXPOSE:normal', 'INTPM:normal', 'CPM:normal', 'HISNOW:normal'],
+            'rdm_cor_terms': [],
+            'grouped_terms': [],
+            'hetro_in_means': [],
+            'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
+            'dispersion': 1
+        }
+        '''
+        manual_fit_spec = {
+            'fixed_terms': ['const', 'LOWPRE', 'GBRPM', 'FRICTION', 'EXPOSE', 'INTPM', 'CPM', 'HISNOW'],
+            'rdm_terms': [],
             'rdm_cor_terms': [],
             'grouped_terms': [],
             'hetro_in_means': [],
             'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
             'dispersion': 1
         }
+        '''
+
 
+        '''
+        print('overriding this delete, just want to test the NB')
+        manual_fit_spec = {
+            'fixed_terms': ['const'],
+            'rdm_terms': [],
+            'rdm_cor_terms': [],
+            'grouped_terms': [],
+            'hetro_in_means': [],
+            'transformations': ['no'],
+            'dispersion': 1
+        }
+        '''
         df = pd.read_csv('./data/Ex-16-3.csv') # read in the data
         y_df = df[['FREQ']].copy() # only consider crashes
         y_df.rename(columns={"FREQ": "Y"}, inplace=True)
```
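Several of the branches above pin a model down via `manual_fit_spec`. For reference, a sketch of that dictionary's shape as it is used in this file: random terms are written as `'name:distribution'` strings, `transformations` carries one entry per term, and `dispersion` is `1` where the source comments say Negative Binomial (other codes are not documented in this diff). The values here are illustrative:

```python
# Shape of manual_fit_spec as used throughout main.py; values are illustrative.
manual_fit_spec = {
    'fixed_terms': ['const', 'LOWPRE'],  # terms with fixed (non-random) coefficients
    'rdm_terms': ['EXPOSE:normal'],      # random parameters as 'name:distribution'
    'rdm_cor_terms': [],                 # correlated random parameters
    'grouped_terms': [],                 # group-level random parameters, e.g. 'DP01:normal'
    'hetro_in_means': [],                # heterogeneity-in-means terms
    'transformations': ['no', 'no'],     # one entry per term, e.g. 'no' or 'log'
    'dispersion': 1                      # 1 is commented as Negative Binomial in this file
}

args = {'algorithm': 'hs', 'Manual_Fit': manual_fit_spec}  # forwarded to ObjectiveFunction
```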
```diff
@@ -118,7 +284,7 @@ def main(args, **kwargs):
         x_df['Offset'] = np.log(1 + x_df['AADT'] * x_df['LENGTH'] * 365 / 100000000)
         x_df = x_df.drop(columns=['AADT', 'LENGTH'])
 
-        if args
+        if args.get('seperate_out_factors', 0):
 
             x_df = helperprocess.as_wide_factor(x_df, keep_original=0,
                                                 exclude=['INTECHAG', 'CURVES', 'MIMEDSH', 'MXMEDSH', 'SPEED'])
@@ -159,7 +325,39 @@ def main(args, **kwargs):
             'transformations': ['no', 'no', 'no', 'no'],
             'dispersion': 0
         }
-
+    elif dataset == 8:
+        print('Main County')
+        df = pd.read_csv('./data/rural_int.csv') # read in the data
+        y_df = df[['crashes']].copy() # only consider crashes
+        y_df.rename(columns={"crashes": "Y"}, inplace=True)
+        panels = df['orig_ID']
+        try:
+            x_df = df.drop(columns=['crashes', 'year', 'orig_ID',
+                                    'jurisdiction', 'town', 'maint_region', 'weather_station', 'dummy_winter_2']) # was dropped postcode
+            print('dropping for test')
+            x_df = x_df.drop(columns=['month', 'inj.fat', 'PDO'])
+            x_df = x_df.drop(columns = [ 'zonal_ID', 'ln_AADT', 'ln_seg'])
+            x_df['rumble_install_year'] = x_df['rumble_install_year'].astype('category').cat.codes
+            x_df.rename(columns={"rumble_install_year": "has_rumble"}, inplace=True)
+        except Exception as e:
+            print(e)
+            x_df = df.drop(columns=['Y']) # was dropped postcode
+
+        group_grab = x_df['county']
+        x_df = x_df.drop(columns =['county'])
+        x_df = helperprocess.interactions(x_df, drop_this_perc=0.8)
+        x_df['county'] = group_grab
+
+        print('benchmark specification')
+        manual_fit_spec = {
+            'fixed_terms': ['const', 'monthly_AADT', 'segment_length', 'speed', 'paved_shoulder', 'curve'],
+            'rdm_terms': [],
+            'rdm_cor_terms': [],
+            'grouped_terms': ['DP01:normal', 'DX32:normal'],
+            'hetro_in_means': [],
+            'transformations': ['no', 'no', 'no', 'no', 'no', 'no'],
+            'dispersion': 0
+        }
 
     elif dataset == 9:
         df = pd.read_csv('panel_synth.csv') # read in the data
```
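Dataset 8 is the first branch to combine panel data with group-level random parameters (`grouped_terms` such as `'DP01:normal'`). The column wiring that supports it is set later in this file; a condensed sketch, with `x_df`/`y_df` as prepared in the branch above and only a placeholder for the remaining hyperparameters:

```python
# Group/panel wiring for the Maine county data, mirroring assignments made
# later in this file; 'county' and 'element_ID' are columns of the design data.
from metacountregressor.solution import ObjectiveFunction

args = {'algorithm': 'hs'}      # plus the usual hyperparameters
args['group'] = 'county'        # grouping column used by grouped_terms
args['panels'] = 'element_ID'   # panel (repeated-observation) column
args['ID'] = 'element_ID'       # observation identifier

obj_fun = ObjectiveFunction(x_df, y_df, **args)
```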
```diff
@@ -185,65 +383,32 @@ def main(args, **kwargs):
         keep = ['group', 'constant', 'element_ID']
 
         x_df = helperprocess.interactions(x_df, keep)
-    else:  # the dataset has been selected in the program as something else
-        from tkinter import Tk
-        from tkinter.filedialog import askopenfilename
-
-        ASK_ANALALYST = 0
-        if ASK_ANALALYST:
-            root = Tk()
-            root.withdraw()
-            # Prompt the user to select a directory
-            directory = askopenfilename(title="Select File For Analysis")
-            skip_lines = int(input("Select the number of lines to skip, (numeric): "))
-            df = pd.read_csv(directory, skip_rows=skip_lines)
-        else:
-            df = pd.read_csv('data/rqc40516_MotorcycleQUT_engineer_crash.csv', skiprows=5)
-            df['CRASH_SPEED_LIMIT'] = df['CRASH_SPEED_LIMIT'].str.replace(' km/h', '').astype(int)
-
-            # Clean data types
-            df = clean_data_types(df)
-
-            # Encode categorical variables
-            categories = ['CRASH_SEVERITY', 'CRASH_TYPE', 'CRASH_NATURE', 'CRASH_ATMOSPHERIC_CONDITION']
-            df = pd.get_dummies(df, columns=categories)
-
-            # Select only numeric columns
-            numeric_types = ['int32', 'uint8', 'bool', 'int64', 'float64']
-            df = df.select_dtypes(include=numeric_types)
 
-            # Check for missing values and fill with column mean
-            missing_values_count = df['CASUALTY_TOTAL'].isnull().sum()
-            df.fillna(df.mean())
 
-
-
-
-
-
-
-
-        ]
+    elif dataset ==10: # the dataset has been selected in the program as something else
+        data_info = process_arguments(**args)
+        data_info['hyper']
+        data_info['analyst']
+        data_info['data']['Y']
+        #data_info['data']['Group'][0]
+        #data_info['data']['Panel'][0]
+        args['decisions'] = data_info['analyst']
+        print('check the args of the decions')
+        if type(data_info['data']['Grouped'][0]) == str and len(data_info['data']['Grouped'][0]) >1:
+            args['group'] = data_info['data']['Grouped'][0]
+            args['ID'] = data_info['data']['Grouped'][0]
+        if type(data_info['data']['Panel'][0]) == str and len(data_info['data']['Panel'][0])>1:
+            args['panels'] = data_info['data']['Panel'][0]
+
+        df = pd.read_csv(str(data_info['data']['Problem'][0]))
+        x_df = df.drop(columns=[data_info['data']['Y'][0]])
+        y_df = df[[data_info['data']['Y'][0]]]
+        y_df.rename(columns={data_info['data']['Y'][0]: "Y"}, inplace=True)
+        print('test') #FIXME
+    else:
+        print('PROCESS THE PACKAGE ARGUMENTS SIMULIAR TO HOW ONE WOULD DEFINE THE ENVIRONMENT')
+        data_info =process_package_arguments()
 
-    # Filter out excluded columns
-    df = df[[col for col in df.columns if not any(ex in col for ex in EXCLUDE)]]
-
-    # Prepare target variable
-
-    # Check for finite values and compute correlations
-    finite_check = df.apply(np.isfinite).all()
-    df_clean = df.loc[:, finite_check]
-    corr = df_clean.corr()
-
-    # Identify and remove highly correlated features
-    hc = findCorrelation(corr, cutoff=0.5)
-    trimmed_df = df_clean.drop(columns=hc)
-
-    # Feature selection
-    df_cleaner, fs = select_features(trimmed_df, y)
-    x_df = df_cleaner
-    y_df = y.to_frame(name="Y")
-    # y_df.rename(columns={"CASUALTY_TOTAL": "Y"}, inplace=True)
 
     if args['Keep_Fit'] == str(2) or args['Keep_Fit'] == 2:
         if manual_fit_spec is None:
```
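The new `dataset == 10` branch expects the `data` frame returned by `process_arguments` to expose at least `Problem` (a path to the dataset CSV), `Y`, `Grouped`, and `Panel` columns, judging from the lookups above. A sketch of a matching one-row `problem_data.csv`; the values are illustrative, and the `analyst` and `hyper` CSVs are read separately:

```python
import pandas as pd

# One-row problem description matching the lookups in the dataset == 10 branch.
problem = pd.DataFrame([{
    'Problem': 'data/Ex-16-3.csv',  # dataset to load
    'Y': 'FREQ',                    # dependent-variable column in that dataset
    'Grouped': '',                  # grouping column; left blank when unused
    'Panel': '',                    # panel column; left blank when unused
}])
problem.to_csv('problem_data.csv', index=False)
```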
```diff
@@ -251,8 +416,8 @@ def main(args, **kwargs):
         else:
             print('fitting manually')
             args['Manual_Fit'] = manual_fit_spec
-
     if args['problem_number'] == str(8) or args['problem_number'] == 8:
+        print('Maine County Dataset.')
         args['group'] = 'county'
         args['panels'] = 'element_ID'
         args['ID'] = 'element_ID'
@@ -262,11 +427,13 @@ def main(args, **kwargs):
         args['panels'] = 'ind_id'
         args['ID'] = 'ind_id'
 
+
+
     args['complexity_level'] = args.get('complexity_level', 6)
 
-
-    AnalystSpecs
-    args['AnalystSpecs'] = AnalystSpecs
+
+    # Initialize AnalystSpecs to None if not manually provided
+    args['AnalystSpecs'] = args.get('AnalystSpecs', None)
 
     if args['algorithm'] == 'sa':
         args_hyperparameters = {'alpha': float(args['temp_scale']),
@@ -312,7 +479,7 @@ def main(args, **kwargs):
 
 
     elif args['algorithm'] == 'de':
-        # force
+        # force variables
         args['must_include'] = args.get('force', [])
 
         args_hyperparameters = {'_AI': args.get('_AI', 2),
@@ -321,7 +488,6 @@ def main(args, **kwargs):
                                 , '_pop_size': int(args['_hms']), 'instance_number': int(args['line'])
                                 , 'Manual_Fit': args['Manual_Fit'],
                                 'MP': int(args['MP'])
-
                                 }
 
     args_hyperparameters = dict(args_hyperparameters)
@@ -347,50 +513,64 @@ if __name__ == '__main__':
     alg_parser.print_help()
     parser = argparse.ArgumentParser(prog='main',
                                      epilog=main.__doc__,
-                                     formatter_class=argparse.RawDescriptionHelpFormatter)
-
-
-
-
-    if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                                     formatter_class=argparse.RawDescriptionHelpFormatter, conflict_handler='resolve')
+
+
+    BATCH_JOB = False
+
+    if BATCH_JOB:
+        parser.add_argument('-dataset_file', default='data/Ex-16-3.csv', help='supply the path to the dataset')
+
+        parser.add_argument('-line', type=int, default=1,
+                            help='line to read in csv to pass in argument')
+
+        if vars(parser.parse_args())['line'] is not None:
+            reader = csv.DictReader(open('set_data.csv', 'r'))
+            args = list()
+            line_number_obs = 0
+            for dictionary in reader:  # TODO find a way to handle multiple args
+                args = dictionary
+                if line_number_obs == int(vars(parser.parse_args())['line']):
+                    break
+                line_number_obs += 1
+            args = dict(args)
+
+
+            for key, value in args.items():
+                try:
+                    # Attempt to parse the string value to a Python literal if value is a string.
+                    if isinstance(value, str):
+                        value = ast.literal_eval(value)
+                except (ValueError, SyntaxError):
+                    # If there's a parsing error, value remains as the original string.
+                    pass
+
+                # Add the argument to the parser with the potentially updated value.
+                parser.add_argument(f'-{key}', default=value)
+
+            for i, action in enumerate(parser._optionals._actions):
+                if "-algorithm" in action.option_strings:
+                    parser._optionals._actions[i].help = "optimization algorithm"
+
+            override = True
+            if override:
+                print('WARNING: TESTING ENVIRONMENT, TURN OFF FOR RELEASE')
+                parser.add_argument('-problem_number', default='10')
+
+            if 'algorithm' not in args:
+                parser.add_argument('-algorithm', type=str, default='hs',
+                                    help='optimization algorithm')
+            elif 'Manual_Fit' not in args:
+                parser.add_argument('-Manual_Fit', action='store_false', default=None,
+                                    help='To fit a model manually if desired.')
+
+            parser.add_argument('-seperate_out_factors', action='store_false', default=False,
+                                help='Trie of wanting to split data that is potentially categorical as binary'
+                                     ' we want to split the data for processing')
+            parser.add_argument('-supply_csv', type = str, help = 'enter the name of the csv, please include it as a full directories')
+
+        else:  # DIDN"T SPECIFY LINES TRY EACH ONE MANNUALY
+            print("RUNNING WITH ARGS")
     parser.add_argument('-com', type=str, default='MetaCode',
                         help='line to read csv')
 
```
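The batch path above selects one row of `set_data.csv` by the `-line` index and turns every cell of that row into an argparse default, running string values through `ast.literal_eval` so numbers and lists survive the CSV round trip. A condensed, standalone sketch of the same pattern, with file and option names as in the hunk:

```python
import argparse
import ast
import csv

parser = argparse.ArgumentParser(prog='main', conflict_handler='resolve')
parser.add_argument('-line', type=int, default=1, help='row of set_data.csv to use')
line = parser.parse_known_args()[0].line

# Pick the requested row; each of its cells becomes one argument default.
row = {}
with open('set_data.csv', 'r') as f:
    for i, candidate in enumerate(csv.DictReader(f)):
        row = candidate
        if i == line:
            break

for key, value in row.items():
    try:
        if isinstance(value, str):
            value = ast.literal_eval(value)  # '0.15' -> 0.15, '[1, 2]' -> [1, 2]
    except (ValueError, SyntaxError):
        pass                                 # keep unparseable cells as plain strings
    parser.add_argument(f'-{key}', default=value)

args = vars(parser.parse_args())
print(args)
```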
```diff
@@ -398,7 +578,7 @@ if __name__ == '__main__':
     parser.print_help()
     args = vars(parser.parse_args())
     print(type(args))
-    # TODO add in chi 2 and df in estimation and compare degrees of freedom
+    # TODO add in chi 2 and df in estimation and compare degrees of freedom this needs to be done in solution
 
     # Print the args.
     profiler = cProfile.Profile()
```