metacountregressor 0.1.73__py3-none-any.whl → 0.1.88__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacountregressor/data_split_helper.py +90 -0
- metacountregressor/helperprocess.py +115 -0
- metacountregressor/main.py +51 -72
- metacountregressor/metaheuristics.py +25 -24
- metacountregressor/solution.py +281 -694
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.88.dist-info}/METADATA +78 -20
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.88.dist-info}/RECORD +10 -9
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.88.dist-info}/WHEEL +1 -1
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.88.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.88.dist-info}/top_level.txt +0 -0
metacountregressor/data_split_helper.py
ADDED
@@ -0,0 +1,90 @@
+import numpy as np
+import pandas as pd
+
+
+class DataProcessor:
+    def __init__(self, x_data, y_data, kwargs):
+        self._obj_1 = kwargs.get('_obj_1')
+        self._obj_2 = kwargs.get('_obj_2')
+        self.test_percentage = float(kwargs.get('test_percentage', 0))
+        self.val_percentage = float(kwargs.get('val_percentage', 0))
+        self.is_multi = self.test_percentage != 0
+        self._x_data = x_data
+        self._y_data = y_data
+        self._process_data(kwargs)
+
+    def _process_data(self, kwargs):
+        if self._obj_1 == 'MAE' or self._obj_2 in ["MAE", 'RMSE', 'MSE', 'RMSE_IN', 'RMSE_TEST']:
+            self._handle_special_conditions(kwargs)
+        else:
+            self._standard_data_partition()
+
+        self._characteristics_names = list(self._x_data.columns)
+        self._max_group_all_means = 1
+        self._exclude_this_test = [4]
+
+    def _handle_special_conditions(self, kwargs):
+        if 'panels' in kwargs:
+            self._process_panels_data(kwargs)
+        else:
+            self._standard_data_partition()
+
+    def _process_panels_data(self, kwargs):
+        group_key = kwargs['group']
+        panels_key = kwargs['panels']
+
+        # Process groups and panels
+        self._x_data[group_key] = self._x_data[group_key].astype('category').cat.codes
+        try:
+            self._x_data[panels_key] = self._x_data[panels_key].rank(method='dense').astype(int)
+            self._x_data[panels_key] -= self._x_data[panels_key].min() - 1
+        except KeyError:
+            pass
+
+        # Create training and test datasets
+        unique_ids = np.unique(self._x_data[panels_key])
+        training_size = int((1 - self.test_percentage - self.val_percentage) * len(unique_ids))
+        training_ids = np.random.choice(unique_ids, training_size, replace=False)
+
+        train_idx = self._x_data.index[self._x_data[panels_key].isin(training_ids)]
+        test_idx = self._x_data.index[~self._x_data[panels_key].isin(training_ids)]
+
+        self._create_datasets(train_idx, test_idx)
+
+    def _standard_data_partition(self):
+        total_samples = len(self._x_data)
+        training_size = int((1 - self.test_percentage - self.val_percentage) * total_samples)
+        training_indices = np.random.choice(total_samples, training_size, replace=False)
+
+        train_idx = np.array([i for i in range(total_samples) if i in training_indices])
+        test_idx = np.array([i for i in range(total_samples) if i not in training_indices])
+
+        self._create_datasets(train_idx, test_idx)
+
+    def _create_datasets(self, train_idx, test_idx):
+        self.df_train = self._x_data.loc[train_idx, :]
+        self.df_test = self._x_data.loc[test_idx, :]
+        self.y_train = self._y_data.loc[train_idx, :]
+        self.y_test = self._y_data.loc[test_idx, :]
+
+        self._x_data_test = self.df_test.copy()
+        self._y_data_test = self.y_test.astype('float').copy()
+        self._x_data = self.df_train.copy()
+        self._y_data = self.y_train.astype('float').copy()
+
+        # Handle different shapes
+        if self._x_data.ndim == 2:  # Typical DataFrame
+            self._samples, self._characteristics = self._x_data.shape
+            self._panels = None
+        elif self._x_data.ndim == 3:  # 3D structure, e.g., Panel or similar
+            self._samples, self._panels, self._characteristics = self._x_data.shape
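A minimal sketch of how this new splitter can be driven. The column names, percentages, and import path here are illustrative assumptions, not part of the diff:

import pandas as pd
from metacountregressor.data_split_helper import DataProcessor  # path assumed

# 'group' and 'panels' mirror the kwargs that _process_panels_data() expects;
# the column names are hypothetical.
x = pd.DataFrame({'county': ['A', 'A', 'B', 'B'],
                  'element_ID': [1, 1, 2, 2],
                  'AADT': [1200, 1300, 800, 950]})
y = pd.DataFrame({'Y': [0.0, 2.0, 1.0, 0.0]})

dp = DataProcessor(x, y, {'_obj_1': 'bic', '_obj_2': 'MAE',
                          'test_percentage': 0.25, 'val_percentage': 0.0,
                          'group': 'county', 'panels': 'element_ID'})
print(dp.df_train.shape, dp.df_test.shape)  # rows split by whole panel IDs, not by row

Because _obj_2 is an out-of-sample error metric and 'panels' is supplied, the split keeps all rows of a panel together, which avoids leaking observations from the same site across the train/test boundary.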
metacountregressor/helperprocess.py
CHANGED
@@ -5,6 +5,121 @@ import matplotlib.pyplot as plt
 
 plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
 
+# Select the best features based on univariate regression scores
+def select_features(X_train, y_train, n_f=16):
+    try:
+        from sklearn.feature_selection import SelectKBest
+        from sklearn.feature_selection import f_regression
+        feature_names = X_train.columns
+        # configure to select the n_f best-scoring features
+        fs = SelectKBest(score_func=f_regression, k=n_f)
+
+        # learn relationship from training data
+        fs.fit(X_train, y_train)
+
+        mask = fs.get_support()  # Boolean array of selected features
+        selected_features = [feature for keep, feature in zip(mask, feature_names) if keep]
+        X_train = X_train[selected_features]
+    except ImportError:
+        print('import error, not performing feature selection')
+        fs = X_train.columns  # fall back to keeping every column name
+
+    return X_train, fs
+
+
+# Cuts off correlated data
+def findCorrelation(corr, cutoff=0.9, exact=None):
+    """
+    This function is the Python implementation of the R function
+    `findCorrelation()`.
+
+    Relies on numpy and pandas, so must have them pre-installed.
+
+    It searches through a correlation matrix and returns a list of column names
+    to remove to reduce pairwise correlations.
+
+    For the documentation of the R function, see
+    https://www.rdocumentation.org/packages/caret/topics/findCorrelation
+    and for the source code of `findCorrelation()`, see
+    https://github.com/topepo/caret/blob/master/pkg/caret/R/findCorrelation.R
+
+    -----------------------------------------------------------------------------
+    Parameters:
+    -----------
+    corr: pandas dataframe.
+        A correlation matrix as a pandas dataframe.
+    cutoff: float, default: 0.9.
+        A numeric value for the pairwise absolute correlation cutoff
+    exact: bool, default: None
+        A boolean value that determines whether the average correlations be
+        recomputed at each step
+    -----------------------------------------------------------------------------
+    Returns:
+    --------
+    list of column names
+    -----------------------------------------------------------------------------
+    Example:
+    --------
+    R1 = pd.DataFrame({
+        'x1': [1.0, 0.86, 0.56, 0.32, 0.85],
+        'x2': [0.86, 1.0, 0.01, 0.74, 0.32],
+        'x3': [0.56, 0.01, 1.0, 0.65, 0.91],
+        'x4': [0.32, 0.74, 0.65, 1.0, 0.36],
+        'x5': [0.85, 0.32, 0.91, 0.36, 1.0]
+    }, index=['x1', 'x2', 'x3', 'x4', 'x5'])
+
+    findCorrelation(R1, cutoff=0.6, exact=False)  # ['x4', 'x5', 'x1', 'x3']
+    findCorrelation(R1, cutoff=0.6, exact=True)   # ['x1', 'x5', 'x4']
+    """
+
+    def _findCorrelation_fast(corr, avg, cutoff):
+        combsAboveCutoff = corr.where(lambda x: (np.tril(x) == 0) & (x > cutoff)).stack().index
+
+        rowsToCheck = combsAboveCutoff.get_level_values(0)
+        colsToCheck = combsAboveCutoff.get_level_values(1)
+
+        msk = avg[colsToCheck] > avg[rowsToCheck].values
+        deletecol = pd.unique(np.r_[colsToCheck[msk], rowsToCheck[~msk]]).tolist()
+
+        return deletecol
+
+    def _findCorrelation_exact(corr, avg, cutoff):
+        x = corr.loc[(*[avg.sort_values(ascending=False).index] * 2,)]
+
+        if (x.dtypes.values[:, None] == ['int64', 'int32', 'int16', 'int8']).any():
+            x = x.astype(float)
+
+        x.values[(*[np.arange(len(x))] * 2,)] = np.nan
+
+        deletecol = []
+        for ix, i in enumerate(x.columns[:-1]):
+            for j in x.columns[ix + 1:]:
+                if x.loc[i, j] > cutoff:
+                    if x[i].mean() > x[j].mean():
+                        deletecol.append(i)
+                        x.loc[i] = x[i] = np.nan
+                    else:
+                        deletecol.append(j)
+                        x.loc[j] = x[j] = np.nan
+        return deletecol
+
+    # dispatch on the absolute correlations; exact recomputes averages each step
+    acorr = corr.abs()
+    avg = acorr.mean()
+    if exact or (exact is None and corr.shape[1] < 100):
+        return _findCorrelation_exact(acorr, avg, cutoff)
+    return _findCorrelation_fast(acorr, avg, cutoff)
+
+
+"""Function to convert object columns to numeric types"""
+def clean_data_types(df):
+    for col in df.columns:
+        if df[col].dtype == 'object':
+            # Attempt to convert the column to numeric type
+            df[col] = pd.to_numeric(df[col], errors='coerce')
+    return df
+
 
 def drop_correlations(x_df, percentage=0.85):
     cor_matrix = x_df.corr().abs()
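The docstring example above can be exercised directly; a quick sketch using its own correlation matrix (module import path assumed):

import pandas as pd
from metacountregressor.helperprocess import findCorrelation  # path assumed

R1 = pd.DataFrame({
    'x1': [1.0, 0.86, 0.56, 0.32, 0.85],
    'x2': [0.86, 1.0, 0.01, 0.74, 0.32],
    'x3': [0.56, 0.01, 1.0, 0.65, 0.91],
    'x4': [0.32, 0.74, 0.65, 1.0, 0.36],
    'x5': [0.85, 0.32, 0.91, 0.36, 1.0]
}, index=['x1', 'x2', 'x3', 'x4', 'x5'])

# Per the docstring, exact=True yields ['x1', 'x5', 'x4'].
to_drop = findCorrelation(R1, cutoff=0.6, exact=True)
kept = R1.drop(columns=to_drop).drop(index=to_drop)
print(kept.columns.tolist())  # ['x2', 'x3'] -- no remaining pair above the cutoff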
metacountregressor/main.py
CHANGED
@@ -9,14 +9,12 @@ import numpy as np
 import pandas as pd
 from pandas import DataFrame
 from pandas.io.parsers import TextFileReader
-
 import helperprocess
 from metaheuristics import (differential_evolution,
                             harmony_search,
                             simulated_annealing)
 from solution import ObjectiveFunction
 
-
 from test_motor import *
 
 warnings.simplefilter("ignore")
@@ -37,7 +35,7 @@ def main(args, **kwargs):
     # removing junk files if specified
     helperprocess.remove_files(args.get('removeFiles', True))
 
-    # do we want
+    # do we want to run a test
     if args.get('com', False) == 'MetaCode':
         print('Testing the Python Package')  # TODO add in python package import
         # Read data from CSV file
@@ -64,6 +62,7 @@ def main(args, **kwargs):
         print('the dataset is', dataset)
         manual_fit_spec = args.get('Manual_Fit', None)
         if dataset == 1:
+            print('Stage 5 A Short.')
             df = pd.read_csv('./data/1848.csv')  # read in the data
             y_df = df[['FSI']]  # only consider crashes
             y_df.rename(columns={"FSI": "Y"}, inplace=True)
@@ -71,6 +70,7 @@ def main(args, **kwargs):
             x_df = helperprocess.as_wide_factor(x_df)
 
         elif dataset == 3:
+            print('Stage 5 A Data Complete.')
             x_df = pd.read_csv('./data/Stage5A_1848_All_Initial_Columns.csv')  # drop the ID columns
             drop_these = ['Id', 'ID', 'old', 'G_N']
             for i in drop_these:
@@ -109,6 +109,16 @@ def main(args, **kwargs):
                 'transformations': ['no', 'no', 'no', 'no', 'no', 'no', 'no', 'no'],
                 'dispersion': 1
             }
+            print('overriding this delete, just want to test the NB')
+            manual_fit_spec = {
+                'fixed_terms': ['const'],
+                'rdm_terms': [],
+                'rdm_cor_terms': [],
+                'grouped_terms': [],
+                'hetro_in_means': [],
+                'transformations': ['no'],
+                'dispersion': 1
+            }
 
             df = pd.read_csv('./data/Ex-16-3.csv')  # read in the data
             y_df = df[['FREQ']].copy()  # only consider crashes
@@ -118,7 +128,7 @@ def main(args, **kwargs):
             x_df['Offset'] = np.log(1 + x_df['AADT'] * x_df['LENGTH'] * 365 / 100000000)
             x_df = x_df.drop(columns=['AADT', 'LENGTH'])
 
-            if args
+            if args.get('seperate_out_factors', 0):
 
                 x_df = helperprocess.as_wide_factor(x_df, keep_original=0,
                                                     exclude=['INTECHAG', 'CURVES', 'MIMEDSH', 'MXMEDSH', 'SPEED'])
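The Offset column built above is the usual log-exposure term for a count model: AADT × segment length × 365 gives annual vehicle-miles, and the division by 100,000,000 rescales to hundred-million vehicle-miles. A worked check with illustrative numbers:

import numpy as np

aadt, length_miles = 10_000, 2.0  # hypothetical segment
exposure = aadt * length_miles * 365 / 100_000_000  # 0.073 hundred-million vehicle-miles
offset = np.log(1 + exposure)  # the 1 + guards against log(0) on zero-exposure rows
print(round(offset, 4))  # 0.0705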
@@ -159,7 +169,28 @@ def main(args, **kwargs):
                 'transformations': ['no', 'no', 'no', 'no'],
                 'dispersion': 0
             }
-
+        elif dataset == 8:
+            print('Main County')
+            df = pd.read_csv('./data/rural_int.csv')  # read in the data
+            y_df = df[['crashes']].copy()  # only consider crashes
+            y_df.rename(columns={"crashes": "Y"}, inplace=True)
+            panels = df['orig_ID']
+            try:
+                x_df = df.drop(columns=['crashes', 'year', 'orig_ID',
+                                        'jurisdiction', 'town', 'maint_region', 'weather_station', 'dummy_winter_2'])  # was dropped postcode
+                print('dropping for test')
+                x_df = x_df.drop(columns=['month', 'inj.fat', 'PDO'])
+                x_df = x_df.drop(columns=['zonal_ID', 'ln_AADT', 'ln_seg'])
+                x_df['rumble_install_year'] = x_df['rumble_install_year'].astype('category').cat.codes
+                x_df.rename(columns={"rumble_install_year": "has_rumble"}, inplace=True)
+            except Exception as e:
+                print(e)
+                x_df = df.drop(columns=['Y'])  # was dropped postcode
+
+            group_grab = x_df['county']
+            x_df = x_df.drop(columns=['county'])
+            x_df = helperprocess.interactions(x_df, drop_this_perc=0.8)
+            x_df['county'] = group_grab
 
         elif dataset == 9:
             df = pd.read_csv('panel_synth.csv')  # read in the data
@@ -186,64 +217,7 @@ def main(args, **kwargs):
 
             x_df = helperprocess.interactions(x_df, keep)
         else:  # the dataset has been selected in the program as something else
-
-            from tkinter.filedialog import askopenfilename
-
-            ASK_ANALALYST = 0
-            if ASK_ANALALYST:
-                root = Tk()
-                root.withdraw()
-                # Prompt the user to select a directory
-                directory = askopenfilename(title="Select File For Analysis")
-                skip_lines = int(input("Select the number of lines to skip, (numeric): "))
-                df = pd.read_csv(directory, skip_rows=skip_lines)
-            else:
-                df = pd.read_csv('data/rqc40516_MotorcycleQUT_engineer_crash.csv', skiprows=5)
-                df['CRASH_SPEED_LIMIT'] = df['CRASH_SPEED_LIMIT'].str.replace(' km/h', '').astype(int)
-
-                # Clean data types
-                df = clean_data_types(df)
-
-                # Encode categorical variables
-                categories = ['CRASH_SEVERITY', 'CRASH_TYPE', 'CRASH_NATURE', 'CRASH_ATMOSPHERIC_CONDITION']
-                df = pd.get_dummies(df, columns=categories)
-
-                # Select only numeric columns
-                numeric_types = ['int32', 'uint8', 'bool', 'int64', 'float64']
-                df = df.select_dtypes(include=numeric_types)
-
-                # Check for missing values and fill with column mean
-                missing_values_count = df['CASUALTY_TOTAL'].isnull().sum()
-                df.fillna(df.mean())
-
-                # Remove unnecessary columns
-                df.drop(columns=['CRASH_REF_NUMBER'], inplace=True)
-                y = df['CASUALTY_TOTAL']
-                # Define columns to exclude from the analysis
-                EXCLUDE = [
-                    'LONGITUDE', 'YEAR', 'DCA', 'ID', 'LATIT', 'NAME', 'SEVERITY',
-                    "CASUALTY", "CRASH_FIN_YEAR", "CRASH_HOUR", "MOPED"
-                ]
-
-                # Filter out excluded columns
-                df = df[[col for col in df.columns if not any(ex in col for ex in EXCLUDE)]]
-
-                # Prepare target variable
-
-                # Check for finite values and compute correlations
-                finite_check = df.apply(np.isfinite).all()
-                df_clean = df.loc[:, finite_check]
-                corr = df_clean.corr()
-
-                # Identify and remove highly correlated features
-                hc = findCorrelation(corr, cutoff=0.5)
-                trimmed_df = df_clean.drop(columns=hc)
-
-                # Feature selection
-                df_cleaner, fs = select_features(trimmed_df, y)
-                x_df = df_cleaner
-                y_df = y.to_frame(name="Y")
-                # y_df.rename(columns={"CASUALTY_TOTAL": "Y"}, inplace=True)
+            print('TODO add in dataset')
 
     if args['Keep_Fit'] == str(2) or args['Keep_Fit'] == 2:
         if manual_fit_spec is None:
@@ -251,8 +225,8 @@ def main(args, **kwargs):
         else:
             print('fitting manually')
             args['Manual_Fit'] = manual_fit_spec
-
     if args['problem_number'] == str(8) or args['problem_number'] == 8:
+        print('Maine County Dataset.')
         args['group'] = 'county'
         args['panels'] = 'element_ID'
         args['ID'] = 'element_ID'
@@ -264,9 +238,9 @@ def main(args, **kwargs):
 
     args['complexity_level'] = args.get('complexity_level', 6)
 
-
-    AnalystSpecs
-    args['AnalystSpecs'] = AnalystSpecs
+
+    # Initialize AnalystSpecs to None if not manually provided
+    args['AnalystSpecs'] = args.get('AnalystSpecs', None)
 
     if args['algorithm'] == 'sa':
         args_hyperparameters = {'alpha': float(args['temp_scale']),
@@ -312,7 +286,7 @@ def main(args, **kwargs):
 
 
     elif args['algorithm'] == 'de':
-        # force
+        # force variables
         args['must_include'] = args.get('force', [])
 
         args_hyperparameters = {'_AI': args.get('_AI', 2),
@@ -321,7 +295,6 @@ def main(args, **kwargs):
             , '_pop_size': int(args['_hms']), 'instance_number': int(args['line'])
             , 'Manual_Fit': args['Manual_Fit'],
             'MP': int(args['MP'])
-
             }
 
         args_hyperparameters = dict(args_hyperparameters)
@@ -347,7 +320,7 @@ if __name__ == '__main__':
     alg_parser.print_help()
     parser = argparse.ArgumentParser(prog='main',
                                      epilog=main.__doc__,
-                                     formatter_class=argparse.RawDescriptionHelpFormatter)
+                                     formatter_class=argparse.RawDescriptionHelpFormatter, conflict_handler='resolve')
 
     parser.add_argument('-line', type=int, default=44,
                         help='line to read in csv to pass in argument')
@@ -362,6 +335,7 @@ if __name__ == '__main__':
             break
         line_number_obs += 1
     args = dict(args)
+
     for key, value in args.items():
         try:
             # Attempt to parse the string value to a Python literal if value is a string.
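The loop above coerces CSV-sourced strings back into Python literals; a minimal sketch of that pattern, assuming ast.literal_eval is the parser (the keys shown are just examples taken from elsewhere in this file):

import ast

raw = {'_hms': '20', 'force': "['const']", 'algorithm': 'hs'}
args = {}
for key, value in raw.items():
    try:
        # '20' -> 20 and "['const']" -> ['const']; plain words raise and stay strings
        args[key] = ast.literal_eval(value) if isinstance(value, str) else value
    except (ValueError, SyntaxError):
        args[key] = value
print(args)  # {'_hms': 20, 'force': ['const'], 'algorithm': 'hs'}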
@@ -378,6 +352,11 @@ if __name__ == '__main__':
         if "-algorithm" in action.option_strings:
             parser._optionals._actions[i].help = "optimization algorithm"
 
+    override = True
+    if override:
+        print('todo turn off, in testing phase')
+        parser.add_argument('-problem_number', default='4')
+        print('did it make it')
     if 'algorithm' not in args:
         parser.add_argument('-algorithm', type=str, default='hs',
                             help='optimization algorithm')
@@ -390,7 +369,7 @@ if __name__ == '__main__':
                             ' we want to split the data for processing')
         parser.add_argument('-supply_csv', type=str, help='enter the name of the csv; please include the full directory')
 
-    else:  # DIDN'T SPECIFY LINES, TRY EACH ONE
+    else:  # DIDN'T SPECIFY LINES, TRY EACH ONE MANUALLY
         parser.add_argument('-com', type=str, default='MetaCode',
                             help='line to read csv')
 
@@ -398,7 +377,7 @@ if __name__ == '__main__':
     parser.print_help()
     args = vars(parser.parse_args())
     print(type(args))
-    # TODO add in chi 2 and df in estimation and compare degrees of freedom
+    # TODO add in chi 2 and df in estimation and compare degrees of freedom; this needs to be done in solution
 
     # Print the args.
     profiler = cProfile.Profile()
metacountregressor/metaheuristics.py
CHANGED
@@ -15,8 +15,14 @@ from datetime import datetime
 import numpy as np
 import pandas as pd
 
-
-from .
+try:
+    from .pareto_file import Pareto, Solution
+    from .solution import ObjectiveFunction
+except ImportError:
+    print('Exception relative import')
+    from metacountregressor.pareto_file import Pareto, Solution
+    from metacountregressor.solution import ObjectiveFunction
+
 
 HarmonySearchResults = namedtuple('HarmonySearchResults',
                                   ['elapsed_time', 'best_harmony', 'best_fitness', 'harmony_memories',
@@ -32,7 +38,7 @@ DifferentialEvolutionMulti = namedtuple('DifferentialEvolutionMulti',
                                         ['elapsed_time', 'best_solutions', 'population_solutions'])
 
 
-#helper function to plot the bic
+# helper function to plot the bic
 def _plot(x, y, z, xlabel=None, ylabel=None, zlabel=None, filename=None):
     from matplotlib import pyplot as plt
 
@@ -54,7 +60,8 @@ def _plot(x, y, z, xlabel=None, ylabel=None, zlabel=None, filename=None):
     plt.savefig('bic.png')
     plt.show()
 
-
+
+# helper function to grab dictionary means
 def dict_mean(dict_list,
               ignore=None):
     if ignore is None:
@@ -204,8 +211,7 @@ def different_evolution(objective_function, initial_slns=None, **kwargs):
 
 
 def differential_evolution(objective_function, initial_slns=None, **kwargs):
-
-    raise Exception
+
     start = datetime.now()
 
     man = None
@@ -220,11 +226,8 @@ def differential_evolution(objective_function, initial_slns=None, **kwargs):
         de = Mutlithreaded_Meta(objective_function, **kwargs)
         best, pare = de.run_mp(initial_slns=initial_slns, mod_init=man)
     else:
-
         print('Not Multi Threaded')
-
         de = DifferentialEvolution(objective_function, **kwargs)
-
         best, pare = de.differential_evolution_run(initial_slns=initial_slns, mod_init=man)
 
     end = datetime.now()
@@ -393,12 +396,10 @@ class DifferentialEvolution(object):
     """
 
     def __init__(self, objective_function, **kwargs):
-
-        if not isinstance(objective_function, ObjectiveFunction):
-            raise TypeError
         self._obj_fun = objective_function
         if self._obj_fun._obj_1 is None:
-
+            print('no objective found, automatically selecting BIC')
+            self._obj_fun._obj_1 = 'bic'
 
         self._pop_size = kwargs.get('_pop_size', 20)
         if not isinstance(self._pop_size, int):
@@ -406,7 +407,7 @@ class DifferentialEvolution(object):
         elif self._pop_size <= 3:
             raise ValueError("_pop_size must be greater than 4")
 
-        self.F = kwargs.get('_AI', 2)  #
+        self.F = kwargs.get('_AI', 2)  # mutation scale
         self.iter = kwargs.get('_max_iter', 10000)
         self.cr = kwargs.get('_crossover_perc') or kwargs.get('_cr', 0.2)
         self.instance_number = str(kwargs.get('instance_number', 1))
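For context on those hyperparameters: in canonical differential evolution, F scales the difference vector that forms a mutant and cr is the per-component crossover probability. A generic real-valued sketch of one candidate update (the package itself evolves discrete model specifications, so this is illustrative only):

import numpy as np

rng = np.random.default_rng(0)
F, cr = 2.0, 0.2              # mirrors the _AI and _crossover_perc defaults above
pop = rng.random((20, 5))     # toy population of 20 five-dimensional vectors

target = pop[0]
a, b, c = pop[rng.choice(np.arange(1, 20), size=3, replace=False)]
mutant = a + F * (b - c)                 # mutation: base vector plus scaled difference
mask = rng.random(5) < cr                # crossover: inherit each gene with prob. cr
mask[rng.integers(5)] = True             # guarantee at least one gene from the mutant
trial = np.where(mask, mutant, target)   # trial vector then competes with the target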
@@ -415,12 +416,9 @@ class DifferentialEvolution(object):
         self._population = list()
         self.it_process = 1
         if objective_function.is_multi:
-
             self.obj_1 = objective_function._obj_1
             self.obj_2 = objective_function._obj_2
-
             self.pf = Pareto(self.obj_1, self.obj_2, True)
-
             self._pareto_population = list()
         else:
             self.obj_1 = objective_function._obj_1
@@ -555,7 +553,6 @@ class DifferentialEvolution(object):
         average_iteration = 0
         iterations_without_improvement = 0
 
-
         start_time = datetime.now()
         if self._obj_fun.use_random_seed():
             self._obj_fun.set_random_seed()
@@ -949,10 +946,9 @@ class SimulatedAnnealing(object):
             output_step.append(a)
             output_energy.append(b)
             output_best_energy.append(c)
-
 
-        return {'elapsed_time': elapsed_time, 'Iteration': iteration}  #TODO make this reachavble
-        #return output_step, output_energy, output_best_energy, self.best_energy, self.best_struct
+        return {'elapsed_time': elapsed_time, 'Iteration': iteration}  # TODO make this reachable
+        # return output_step, output_energy, output_best_energy, self.best_energy, self.best_struct
 
     def _get_neighbour(self, current, mutations=None):
         neighbour = copy.deepcopy(current)
@@ -963,7 +959,6 @@ class SimulatedAnnealing(object):
 
         # number of parameters in the model  # TODO get the last value if 2
 
-
         num_of_changeablePARMs = 0
 
         self._obj_fun.nbr_routine(current)
@@ -1242,7 +1237,8 @@ class HarmonySearch(object):
         Initialize HS with the specified objective function. Note that this objective function must implement ObjectiveFunctionInterface.
         """
         self._obj_fun = objective_function
-
+        # for printing basic metrics
+        self.print_verbose = True
         # harmony_memory stores the best hms harmonies
         self._harmony_memory = list()
         # harmony_history stores all hms harmonies every nth improvisations (i.e., one 'generation')
@@ -1294,7 +1290,7 @@ class HarmonySearch(object):
     def does_it_appear(self, new):
         for d in self._harmony_memory:
             if self.mixed_list_chescker(d['layout'], new):
-                #print('same sln appears in population')
+                # print('same sln appears in population')
                 return True
 
         return False
@@ -1314,6 +1310,7 @@ class HarmonySearch(object):
             self._obj_fun.set_random_seed()
         # fill harmony_memory using random parameter values by default, but with initial_harmonies if provided
         self._initialize(initial_harmonies, mod_init)
+        if self.print_verbose: print('Initialization complete')
         if self.pf.get_objective_is_multi():
             self._pareto_harmony_memory = self.pf.non_dominant_sorting(self._harmony_memory)
             generation_best = self._pareto_harmony_memory[0]
@@ -1333,6 +1330,9 @@ class HarmonySearch(object):
                iterations_without_improvement < self._obj_fun.get_termination_iter()):
             # generate new harmony
             elapsed_time = (datetime.now() - start_time).total_seconds()
+            if self.print_verbose:
+                print('Time: ', elapsed_time)
+                print('Improvisation: ', num_imp)
             harmony = list()
 
             for i in range(0, self._obj_fun.get_num_parameters()):
@@ -1374,6 +1374,7 @@ class HarmonySearch(object):
                                  self.pf.get_objective_is_multi())
             num_imp += 1
             if iterations_without_improvement == 0:  # if there is any kind of improvement, update the logs
+                if self.print_verbose: print('improvement found at improvisation', num_imp)
                 if self.pf.get_objective_is_multi():
                     try:
                         logger(num_imp, fitness, self._harmony_memory, True, self.get_instance_name(),