metacountregressor 0.1.73__py3-none-any.whl → 0.1.78__py3-none-any.whl
- metacountregressor/data_split_helper.py +90 -0
- metacountregressor/helperprocess.py +115 -0
- metacountregressor/main.py +41 -69
- metacountregressor/metaheuristics.py +25 -24
- metacountregressor/solution.py +189 -628
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.78.dist-info}/METADATA +1 -1
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.78.dist-info}/RECORD +10 -9
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.78.dist-info}/WHEEL +1 -1
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.78.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.73.dist-info → metacountregressor-0.1.78.dist-info}/top_level.txt +0 -0
metacountregressor/data_split_helper.py
ADDED
@@ -0,0 +1,90 @@
+import numpy as np
+import pandas as pd
+
+
+
+
+class DataProcessor:
+    def __init__(self, x_data, y_data, kwargs):
+        self._obj_1 = kwargs.get('_obj_1')
+        self._obj_2 = kwargs.get('_obj_2')
+        self.test_percentage = float(kwargs.get('test_percentage', 0))
+        self.val_percentage = float(kwargs.get('val_percentage', 0))
+        self.is_multi = self.test_percentage != 0
+        self._x_data = x_data
+        self._y_data = y_data
+        self._process_data(kwargs)
+
+    def _process_data(self, kwargs):
+        if self._obj_1 == 'MAE' or self._obj_2 in ["MAE", 'RMSE', 'MSE', 'RMSE_IN', 'RMSE_TEST']:
+            self._handle_special_conditions(kwargs)
+        else:
+            self._standard_data_partition()
+
+        self._characteristics_names = list(self._x_data.columns)
+        self._max_group_all_means = 1
+        self._exclude_this_test = [4]
+
+    def _handle_special_conditions(self, kwargs):
+        if 'panels' in kwargs:
+            self._process_panels_data(kwargs)
+        else:
+            self._standard_data_partition()
+
+    def _process_panels_data(self, kwargs):
+        group_key = kwargs['group']
+        panels_key = kwargs['panels']
+
+        # Process groups and panels
+        self._x_data[group_key] = self._x_data[group_key].astype('category').cat.codes
+        try:
+            self._x_data[panels_key] = self._x_data[panels_key].rank(method='dense').astype(int)
+            self._x_data[panels_key] -= self._x_data[panels_key].min() - 1
+        except KeyError:
+            pass
+
+        # Create training and test datasets
+        unique_ids = np.unique(self._x_data[panels_key])
+        training_size = int((1 - self.test_percentage - self.val_percentage) * len(unique_ids))
+        training_ids = np.random.choice(unique_ids, training_size, replace=False)
+
+        train_idx = self._x_data.index[self._x_data[panels_key].isin(training_ids)]
+        test_idx = self._x_data.index[~self._x_data[panels_key].isin(training_ids)]
+
+        self._create_datasets(train_idx, test_idx)
+
+    def _standard_data_partition(self):
+        total_samples = len(self._x_data)
+        training_size = int((1 - self.test_percentage - self.val_percentage) * total_samples)
+        training_indices = np.random.choice(total_samples, training_size, replace=False)
+
+        train_idx = np.array([i for i in range(total_samples) if i in training_indices])
+        test_idx = np.array([i for i in range(total_samples) if i not in training_indices])
+
+        self._create_datasets(train_idx, test_idx)
+
+    def _create_datasets(self, train_idx, test_idx):
+        self.df_train = self._x_data.loc[train_idx, :]
+        self.df_test = self._x_data.loc[test_idx, :]
+        self.y_train = self._y_data.loc[train_idx, :]
+        self.y_test = self._y_data.loc[test_idx, :]
+
+        self._x_data_test = self.df_test.copy()
+        self._y_data_test = self.y_test.astype('float').copy()
+        self._x_data = self.df_train.copy()
+        self._y_data = self.y_train.astype('float').copy()
+
+        # Handle different shapes
+        if self._x_data.ndim == 2:  # Typical DataFrame
+            self._samples, self._characteristics = self._x_data.shape
+            self._panels = None
+        elif self._x_data.ndim == 3:  # 3D structure, e.g., Panel or similar
+            self._samples, self._panels, self._characteristics = self._x_data.shape
+
+
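For orientation, a minimal sketch of how the new DataProcessor might be exercised. The import path, column names and toy values are illustrative assumptions; only the keyword names (_obj_1, _obj_2, test_percentage, val_percentage, group, panels) come from the __init__ above. Because _obj_2 is an out-of-sample measure and 'panels' is supplied, whole panels are held out for testing rather than individual rows:

import pandas as pd
from metacountregressor.data_split_helper import DataProcessor  # assumed import path

# toy panel data: three panels of two observations each (illustrative names)
x = pd.DataFrame({'county':   [1, 1, 2, 2, 3, 3],
                  'panel_id': [1, 1, 2, 2, 3, 3],
                  'aadt':     [500, 520, 900, 880, 300, 310]})
y = pd.DataFrame({'Y': [0, 1, 3, 2, 0, 0]})

dp = DataProcessor(x, y, {'_obj_1': 'bic', '_obj_2': 'MAE',
                          'test_percentage': 0.3, 'val_percentage': 0.0,
                          'group': 'county', 'panels': 'panel_id'})
print(dp.df_train.shape, dp.df_test.shape)  # one whole panel lands in the test split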
metacountregressor/helperprocess.py
CHANGED
@@ -5,6 +5,121 @@ import matplotlib.pyplot as plt

plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')

+##Select the best Features Based on RF
+def select_features(X_train, y_train, n_f=16):
+    try:
+        from sklearn.feature_selection import SelectKBest
+        from sklearn.feature_selection import f_regression
+        feature_names = X_train.columns
+        # configure to select all features
+        fs = SelectKBest(score_func=f_regression, k=16)
+
+        # learn relationship from training data
+        fs.fit(X_train, y_train)
+
+        mask = fs.get_support()  # Boolean array of selected features
+        selected_features = [feature for bool, feature in zip(mask, feature_names) if bool]
+        X_train = X_train[selected_features]
+    except:
+        print('import error, not performing feature selection')
+        fs = X_train.columns  # TODO check if this is actually getting the names
+
+    return X_train, fs
+
+
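A small, self-contained illustration of the SelectKBest/f_regression pattern used by select_features above (the synthetic data and column names are made up for the example, not taken from the package):

import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectKBest, f_regression

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(100, 5)), columns=[f'x{i}' for i in range(5)])
y = 2 * X['x0'] - X['x3'] + rng.normal(scale=0.1, size=100)  # only x0 and x3 matter

fs = SelectKBest(score_func=f_regression, k=2).fit(X, y)
kept = X.columns[fs.get_support()].tolist()  # boolean mask -> column names
print(kept)  # should recover ['x0', 'x3']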
+#Cutts off correlated data
+
+
+
+
+
+def findCorrelation(corr, cutoff=0.9, exact=None):
+    """
+    This function is the Python implementation of the R function
+    `findCorrelation()`.
+
+    Relies on numpy and pandas, so must have them pre-installed.
+
+    It searches through a correlation matrix and returns a list of column names
+    to remove to reduce pairwise correlations.
+
+    For the documentation of the R function, see
+    https://www.rdocumentation.org/packages/caret/topics/findCorrelation
+    and for the source code of `findCorrelation()`, see
+    https://github.com/topepo/caret/blob/master/pkg/caret/R/findCorrelation.R
+
+    -----------------------------------------------------------------------------
+
+    Parameters:
+    -----------
+    corr: pandas dataframe.
+        A correlation matrix as a pandas dataframe.
+    cutoff: float, default: 0.9.
+        A numeric value for the pairwise absolute correlation cutoff
+    exact: bool, default: None
+        A boolean value that determines whether the average correlations be
+        recomputed at each step
+    -----------------------------------------------------------------------------
+    Returns:
+    --------
+    list of column names
+    -----------------------------------------------------------------------------
+    Example:
+    --------
+    R1 = pd.DataFrame({
+        'x1': [1.0, 0.86, 0.56, 0.32, 0.85],
+        'x2': [0.86, 1.0, 0.01, 0.74, 0.32],
+        'x3': [0.56, 0.01, 1.0, 0.65, 0.91],
+        'x4': [0.32, 0.74, 0.65, 1.0, 0.36],
+        'x5': [0.85, 0.32, 0.91, 0.36, 1.0]
+    }, index=['x1', 'x2', 'x3', 'x4', 'x5'])
+
+    findCorrelation(R1, cutoff=0.6, exact=False)  # ['x4', 'x5', 'x1', 'x3']
+    findCorrelation(R1, cutoff=0.6, exact=True)   # ['x1', 'x5', 'x4']
+    """
+
+    def _findCorrelation_fast(corr, avg, cutoff):
+
+        combsAboveCutoff = corr.where(lambda x: (np.tril(x) == 0) & (x > cutoff)).stack().index
+
+        rowsToCheck = combsAboveCutoff.get_level_values(0)
+        colsToCheck = combsAboveCutoff.get_level_values(1)
+
+        msk = avg[colsToCheck] > avg[rowsToCheck].values
+        deletecol = pd.unique(np.r_[colsToCheck[msk], rowsToCheck[~msk]]).tolist()
+
+        return deletecol
+
+    def _findCorrelation_exact(corr, avg, cutoff):
+
+        x = corr.loc[(*[avg.sort_values(ascending=False).index] * 2,)]
+
+        if (x.dtypes.values[:, None] == ['int64', 'int32', 'int16', 'int8']).any():
+            x = x.astype(float)
+
+        x.values[(*[np.arange(len(x))] * 2,)] = np.nan
+
+        deletecol = []
+        for ix, i in enumerate(x.columns[:-1]):
+            for j in x.columns[ix + 1:]:
+                if x.loc[i, j] > cutoff:
+                    if x[i].mean() > x[j].mean():
+                        deletecol.append(i)
+                        x.loc[i] = x[i] = np.nan
+                    else:
+                        deletecol.append(j)
+                        x.loc[j] = x[j] = np.nan
+
+
+
+
+"""Funtion to Convert Data to Binaries """
+def clean_data_types(df):
+    for col in df.columns:
+        if df[col].dtype == 'object':
+            # Attempt to convert the column to numeric type
+            df[col] = pd.to_numeric(df[col], errors='coerce')
+    return df
+

def drop_correlations(x_df, percentage=0.85):
    cor_matrix = x_df.corr().abs()
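These helpers all serve the same preprocessing goal: compute a correlation matrix, decide which member of each highly correlated pair to discard, and drop it before model search. Below is a simplified, pandas-only stand-in for that idea (the upper-triangle cutoff rule); it is illustrative only and not the package's exact findCorrelation/drop_correlations logic:

import numpy as np
import pandas as pd

def drop_highly_correlated(x_df, cutoff=0.85):
    # upper triangle of the absolute correlation matrix
    corr = x_df.corr().abs()
    upper = corr.where(np.triu(np.ones(corr.shape, dtype=bool), k=1))
    # drop any column correlated above the cutoff with an earlier column
    to_drop = [col for col in upper.columns if (upper[col] > cutoff).any()]
    return x_df.drop(columns=to_drop)

rng = np.random.default_rng(1)
a = rng.normal(size=200)
df = pd.DataFrame({'a': a,
                   'b': a * 0.98 + rng.normal(scale=0.05, size=200),
                   'c': rng.normal(size=200)})
print(drop_highly_correlated(df, cutoff=0.85).columns.tolist())  # ['a', 'c']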
metacountregressor/main.py
CHANGED
@@ -9,14 +9,12 @@ import numpy as np
import pandas as pd
from pandas import DataFrame
from pandas.io.parsers import TextFileReader
-
import helperprocess
from metaheuristics import (differential_evolution,
                            harmony_search,
                            simulated_annealing)
from solution import ObjectiveFunction

-from test_motor import *

warnings.simplefilter("ignore")

@@ -37,7 +35,7 @@ def main(args, **kwargs):
    # removing junk files if specicified
    helperprocess.remove_files(args.get('removeFiles', True))

-    # do we want
+    # do we want to run a test
    if args.get('com', False) == 'MetaCode':
        print('Testing the Python Package')  # TODO add in python package import
        # Read data from CSV file
@@ -64,6 +62,7 @@ def main(args, **kwargs):
    print('the dataset is', dataset)
    manual_fit_spec = args.get('Manual_Fit', None)
    if dataset == 1:
+        print('Stage 5 A Short.')
        df = pd.read_csv('./data/1848.csv')  # read in the data
        y_df = df[['FSI']]  # only consider crashes
        y_df.rename(columns={"FSI": "Y"}, inplace=True)
@@ -71,6 +70,7 @@ def main(args, **kwargs):
        x_df = helperprocess.as_wide_factor(x_df)

    elif dataset == 3:
+        print('Stage 5 A Data Complete.')
        x_df = pd.read_csv('./data/Stage5A_1848_All_Initial_Columns.csv')  # drop the ID columns
        drop_these = ['Id', 'ID', 'old', 'G_N']
        for i in drop_these:
@@ -159,7 +159,28 @@ def main(args, **kwargs):
            'transformations': ['no', 'no', 'no', 'no'],
            'dispersion': 0
        }
-
+    elif dataset == 8:
+        print('Main County')
+        df = pd.read_csv('./data/rural_int.csv')  # read in the data
+        y_df = df[['crashes']].copy()  # only consider crashes
+        y_df.rename(columns={"crashes": "Y"}, inplace=True)
+        panels = df['orig_ID']
+        try:
+            x_df = df.drop(columns=['crashes', 'year', 'orig_ID',
+                                    'jurisdiction', 'town', 'maint_region', 'weather_station', 'dummy_winter_2'])  # was dropped postcode
+            print('dropping for test')
+            x_df = x_df.drop(columns=['month', 'inj.fat', 'PDO'])
+            x_df = x_df.drop(columns=['zonal_ID', 'ln_AADT', 'ln_seg'])
+            x_df['rumble_install_year'] = x_df['rumble_install_year'].astype('category').cat.codes
+            x_df.rename(columns={"rumble_install_year": "has_rumble"}, inplace=True)
+
+        except:
+            x_df = df.drop(columns=['Y'])  # was dropped postcode
+
+        group_grab = x_df['county']
+        x_df = x_df.drop(columns=['county'])
+        x_df = helperprocess.interactions(x_df, drop_this_perc=0.8)
+        x_df['county'] = group_grab

    elif dataset == 9:
        df = pd.read_csv('panel_synth.csv')  # read in the data
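Note how the new dataset-8 branch sets the 'county' grouping column aside before calling helperprocess.interactions and re-attaches it afterwards, so the group ID never enters the interaction terms. A toy version of that hold-out-and-reattach pattern is sketched below; the frame and the interaction step are stand-ins, not the package's own function:

import pandas as pd

df = pd.DataFrame({'county': [1, 1, 2], 'aadt': [500.0, 620.0, 300.0], 'lanes': [2, 4, 2]})

group = df['county']                                     # set the grouping column aside
feats = df.drop(columns=['county'])
feats['aadt_x_lanes'] = feats['aadt'] * feats['lanes']   # stand-in for helperprocess.interactions
feats['county'] = group                                  # re-attach the group ID afterwards
print(feats.columns.tolist())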
@@ -186,64 +207,7 @@ def main(args, **kwargs):

        x_df = helperprocess.interactions(x_df, keep)
    else:  # the dataset has been selected in the program as something else
-
-        from tkinter.filedialog import askopenfilename
-
-        ASK_ANALALYST = 0
-        if ASK_ANALALYST:
-            root = Tk()
-            root.withdraw()
-            # Prompt the user to select a directory
-            directory = askopenfilename(title="Select File For Analysis")
-            skip_lines = int(input("Select the number of lines to skip, (numeric): "))
-            df = pd.read_csv(directory, skip_rows=skip_lines)
-        else:
-            df = pd.read_csv('data/rqc40516_MotorcycleQUT_engineer_crash.csv', skiprows=5)
-            df['CRASH_SPEED_LIMIT'] = df['CRASH_SPEED_LIMIT'].str.replace(' km/h', '').astype(int)
-
-        # Clean data types
-        df = clean_data_types(df)
-
-        # Encode categorical variables
-        categories = ['CRASH_SEVERITY', 'CRASH_TYPE', 'CRASH_NATURE', 'CRASH_ATMOSPHERIC_CONDITION']
-        df = pd.get_dummies(df, columns=categories)
-
-        # Select only numeric columns
-        numeric_types = ['int32', 'uint8', 'bool', 'int64', 'float64']
-        df = df.select_dtypes(include=numeric_types)
-
-        # Check for missing values and fill with column mean
-        missing_values_count = df['CASUALTY_TOTAL'].isnull().sum()
-        df.fillna(df.mean())
-
-        # Remove unnecessary columns
-        df.drop(columns=['CRASH_REF_NUMBER'], inplace=True)
-        y = df['CASUALTY_TOTAL']
-        # Define columns to exclude from the analysis
-        EXCLUDE = [
-            'LONGITUDE', 'YEAR', 'DCA', 'ID', 'LATIT', 'NAME', 'SEVERITY',
-            "CASUALTY", "CRASH_FIN_YEAR", "CRASH_HOUR", "MOPED"
-        ]
-
-        # Filter out excluded columns
-        df = df[[col for col in df.columns if not any(ex in col for ex in EXCLUDE)]]
-
-        # Prepare target variable
-
-        # Check for finite values and compute correlations
-        finite_check = df.apply(np.isfinite).all()
-        df_clean = df.loc[:, finite_check]
-        corr = df_clean.corr()
-
-        # Identify and remove highly correlated features
-        hc = findCorrelation(corr, cutoff=0.5)
-        trimmed_df = df_clean.drop(columns=hc)
-
-        # Feature selection
-        df_cleaner, fs = select_features(trimmed_df, y)
-        x_df = df_cleaner
-        y_df = y.to_frame(name="Y")
-        # y_df.rename(columns={"CASUALTY_TOTAL": "Y"}, inplace=True)
+        print('TODO add in dataset')

    if args['Keep_Fit'] == str(2) or args['Keep_Fit'] == 2:
        if manual_fit_spec is None:
@@ -253,6 +217,7 @@ def main(args, **kwargs):
            args['Manual_Fit'] = manual_fit_spec

    if args['problem_number'] == str(8) or args['problem_number'] == 8:
+        print('Maine County Dataset.')
        args['group'] = 'county'
        args['panels'] = 'element_ID'
        args['ID'] = 'element_ID'
@@ -264,9 +229,9 @@ def main(args, **kwargs):

    args['complexity_level'] = args.get('complexity_level', 6)

-
-    AnalystSpecs
-    args['AnalystSpecs'] = AnalystSpecs
+
+    # Initialize AnalystSpecs to None if not manually provided
+    args['AnalystSpecs'] = args.get('AnalystSpecs', None)

    if args['algorithm'] == 'sa':
        args_hyperparameters = {'alpha': float(args['temp_scale']),
@@ -312,7 +277,7 @@ def main(args, **kwargs):


    elif args['algorithm'] == 'de':
-        # force
+        # force variables
        args['must_include'] = args.get('force', [])

        args_hyperparameters = {'_AI': args.get('_AI', 2),
@@ -321,7 +286,6 @@ def main(args, **kwargs):
                                , '_pop_size': int(args['_hms']), 'instance_number': int(args['line'])
                                , 'Manual_Fit': args['Manual_Fit'],
                                'MP': int(args['MP'])
-
                                }

        args_hyperparameters = dict(args_hyperparameters)
@@ -347,7 +311,7 @@ if __name__ == '__main__':
        alg_parser.print_help()
    parser = argparse.ArgumentParser(prog='main',
                                     epilog=main.__doc__,
-                                     formatter_class=argparse.RawDescriptionHelpFormatter)
+                                     formatter_class=argparse.RawDescriptionHelpFormatter, conflict_handler='resolve')

    parser.add_argument('-line', type=int, default=44,
                        help='line to read in csv to pass in argument')
@@ -362,6 +326,7 @@ if __name__ == '__main__':
                break
            line_number_obs += 1
        args = dict(args)
+
        for key, value in args.items():
            try:
                # Attempt to parse the string value to a Python literal if value is a string.
@@ -378,6 +343,13 @@ if __name__ == '__main__':
            if "-algorithm" in action.option_strings:
                parser._optionals._actions[i].help = "optimization algorithm"

+        override = True
+        if override:
+            print('todo turn off, in testing phase')
+            parser.add_argument('-problem_number', default='8')
+            print('did it make it')
+
+
        if 'algorithm' not in args:
            parser.add_argument('-algorithm', type=str, default='hs',
                                help='optimization algorithm')
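Two of the changes above work together: the parser is now constructed with conflict_handler='resolve', which is what lets the temporary testing override re-register -problem_number without argparse raising a conflict. A standalone sketch of that behaviour (the option name and defaults are just for illustration):

import argparse

parser = argparse.ArgumentParser(conflict_handler='resolve')
parser.add_argument('-problem_number', default='3')
# re-adding the same option replaces the earlier definition instead of raising an error
parser.add_argument('-problem_number', default='8')
print(parser.parse_args([]).problem_number)  # -> '8'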
@@ -390,7 +362,7 @@ if __name__ == '__main__':
                            ' we want to split the data for processing')
        parser.add_argument('-supply_csv', type = str, help = 'enter the name of the csv, please include it as a full directorys')

-    else:  # DIDN"T SPECIFY LINES TRY EACH ONE
+    else:  # DIDN"T SPECIFY LINES TRY EACH ONE MANNUALY
        parser.add_argument('-com', type=str, default='MetaCode',
                            help='line to read csv')

metacountregressor/metaheuristics.py
CHANGED
@@ -15,8 +15,14 @@ from datetime import datetime
import numpy as np
import pandas as pd

-
-from .
+try:
+    from .pareto_file import Pareto, Solution
+    from .solution import ObjectiveFunction
+except:
+    print('Exception relative import')
+    from metacountregressor.pareto_file import Pareto, Solution
+    from metacountregressor.solution import ObjectiveFunction
+

HarmonySearchResults = namedtuple('HarmonySearchResults',
                                  ['elapsed_time', 'best_harmony', 'best_fitness', 'harmony_memories',
@@ -32,7 +38,7 @@ DifferentialEvolutionMulti = namedtuple('DifferentialEvolutionMulti',
                                        ['elapsed_time', 'best_solutions', 'population_solutions'])


-#helper function to plot the bic
+# helper function to plot the bic
def _plot(x, y, z, xlabel=None, ylabel=None, zlabel=None, filename=None):
    from matplotlib import pyplot as plt

@@ -54,7 +60,8 @@ def _plot(x, y, z, xlabel=None, ylabel=None, zlabel=None, filename=None):
    plt.savefig('bic.png')
    plt.show()

-
+
+# helper function to grab dictionary means
def dict_mean(dict_list,
              ignore=None):
    if ignore is None:
@@ -204,8 +211,7 @@ def different_evolution(objective_function, initial_slns=None, **kwargs):


def differential_evolution(objective_function, initial_slns=None, **kwargs):
-
-    raise Exception
+
    start = datetime.now()

    man = None
@@ -220,11 +226,8 @@ def differential_evolution(objective_function, initial_slns=None, **kwargs):
        de = Mutlithreaded_Meta(objective_function, **kwargs)
        best, pare = de.run_mp(initial_slns=initial_slns, mod_init=man)
    else:
-
        print('Not Multi Threaded')
-
        de = DifferentialEvolution(objective_function, **kwargs)
-
        best, pare = de.differential_evolution_run(initial_slns=initial_slns, mod_init=man)

    end = datetime.now()
@@ -393,12 +396,10 @@ class DifferentialEvolution(object):
    """

    def __init__(self, objective_function, **kwargs):
-
-        if not isinstance(objective_function, ObjectiveFunction):
-            raise TypeError
        self._obj_fun = objective_function
        if self._obj_fun._obj_1 is None:
-
+            print('no objective found, automatically selecting BIC')
+            self._obj_fun._obj_1 = 'bic'

        self._pop_size = kwargs.get('_pop_size', 20)
        if not isinstance(self._pop_size, int):
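With the isinstance check on ObjectiveFunction removed, the constructor now effectively duck-types its objective: any object exposing the attributes the optimizer touches can be passed, and a missing primary objective falls back to 'bic'. A rough sketch of that fallback in isolation (MinimalObjective is a hypothetical stand-in, not a class from the package):

class MinimalObjective:
    # only the attributes this hunk references; the real interface is much larger
    def __init__(self):
        self._obj_1 = None   # no objective selected yet
        self._obj_2 = None
        self.is_multi = False

obj = MinimalObjective()
if obj._obj_1 is None:       # mirrors the new default in __init__
    obj._obj_1 = 'bic'
print(obj._obj_1)            # -> 'bic'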
@@ -406,7 +407,7 @@ class DifferentialEvolution(object):
        elif self._pop_size <= 3:
            raise ValueError("_pop_size must be greater than 4")

-        self.F = kwargs.get('_AI', 2)  #
+        self.F = kwargs.get('_AI', 2)  # mutation scale
        self.iter = kwargs.get('_max_iter', 10000)
        self.cr = kwargs.get('_crossover_perc') or kwargs.get('_cr', 0.2)
        self.instance_number = str(kwargs.get('instance_number', 1))
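For context on the new '# mutation scale' comment: in the classic DE/rand/1 scheme, F scales the difference vector used to perturb a base candidate. The snippet below shows only that textbook step with made-up vectors; it is not the package's own mutation routine:

import numpy as np

F = 2.0                                   # mutation scale, read from kwargs['_AI'] above
a = np.array([1.0, 4.0])                  # base candidate
b, c = np.array([2.0, 1.0]), np.array([0.5, 3.0])
mutant = a + F * (b - c)                  # DE/rand/1: base + F * difference vector
print(mutant)                             # [4. 0.]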
@@ -415,12 +416,9 @@ class DifferentialEvolution(object):
        self._population = list()
        self.it_process = 1
        if objective_function.is_multi:
-
            self.obj_1 = objective_function._obj_1
            self.obj_2 = objective_function._obj_2
-
            self.pf = Pareto(self.obj_1, self.obj_2, True)
-
            self._pareto_population = list()
        else:
            self.obj_1 = objective_function._obj_1
@@ -555,7 +553,6 @@ class DifferentialEvolution(object):
        average_iteration = 0
        iterations_without_improvement = 0

-
        start_time = datetime.now()
        if self._obj_fun.use_random_seed():
            self._obj_fun.set_random_seed()
@@ -949,10 +946,9 @@ class SimulatedAnnealing(object):
            output_step.append(a)
            output_energy.append(b)
            output_best_energy.append(c)
-

-        return {'elapsed_time': elapsed_time, 'Iteration': iteration}  #TODO make this reachavble
-        #return output_step, output_energy, output_best_energy, self.best_energy, self.best_struct
+        return {'elapsed_time': elapsed_time, 'Iteration': iteration}  # TODO make this reachavble
+        # return output_step, output_energy, output_best_energy, self.best_energy, self.best_struct

    def _get_neighbour(self, current, mutations=None):
        neighbour = copy.deepcopy(current)
@@ -963,7 +959,6 @@ class SimulatedAnnealing(object):

        # number of paramaters in the model #TODO get the last value if 2

-
        num_of_changeablePARMs = 0

        self._obj_fun.nbr_routine(current)
@@ -1242,7 +1237,8 @@ class HarmonySearch(object):
        Initialize HS with the specified objective function. Note that this objective function must implement ObjectiveFunctionInterface.
        """
        self._obj_fun = objective_function
-
+        # for printing basics metrics
+        self.print_verbose = True
        # harmony_memory stores the best hms harmonies
        self._harmony_memory = list()
        # harmony_history stores all hms harmonies every nth improvisations (i.e., one 'generation')
@@ -1294,7 +1290,7 @@ class HarmonySearch(object):
    def does_it_appear(self, new):
        for d in self._harmony_memory:
            if self.mixed_list_chescker(d['layout'], new):
-                #print('same sln appears in population')
+                # print('same sln appears in population')
                return True

        return False
@@ -1314,6 +1310,7 @@ class HarmonySearch(object):
            self._obj_fun.set_random_seed()
        # fill harmony_memory using random parameter values by default, but with initial_harmonies if provided
        self._initialize(initial_harmonies, mod_init)
+        if self.print_verbose: print('Initialization complete')
        if self.pf.get_objective_is_multi():
            self._pareto_harmony_memory = self.pf.non_dominant_sorting(self._harmony_memory)
            generation_best = self._pareto_harmony_memory[0]
@@ -1333,6 +1330,9 @@ class HarmonySearch(object):
               iterations_without_improvement < self._obj_fun.get_termination_iter()):
            # generate new harmony
            elapsed_time = (datetime.now() - start_time).total_seconds()
+            if self.print_verbose:
+                print('Time: ', elapsed_time)
+                print('Improvisation: ', num_imp)
            harmony = list()

            for i in range(0, self._obj_fun.get_num_parameters()):
@@ -1374,6 +1374,7 @@ class HarmonySearch(object):
                               self.pf.get_objective_is_multi())
            num_imp += 1
            if iterations_without_improvement == 0:  # if there is any kind of improvement updae the logs
+                if self.print_verbose: print('improvement found at improvisation', num_imp)
                if self.pf.get_objective_is_multi():
                    try:
                        logger(num_imp, fitness, self._harmony_memory, True, self.get_instance_name(),