metacountregressor 0.1.69__py3-none-any.whl → 0.1.78__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacountregressor/data_split_helper.py +90 -0
- metacountregressor/helperprocess.py +115 -0
- metacountregressor/main.py +42 -69
- metacountregressor/metaheuristics.py +27 -26
- metacountregressor/solution.py +189 -628
- {metacountregressor-0.1.69.dist-info → metacountregressor-0.1.78.dist-info}/METADATA +1 -1
- {metacountregressor-0.1.69.dist-info → metacountregressor-0.1.78.dist-info}/RECORD +10 -9
- {metacountregressor-0.1.69.dist-info → metacountregressor-0.1.78.dist-info}/WHEEL +1 -1
- {metacountregressor-0.1.69.dist-info → metacountregressor-0.1.78.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.69.dist-info → metacountregressor-0.1.78.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,90 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import pandas as pd
|
3
|
+
|
4
|
+
|
5
|
+
|
6
|
+
|
7
|
+
class DataProcessor:
    """Split a feature frame and a response frame into train and test parts.

    The split is random (it draws from ``np.random``), so seed NumPy first
    when a reproducible partition is needed.

    Parameters
    ----------
    x_data : pandas.DataFrame
        Explanatory variables. When the panel path is taken the group and
        panel columns of this frame are re-encoded IN PLACE.
    y_data : pandas.DataFrame
        Response values; it is indexed with the row labels of ``x_data``,
        so both frames must share the same index.
    kwargs : dict
        Plain dictionary of options (passed positionally, not as ``**``).
        Keys read here: ``_obj_1``, ``_obj_2``, ``test_percentage``,
        ``val_percentage`` and, for panel data, ``panels`` and ``group``.

    Attributes set after construction
    ---------------------------------
    df_train, df_test, y_train, y_test : the four partitions.
    _x_data, _y_data : copies of the training partition (y cast to float).
    _x_data_test, _y_data_test : copies of the test partition (y as float).
    _samples, _characteristics, _panels : shape of the training features.
    """

    def __init__(self, x_data, y_data, kwargs):
        self._obj_1 = kwargs.get('_obj_1')
        self._obj_2 = kwargs.get('_obj_2')
        self.test_percentage = float(kwargs.get('test_percentage', 0))
        self.val_percentage = float(kwargs.get('val_percentage', 0))
        # A non-zero hold-out share is what flags the problem as multi-objective.
        self.is_multi = self.test_percentage != 0
        self._x_data = x_data
        self._y_data = y_data
        self._process_data(kwargs)

    def _process_data(self, kwargs):
        """Choose the partitioning routine and record feature metadata."""
        if self._obj_1 == 'MAE' or self._obj_2 in ["MAE", 'RMSE', 'MSE', 'RMSE_IN', 'RMSE_TEST']:
            self._handle_special_conditions(kwargs)
        else:
            self._standard_data_partition()

        # From here on self._x_data is the training partition only.
        self._characteristics_names = list(self._x_data.columns)
        self._max_group_all_means = 1
        self._exclude_this_test = [4]

    def _handle_special_conditions(self, kwargs):
        """Split by whole panels when a ``panels`` key is supplied, else by row."""
        if 'panels' in kwargs:
            self._process_panels_data(kwargs)
        else:
            self._standard_data_partition()

    def _process_panels_data(self, kwargs):
        """Encode group/panel columns and split so panels are never divided.

        Raises
        ------
        KeyError
            If ``kwargs`` lacks ``group``/``panels`` or the named columns are
            not present in the feature frame.
        """
        group_key = kwargs['group']
        panels_key = kwargs['panels']

        # Fail before mutating anything: the original swallowed this KeyError
        # and then raised it again one statement later, after the group
        # column had already been overwritten.
        if panels_key not in self._x_data.columns:
            raise KeyError(panels_key)

        # Process groups and panels
        self._x_data[group_key] = self._x_data[group_key].astype('category').cat.codes
        dense_rank = self._x_data[panels_key].rank(method='dense').astype(int)
        # Shift so that the smallest panel id is 1.
        self._x_data[panels_key] = dense_rank - (dense_rank.min() - 1)

        # Create training and test datasets
        unique_ids = np.unique(self._x_data[panels_key])
        training_size = int((1 - self.test_percentage - self.val_percentage) * len(unique_ids))
        training_ids = np.random.choice(unique_ids, training_size, replace=False)

        in_training = self._x_data[panels_key].isin(training_ids)
        train_idx = self._x_data.index[in_training]
        test_idx = self._x_data.index[~in_training]

        self._create_datasets(train_idx, test_idx)

    def _standard_data_partition(self):
        """Randomly assign individual rows to the training or the test set."""
        total_samples = len(self._x_data)
        training_size = int((1 - self.test_percentage - self.val_percentage) * total_samples)
        training_positions = np.random.choice(total_samples, training_size, replace=False)

        # Boolean mask instead of repeated `in` tests against an array
        # (linear rather than quadratic), keeping rows in their original order.
        in_training = np.zeros(total_samples, dtype=bool)
        in_training[training_positions] = True

        # _create_datasets selects with .loc (label based), so translate the
        # sampled POSITIONS into index LABELS; passing raw positions only
        # worked when the frame happened to carry a default 0..n-1 index.
        train_idx = self._x_data.index[in_training]
        test_idx = self._x_data.index[~in_training]

        self._create_datasets(train_idx, test_idx)

    def _create_datasets(self, train_idx, test_idx):
        """Materialise the partitions for the given train/test row labels."""
        self.df_train = self._x_data.loc[train_idx, :]
        self.df_test = self._x_data.loc[test_idx, :]
        self.y_train = self._y_data.loc[train_idx, :]
        self.y_test = self._y_data.loc[test_idx, :]

        self._x_data_test = self.df_test.copy()
        self._y_data_test = self.y_test.astype('float').copy()
        self._x_data = self.df_train.copy()
        self._y_data = self.y_train.astype('float').copy()

        # Handle different shapes
        if self._x_data.ndim == 2:  # Typical DataFrame
            self._samples, self._characteristics = self._x_data.shape
            self._panels = None
        elif self._x_data.ndim == 3:  # 3D structure, e.g., Panel or similar
            self._samples, self._panels, self._characteristics = self._x_data.shape
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
|
@@ -5,6 +5,121 @@ import matplotlib.pyplot as plt
|
|
5
5
|
|
6
6
|
plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
|
7
7
|
|
8
|
+
# Select the best features with a univariate F-test (scikit-learn SelectKBest)
def select_features(X_train, y_train, n_f=16):
    """Keep the ``n_f`` columns of ``X_train`` that score best on an F-test.

    Parameters
    ----------
    X_train : pandas.DataFrame
        Candidate explanatory variables.
    y_train : array-like
        Response values aligned with the rows of ``X_train``.
    n_f : int, default 16
        Number of features to keep; capped at the number of columns.

    Returns
    -------
    (pandas.DataFrame, object)
        The reduced frame and the fitted ``SelectKBest`` selector. When
        selection cannot be performed (scikit-learn missing, or the fit is
        rejected) the frame is returned unchanged together with its column
        index, as a best-effort fallback.
    """
    # Keep the optional dependency local so the module imports without it.
    try:
        from sklearn.feature_selection import SelectKBest
        from sklearn.feature_selection import f_regression
    except ImportError:
        print('import error, not performing feature selection')
        return X_train, X_train.columns

    feature_names = X_train.columns
    # Honour n_f (it used to be ignored in favour of a hard-coded 16) and
    # never ask for more features than exist.
    k = min(int(n_f), len(feature_names))
    fs = SelectKBest(score_func=f_regression, k=k)

    # learn relationship from training data
    try:
        fs.fit(X_train, y_train)
    except (ValueError, TypeError) as err:
        # Best effort, as before, but without mislabelling it an import error.
        print('feature selection failed ({}), not performing feature selection'.format(err))
        return X_train, feature_names

    mask = fs.get_support()  # Boolean array of selected features
    selected_features = [name for keep, name in zip(mask, feature_names) if keep]
    return X_train[selected_features], fs
|
28
|
+
|
29
|
+
|
30
|
+
# Cut off highly correlated features
|
31
|
+
|
32
|
+
|
33
|
+
|
34
|
+
|
35
|
+
|
36
|
+
def findCorrelation(corr, cutoff=0.9, exact=None):
    """
    This function is the Python implementation of the R function
    `findCorrelation()`.

    Relies on numpy and pandas, so must have them pre-installed.

    It searches through a correlation matrix and returns a list of column names
    to remove to reduce pairwise correlations.

    For the documentation of the R function, see
    https://www.rdocumentation.org/packages/caret/topics/findCorrelation
    and for the source code of `findCorrelation()`, see
    https://github.com/topepo/caret/blob/master/pkg/caret/R/findCorrelation.R

    -----------------------------------------------------------------------------

    Parameters:
    -----------
    corr: pandas dataframe.
        A correlation matrix as a pandas dataframe.
    cutoff: float, default: 0.9.
        A numeric value for the pairwise absolute correlation cutoff
    exact: bool, default: None
        A boolean value that determines whether the average correlations be
        recomputed at each step. ``None`` selects the exact procedure for
        matrices with fewer than 100 columns and the fast one otherwise.
    -----------------------------------------------------------------------------
    Returns:
    --------
    list of column names
    -----------------------------------------------------------------------------
    Raises:
    -------
    ValueError
        If the matrix is not square with matching row/column labels, is not
        symmetric, or contains missing values.
    -----------------------------------------------------------------------------
    Example:
    --------
    R1 = pd.DataFrame({
        'x1': [1.0, 0.86, 0.56, 0.32, 0.85],
        'x2': [0.86, 1.0, 0.01, 0.74, 0.32],
        'x3': [0.56, 0.01, 1.0, 0.65, 0.91],
        'x4': [0.32, 0.74, 0.65, 1.0, 0.36],
        'x5': [0.85, 0.32, 0.91, 0.36, 1.0]
    }, index=['x1', 'x2', 'x3', 'x4', 'x5'])

    findCorrelation(R1, cutoff=0.6, exact=False)  # ['x4', 'x5', 'x1', 'x3']
    findCorrelation(R1, cutoff=0.6, exact=True)   # ['x1', 'x5', 'x4']
    """

    def _findCorrelation_fast(corr, avg, cutoff):
        # One pass: for every pair above the cutoff (strict upper triangle,
        # row-major order) drop whichever member has the larger mean
        # absolute correlation, using the means computed up front.
        values = corr.to_numpy(dtype=float)
        strictly_upper = np.triu(np.ones(values.shape, dtype=bool), k=1)
        rows, cols = np.nonzero(strictly_upper & (values > cutoff))

        means = avg.to_numpy(dtype=float)
        drop_col = means[cols] > means[rows]
        ordered = np.concatenate([cols[drop_col], rows[~drop_col]])

        labels = list(corr.columns)
        # dict.fromkeys de-duplicates while keeping first-seen order.
        return [labels[pos] for pos in dict.fromkeys(ordered.tolist())]

    def _findCorrelation_exact(corr, avg, cutoff):
        # Visit variables from highest to lowest mean correlation and
        # re-evaluate the means after every removal.
        order = avg.sort_values(ascending=False).index
        labels = list(order)
        # Work on a private float copy so blanking entries never touches the
        # caller's frame (and does not depend on DataFrame.values being
        # writable).
        mat = corr.loc[order, order].to_numpy(dtype=float, copy=True)
        np.fill_diagonal(mat, np.nan)

        deletecol = []
        size = len(labels)
        for a in range(size - 1):
            for b in range(a + 1, size):
                # Comparisons against NaN are False, so already-removed
                # variables are skipped automatically.
                if mat[a, b] > cutoff:
                    if np.nanmean(mat[:, a]) > np.nanmean(mat[:, b]):
                        drop = a
                    else:
                        drop = b
                    deletecol.append(labels[drop])
                    mat[drop, :] = np.nan
                    mat[:, drop] = np.nan

        return deletecol

    if corr.shape[0] != corr.shape[1] or not corr.columns.equals(corr.index):
        raise ValueError("correlation matrix is not symmetric.")
    values = corr.to_numpy(dtype=float)
    if np.isnan(values).any():
        raise ValueError("correlation matrix has missing values.")
    if not np.allclose(values, values.T):
        raise ValueError("correlation matrix is not symmetric.")

    acorr = corr.abs()
    avg = acorr.mean()

    if exact is None:
        # Same default as caret: exact unless the matrix is large.
        exact = corr.shape[1] < 100

    if exact:
        return _findCorrelation_exact(acorr, avg, cutoff)
    return _findCorrelation_fast(acorr, avg, cutoff)
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
|
115
|
def clean_data_types(df):
    """Coerce every object-dtype column of ``df`` to a numeric dtype.

    The frame is modified in place and also returned. Values that cannot be
    parsed as numbers become NaN (``errors='coerce'``), so a purely textual
    column ends up entirely NaN rather than raising.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose object columns should be converted.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, after conversion.
    """
    for col in df.columns:
        if df[col].dtype == 'object':
            # Attempt to convert the column to numeric type
            df[col] = pd.to_numeric(df[col], errors='coerce')
    return df
|
122
|
+
|
8
123
|
|
9
124
|
def drop_correlations(x_df, percentage=0.85):
|
10
125
|
cor_matrix = x_df.corr().abs()
|
metacountregressor/main.py
CHANGED
@@ -9,14 +9,12 @@ import numpy as np
|
|
9
9
|
import pandas as pd
|
10
10
|
from pandas import DataFrame
|
11
11
|
from pandas.io.parsers import TextFileReader
|
12
|
-
|
13
12
|
import helperprocess
|
14
13
|
from metaheuristics import (differential_evolution,
|
15
14
|
harmony_search,
|
16
15
|
simulated_annealing)
|
17
16
|
from solution import ObjectiveFunction
|
18
17
|
|
19
|
-
from test_motor import *
|
20
18
|
|
21
19
|
warnings.simplefilter("ignore")
|
22
20
|
|
@@ -37,7 +35,7 @@ def main(args, **kwargs):
|
|
37
35
|
# removing junk files if specicified
|
38
36
|
helperprocess.remove_files(args.get('removeFiles', True))
|
39
37
|
|
40
|
-
# do we want
|
38
|
+
# do we want to run a test
|
41
39
|
if args.get('com', False) == 'MetaCode':
|
42
40
|
print('Testing the Python Package') # TODO add in python package import
|
43
41
|
# Read data from CSV file
|
@@ -64,6 +62,7 @@ def main(args, **kwargs):
|
|
64
62
|
print('the dataset is', dataset)
|
65
63
|
manual_fit_spec = args.get('Manual_Fit', None)
|
66
64
|
if dataset == 1:
|
65
|
+
print('Stage 5 A Short.')
|
67
66
|
df = pd.read_csv('./data/1848.csv') # read in the data
|
68
67
|
y_df = df[['FSI']] # only consider crashes
|
69
68
|
y_df.rename(columns={"FSI": "Y"}, inplace=True)
|
@@ -71,6 +70,7 @@ def main(args, **kwargs):
|
|
71
70
|
x_df = helperprocess.as_wide_factor(x_df)
|
72
71
|
|
73
72
|
elif dataset == 3:
|
73
|
+
print('Stage 5 A Data Complete.')
|
74
74
|
x_df = pd.read_csv('./data/Stage5A_1848_All_Initial_Columns.csv') # drop the ID columns
|
75
75
|
drop_these = ['Id', 'ID', 'old', 'G_N']
|
76
76
|
for i in drop_these:
|
@@ -159,7 +159,28 @@ def main(args, **kwargs):
|
|
159
159
|
'transformations': ['no', 'no', 'no', 'no'],
|
160
160
|
'dispersion': 0
|
161
161
|
}
|
162
|
-
|
162
|
+
elif dataset == 8:
|
163
|
+
print('Main County')
|
164
|
+
df = pd.read_csv('./data/rural_int.csv') # read in the data
|
165
|
+
y_df = df[['crashes']].copy() # only consider crashes
|
166
|
+
y_df.rename(columns={"crashes": "Y"}, inplace=True)
|
167
|
+
panels = df['orig_ID']
|
168
|
+
try:
|
169
|
+
x_df = df.drop(columns=['crashes', 'year', 'orig_ID',
|
170
|
+
'jurisdiction', 'town', 'maint_region', 'weather_station', 'dummy_winter_2']) # was dropped postcode
|
171
|
+
print('dropping for test')
|
172
|
+
x_df = x_df.drop(columns=['month', 'inj.fat', 'PDO'])
|
173
|
+
x_df = x_df.drop(columns = [ 'zonal_ID', 'ln_AADT', 'ln_seg'])
|
174
|
+
x_df['rumble_install_year'] = x_df['rumble_install_year'].astype('category').cat.codes
|
175
|
+
x_df.rename(columns={"rumble_install_year": "has_rumble"}, inplace=True)
|
176
|
+
|
177
|
+
except:
|
178
|
+
x_df = df.drop(columns=['Y']) # was dropped postcode
|
179
|
+
|
180
|
+
group_grab = x_df['county']
|
181
|
+
x_df = x_df.drop(columns =['county'])
|
182
|
+
x_df = helperprocess.interactions(x_df, drop_this_perc=0.8)
|
183
|
+
x_df['county'] = group_grab
|
163
184
|
|
164
185
|
elif dataset == 9:
|
165
186
|
df = pd.read_csv('panel_synth.csv') # read in the data
|
@@ -186,64 +207,7 @@ def main(args, **kwargs):
|
|
186
207
|
|
187
208
|
x_df = helperprocess.interactions(x_df, keep)
|
188
209
|
else: # the dataset has been selected in the program as something else
|
189
|
-
|
190
|
-
from tkinter.filedialog import askopenfilename
|
191
|
-
|
192
|
-
ASK_ANALALYST = 0
|
193
|
-
if ASK_ANALALYST:
|
194
|
-
root = Tk()
|
195
|
-
root.withdraw()
|
196
|
-
# Prompt the user to select a directory
|
197
|
-
directory = askopenfilename(title="Select File For Analysis")
|
198
|
-
skip_lines = int(input("Select the number of lines to skip, (numeric): "))
|
199
|
-
df = pd.read_csv(directory, skip_rows=skip_lines)
|
200
|
-
else:
|
201
|
-
df = pd.read_csv('data/rqc40516_MotorcycleQUT_engineer_crash.csv', skiprows=5)
|
202
|
-
df['CRASH_SPEED_LIMIT'] = df['CRASH_SPEED_LIMIT'].str.replace(' km/h', '').astype(int)
|
203
|
-
|
204
|
-
# Clean data types
|
205
|
-
df = clean_data_types(df)
|
206
|
-
|
207
|
-
# Encode categorical variables
|
208
|
-
categories = ['CRASH_SEVERITY', 'CRASH_TYPE', 'CRASH_NATURE', 'CRASH_ATMOSPHERIC_CONDITION']
|
209
|
-
df = pd.get_dummies(df, columns=categories)
|
210
|
-
|
211
|
-
# Select only numeric columns
|
212
|
-
numeric_types = ['int32', 'uint8', 'bool', 'int64', 'float64']
|
213
|
-
df = df.select_dtypes(include=numeric_types)
|
214
|
-
|
215
|
-
# Check for missing values and fill with column mean
|
216
|
-
missing_values_count = df['CASUALTY_TOTAL'].isnull().sum()
|
217
|
-
df.fillna(df.mean())
|
218
|
-
|
219
|
-
# Remove unnecessary columns
|
220
|
-
df.drop(columns=['CRASH_REF_NUMBER'], inplace=True)
|
221
|
-
y = df['CASUALTY_TOTAL']
|
222
|
-
# Define columns to exclude from the analysis
|
223
|
-
EXCLUDE = [
|
224
|
-
'LONGITUDE', 'YEAR', 'DCA', 'ID', 'LATIT', 'NAME', 'SEVERITY',
|
225
|
-
"CASUALTY", "CRASH_FIN_YEAR", "CRASH_HOUR", "MOPED"
|
226
|
-
]
|
227
|
-
|
228
|
-
# Filter out excluded columns
|
229
|
-
df = df[[col for col in df.columns if not any(ex in col for ex in EXCLUDE)]]
|
230
|
-
|
231
|
-
# Prepare target variable
|
232
|
-
|
233
|
-
# Check for finite values and compute correlations
|
234
|
-
finite_check = df.apply(np.isfinite).all()
|
235
|
-
df_clean = df.loc[:, finite_check]
|
236
|
-
corr = df_clean.corr()
|
237
|
-
|
238
|
-
# Identify and remove highly correlated features
|
239
|
-
hc = findCorrelation(corr, cutoff=0.5)
|
240
|
-
trimmed_df = df_clean.drop(columns=hc)
|
241
|
-
|
242
|
-
# Feature selection
|
243
|
-
df_cleaner, fs = select_features(trimmed_df, y)
|
244
|
-
x_df = df_cleaner
|
245
|
-
y_df = y.to_frame(name="Y")
|
246
|
-
# y_df.rename(columns={"CASUALTY_TOTAL": "Y"}, inplace=True)
|
210
|
+
print('TODO add in dataset')
|
247
211
|
|
248
212
|
if args['Keep_Fit'] == str(2) or args['Keep_Fit'] == 2:
|
249
213
|
if manual_fit_spec is None:
|
@@ -253,6 +217,7 @@ def main(args, **kwargs):
|
|
253
217
|
args['Manual_Fit'] = manual_fit_spec
|
254
218
|
|
255
219
|
if args['problem_number'] == str(8) or args['problem_number'] == 8:
|
220
|
+
print('Maine County Dataset.')
|
256
221
|
args['group'] = 'county'
|
257
222
|
args['panels'] = 'element_ID'
|
258
223
|
args['ID'] = 'element_ID'
|
@@ -264,9 +229,9 @@ def main(args, **kwargs):
|
|
264
229
|
|
265
230
|
args['complexity_level'] = args.get('complexity_level', 6)
|
266
231
|
|
267
|
-
|
268
|
-
AnalystSpecs
|
269
|
-
args['AnalystSpecs'] = AnalystSpecs
|
232
|
+
|
233
|
+
# Initialize AnalystSpecs to None if not manually provided
|
234
|
+
args['AnalystSpecs'] = args.get('AnalystSpecs', None)
|
270
235
|
|
271
236
|
if args['algorithm'] == 'sa':
|
272
237
|
args_hyperparameters = {'alpha': float(args['temp_scale']),
|
@@ -312,7 +277,7 @@ def main(args, **kwargs):
|
|
312
277
|
|
313
278
|
|
314
279
|
elif args['algorithm'] == 'de':
|
315
|
-
# force
|
280
|
+
# force variables
|
316
281
|
args['must_include'] = args.get('force', [])
|
317
282
|
|
318
283
|
args_hyperparameters = {'_AI': args.get('_AI', 2),
|
@@ -321,7 +286,6 @@ def main(args, **kwargs):
|
|
321
286
|
, '_pop_size': int(args['_hms']), 'instance_number': int(args['line'])
|
322
287
|
, 'Manual_Fit': args['Manual_Fit'],
|
323
288
|
'MP': int(args['MP'])
|
324
|
-
|
325
289
|
}
|
326
290
|
|
327
291
|
args_hyperparameters = dict(args_hyperparameters)
|
@@ -347,7 +311,7 @@ if __name__ == '__main__':
|
|
347
311
|
alg_parser.print_help()
|
348
312
|
parser = argparse.ArgumentParser(prog='main',
|
349
313
|
epilog=main.__doc__,
|
350
|
-
formatter_class=argparse.RawDescriptionHelpFormatter)
|
314
|
+
formatter_class=argparse.RawDescriptionHelpFormatter, conflict_handler='resolve')
|
351
315
|
|
352
316
|
parser.add_argument('-line', type=int, default=44,
|
353
317
|
help='line to read in csv to pass in argument')
|
@@ -362,6 +326,7 @@ if __name__ == '__main__':
|
|
362
326
|
break
|
363
327
|
line_number_obs += 1
|
364
328
|
args = dict(args)
|
329
|
+
|
365
330
|
for key, value in args.items():
|
366
331
|
try:
|
367
332
|
# Attempt to parse the string value to a Python literal if value is a string.
|
@@ -378,6 +343,13 @@ if __name__ == '__main__':
|
|
378
343
|
if "-algorithm" in action.option_strings:
|
379
344
|
parser._optionals._actions[i].help = "optimization algorithm"
|
380
345
|
|
346
|
+
override = True
|
347
|
+
if override:
|
348
|
+
print('todo turn off, in testing phase')
|
349
|
+
parser.add_argument('-problem_number', default='8')
|
350
|
+
print('did it make it')
|
351
|
+
|
352
|
+
|
381
353
|
if 'algorithm' not in args:
|
382
354
|
parser.add_argument('-algorithm', type=str, default='hs',
|
383
355
|
help='optimization algorithm')
|
@@ -390,7 +362,7 @@ if __name__ == '__main__':
|
|
390
362
|
' we want to split the data for processing')
|
391
363
|
parser.add_argument('-supply_csv', type = str, help = 'enter the name of the csv, please include it as a full directorys')
|
392
364
|
|
393
|
-
else: # DIDN"T SPECIFY LINES TRY EACH ONE
|
365
|
+
else: # DIDN"T SPECIFY LINES TRY EACH ONE MANNUALY
|
394
366
|
parser.add_argument('-com', type=str, default='MetaCode',
|
395
367
|
help='line to read csv')
|
396
368
|
|
@@ -404,5 +376,6 @@ if __name__ == '__main__':
|
|
404
376
|
profiler = cProfile.Profile()
|
405
377
|
profiler.runcall(main,args)
|
406
378
|
profiler.print_stats(sort='time')
|
379
|
+
#TOO MAX_TIME
|
407
380
|
|
408
381
|
|
@@ -15,8 +15,14 @@ from datetime import datetime
|
|
15
15
|
import numpy as np
|
16
16
|
import pandas as pd
|
17
17
|
|
18
|
-
|
19
|
-
from .
|
18
|
+
try:
|
19
|
+
from .pareto_file import Pareto, Solution
|
20
|
+
from .solution import ObjectiveFunction
|
21
|
+
except:
|
22
|
+
print('Exception relative import')
|
23
|
+
from metacountregressor.pareto_file import Pareto, Solution
|
24
|
+
from metacountregressor.solution import ObjectiveFunction
|
25
|
+
|
20
26
|
|
21
27
|
HarmonySearchResults = namedtuple('HarmonySearchResults',
|
22
28
|
['elapsed_time', 'best_harmony', 'best_fitness', 'harmony_memories',
|
@@ -32,7 +38,7 @@ DifferentialEvolutionMulti = namedtuple('DifferentialEvolutionMulti',
|
|
32
38
|
['elapsed_time', 'best_solutions', 'population_solutions'])
|
33
39
|
|
34
40
|
|
35
|
-
#helper function to plot the bic
|
41
|
+
# helper function to plot the bic
|
36
42
|
def _plot(x, y, z, xlabel=None, ylabel=None, zlabel=None, filename=None):
|
37
43
|
from matplotlib import pyplot as plt
|
38
44
|
|
@@ -54,7 +60,8 @@ def _plot(x, y, z, xlabel=None, ylabel=None, zlabel=None, filename=None):
|
|
54
60
|
plt.savefig('bic.png')
|
55
61
|
plt.show()
|
56
62
|
|
57
|
-
|
63
|
+
|
64
|
+
# helper function to grab dictionary means
|
58
65
|
def dict_mean(dict_list,
|
59
66
|
ignore=None):
|
60
67
|
if ignore is None:
|
@@ -75,10 +82,10 @@ def dict_mean(dict_list,
|
|
75
82
|
|
76
83
|
def pareto_logger(pareto_set, iteration, complexity, folder=None):
|
77
84
|
if folder is not None:
|
78
|
-
name = folder + '/
|
85
|
+
name = folder + '/pareto_logger_complex' + str(complexity) + ".csv"
|
79
86
|
|
80
87
|
else:
|
81
|
-
name = '
|
88
|
+
name = 'pareto_logger_complex' + str(complexity) + ".csv"
|
82
89
|
|
83
90
|
st22 = pd.DataFrame(pareto_set)
|
84
91
|
st22.to_csv(name, mode='a', index=False, header=True)
|
@@ -204,8 +211,7 @@ def different_evolution(objective_function, initial_slns=None, **kwargs):
|
|
204
211
|
|
205
212
|
|
206
213
|
def differential_evolution(objective_function, initial_slns=None, **kwargs):
|
207
|
-
|
208
|
-
raise Exception
|
214
|
+
|
209
215
|
start = datetime.now()
|
210
216
|
|
211
217
|
man = None
|
@@ -220,11 +226,8 @@ def differential_evolution(objective_function, initial_slns=None, **kwargs):
|
|
220
226
|
de = Mutlithreaded_Meta(objective_function, **kwargs)
|
221
227
|
best, pare = de.run_mp(initial_slns=initial_slns, mod_init=man)
|
222
228
|
else:
|
223
|
-
|
224
229
|
print('Not Multi Threaded')
|
225
|
-
|
226
230
|
de = DifferentialEvolution(objective_function, **kwargs)
|
227
|
-
|
228
231
|
best, pare = de.differential_evolution_run(initial_slns=initial_slns, mod_init=man)
|
229
232
|
|
230
233
|
end = datetime.now()
|
@@ -393,12 +396,10 @@ class DifferentialEvolution(object):
|
|
393
396
|
"""
|
394
397
|
|
395
398
|
def __init__(self, objective_function, **kwargs):
|
396
|
-
|
397
|
-
if not isinstance(objective_function, ObjectiveFunction):
|
398
|
-
raise TypeError
|
399
399
|
self._obj_fun = objective_function
|
400
400
|
if self._obj_fun._obj_1 is None:
|
401
|
-
|
401
|
+
print('no objective found, automatically selecting BIC')
|
402
|
+
self._obj_fun._obj_1 = 'bic'
|
402
403
|
|
403
404
|
self._pop_size = kwargs.get('_pop_size', 20)
|
404
405
|
if not isinstance(self._pop_size, int):
|
@@ -406,7 +407,7 @@ class DifferentialEvolution(object):
|
|
406
407
|
elif self._pop_size <= 3:
|
407
408
|
raise ValueError("_pop_size must be greater than 4")
|
408
409
|
|
409
|
-
self.F = kwargs.get('_AI', 2) #
|
410
|
+
self.F = kwargs.get('_AI', 2) # mutation scale
|
410
411
|
self.iter = kwargs.get('_max_iter', 10000)
|
411
412
|
self.cr = kwargs.get('_crossover_perc') or kwargs.get('_cr', 0.2)
|
412
413
|
self.instance_number = str(kwargs.get('instance_number', 1))
|
@@ -415,12 +416,9 @@ class DifferentialEvolution(object):
|
|
415
416
|
self._population = list()
|
416
417
|
self.it_process = 1
|
417
418
|
if objective_function.is_multi:
|
418
|
-
|
419
419
|
self.obj_1 = objective_function._obj_1
|
420
420
|
self.obj_2 = objective_function._obj_2
|
421
|
-
|
422
421
|
self.pf = Pareto(self.obj_1, self.obj_2, True)
|
423
|
-
|
424
422
|
self._pareto_population = list()
|
425
423
|
else:
|
426
424
|
self.obj_1 = objective_function._obj_1
|
@@ -555,7 +553,6 @@ class DifferentialEvolution(object):
|
|
555
553
|
average_iteration = 0
|
556
554
|
iterations_without_improvement = 0
|
557
555
|
|
558
|
-
|
559
556
|
start_time = datetime.now()
|
560
557
|
if self._obj_fun.use_random_seed():
|
561
558
|
self._obj_fun.set_random_seed()
|
@@ -949,10 +946,9 @@ class SimulatedAnnealing(object):
|
|
949
946
|
output_step.append(a)
|
950
947
|
output_energy.append(b)
|
951
948
|
output_best_energy.append(c)
|
952
|
-
|
953
949
|
|
954
|
-
return {'elapsed_time': elapsed_time, 'Iteration': iteration} #TODO make this reachavble
|
955
|
-
#return output_step, output_energy, output_best_energy, self.best_energy, self.best_struct
|
950
|
+
return {'elapsed_time': elapsed_time, 'Iteration': iteration} # TODO make this reachavble
|
951
|
+
# return output_step, output_energy, output_best_energy, self.best_energy, self.best_struct
|
956
952
|
|
957
953
|
def _get_neighbour(self, current, mutations=None):
|
958
954
|
neighbour = copy.deepcopy(current)
|
@@ -963,7 +959,6 @@ class SimulatedAnnealing(object):
|
|
963
959
|
|
964
960
|
# number of paramaters in the model #TODO get the last value if 2
|
965
961
|
|
966
|
-
|
967
962
|
num_of_changeablePARMs = 0
|
968
963
|
|
969
964
|
self._obj_fun.nbr_routine(current)
|
@@ -1242,7 +1237,8 @@ class HarmonySearch(object):
|
|
1242
1237
|
Initialize HS with the specified objective function. Note that this objective function must implement ObjectiveFunctionInterface.
|
1243
1238
|
"""
|
1244
1239
|
self._obj_fun = objective_function
|
1245
|
-
|
1240
|
+
# for printing basics metrics
|
1241
|
+
self.print_verbose = True
|
1246
1242
|
# harmony_memory stores the best hms harmonies
|
1247
1243
|
self._harmony_memory = list()
|
1248
1244
|
# harmony_history stores all hms harmonies every nth improvisations (i.e., one 'generation')
|
@@ -1294,7 +1290,7 @@ class HarmonySearch(object):
|
|
1294
1290
|
def does_it_appear(self, new):
|
1295
1291
|
for d in self._harmony_memory:
|
1296
1292
|
if self.mixed_list_chescker(d['layout'], new):
|
1297
|
-
#print('same sln appears in population')
|
1293
|
+
# print('same sln appears in population')
|
1298
1294
|
return True
|
1299
1295
|
|
1300
1296
|
return False
|
@@ -1314,6 +1310,7 @@ class HarmonySearch(object):
|
|
1314
1310
|
self._obj_fun.set_random_seed()
|
1315
1311
|
# fill harmony_memory using random parameter values by default, but with initial_harmonies if provided
|
1316
1312
|
self._initialize(initial_harmonies, mod_init)
|
1313
|
+
if self.print_verbose: print('Initialization complete')
|
1317
1314
|
if self.pf.get_objective_is_multi():
|
1318
1315
|
self._pareto_harmony_memory = self.pf.non_dominant_sorting(self._harmony_memory)
|
1319
1316
|
generation_best = self._pareto_harmony_memory[0]
|
@@ -1333,6 +1330,9 @@ class HarmonySearch(object):
|
|
1333
1330
|
iterations_without_improvement < self._obj_fun.get_termination_iter()):
|
1334
1331
|
# generate new harmony
|
1335
1332
|
elapsed_time = (datetime.now() - start_time).total_seconds()
|
1333
|
+
if self.print_verbose:
|
1334
|
+
print('Time: ', elapsed_time)
|
1335
|
+
print('Improvisation: ', num_imp)
|
1336
1336
|
harmony = list()
|
1337
1337
|
|
1338
1338
|
for i in range(0, self._obj_fun.get_num_parameters()):
|
@@ -1374,6 +1374,7 @@ class HarmonySearch(object):
|
|
1374
1374
|
self.pf.get_objective_is_multi())
|
1375
1375
|
num_imp += 1
|
1376
1376
|
if iterations_without_improvement == 0: # if there is any kind of improvement updae the logs
|
1377
|
+
if self.print_verbose: print('improvement found at improvisation', num_imp)
|
1377
1378
|
if self.pf.get_objective_is_multi():
|
1378
1379
|
try:
|
1379
1380
|
logger(num_imp, fitness, self._harmony_memory, True, self.get_instance_name(),
|