SearchLibrium 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- old_code/__init__.py +8 -0
- old_code/_choice_model.py +1363 -0
- old_code/_device.py +145 -0
- old_code/akshay_test.py +125 -0
- old_code/boxcox_functions.py +116 -0
- old_code/draws.py +128 -0
- old_code/harmony.py +1261 -0
- old_code/latent_class_constrained.py +434 -0
- old_code/latent_class_mixed_model.py +1566 -0
- old_code/latent_class_model.py +1281 -0
- old_code/latent_main.py +945 -0
- old_code/main.py +1880 -0
- old_code/main_ol.py +127 -0
- old_code/misc.py +303 -0
- old_code/mixed_logit.py +1553 -0
- old_code/multinomial_logit.py +559 -0
- old_code/ordered_logit.py +1641 -0
- old_code/ordered_logit_mixed.py +103 -0
- old_code/ordered_logit_multinomial.py +701 -0
- old_code/r_ordered.py +168 -0
- old_code/rrm.py +521 -0
- old_code/search.py +3485 -0
- old_code/siman.py +1023 -0
- old_code/threshold.py +777 -0
- searchlibrium-0.0.1.dist-info/METADATA +21 -0
- searchlibrium-0.0.1.dist-info/RECORD +28 -0
- searchlibrium-0.0.1.dist-info/WHEEL +5 -0
- searchlibrium-0.0.1.dist-info/top_level.txt +1 -0
old_code/latent_main.py
ADDED
|
@@ -0,0 +1,945 @@
|
|
|
1
|
+
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
|
2
|
+
SOLUTION OF EXAMPLE DISCRETE CHOICE MODELS
|
|
3
|
+
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
|
4
|
+
|
|
5
|
+
# NOTE:
|
|
6
|
+
# varnames: All explanatory variables that have been defined
|
|
7
|
+
# isvars: Individual-specific variables. These variables do not vary across alternatives.
|
|
8
|
+
# asvars: Alternative-specific variables. These variables vary across alternatives.
|
|
9
|
+
# alts: Alternatives for each choice. E.g., Choice = transport mode, Alternatives = {car, bus, train}
|
|
10
|
+
# base_alts: The base (a.k.a., reference) alternative
|
|
11
|
+
# transvars: Variables that have transformations applied to them
|
|
12
|
+
# randvars: Random variables
|
|
13
|
+
# corvars: Correlated variables
|
|
14
|
+
# bcvars: Box Cox transformed variables
|
|
15
|
+
|
|
16
|
+
''' ---------------------------------------------------------- '''
|
|
17
|
+
''' LIBRARIES '''
|
|
18
|
+
''' ---------------------------------------------------------- '''
|
|
19
|
+
import scipy
|
|
20
|
+
|
|
21
|
+
from harmony import*
|
|
22
|
+
from siman import*
|
|
23
|
+
from threshold import*
|
|
24
|
+
from latent_class_mixed_model import LatentClassMixedModel
|
|
25
|
+
from latent_class_model import LatentClassModel
|
|
26
|
+
from mixed_logit import MixedLogit
|
|
27
|
+
from multinomial_logit import MultinomialLogit
|
|
28
|
+
import pandas as pd
|
|
29
|
+
import argparse
|
|
30
|
+
import os
|
|
31
|
+
import numpy as np
|
|
32
|
+
#import time
|
|
33
|
+
|
|
34
|
+
'''' ---------------------------------------------------------- '''
|
|
35
|
+
''' SCRIPT. MULTINOMIAL '''
|
|
36
|
+
''' ----------------------------------------------------------- '''
|
|
37
|
+
def fit_mnl_example():
    """Fit a multinomial logit model on the Swissmetro data and print a summary.

    Reads ``Swissmetro_final.csv`` (path relative to the working directory),
    configures a ``MultinomialLogit`` with an intercept and ``'SM'`` as the
    base alternative, then fits it, computes the null log-likelihood and
    prints the summary. Returns nothing.
    """
    data = pd.read_csv("Swissmetro_final.csv")

    # 'AGE' is the only individual-specific variable; the rest vary by alternative.
    explanatory = ['COST', 'TIME', 'HEADWAY', 'SEATS', 'AGE']
    individual_specific = ['AGE']

    model = MultinomialLogit()
    model.setup(
        X=data[explanatory],
        y=data['CHOICE'],
        varnames=explanatory,
        isvars=individual_specific,
        fit_intercept=True,
        alts=data['alt'],
        ids=data['custom_id'],
        avail=data['AV'],
        base_alt='SM',
        gtol=1e-04,
    )
    model.fit()
    model.get_loglik_null()
    model.summarise()
|
|
54
|
+
|
|
55
|
+
'''' ---------------------------------------------------------- '''
|
|
56
|
+
''' SCRIPT. MULTINOMIAL '''
|
|
57
|
+
''' ----------------------------------------------------------- '''
|
|
58
|
+
def fit_mnl_box_example():
    """Fit a multinomial logit model with transformed variables on synthetic data.

    Reads ``artificial_1b_multi_nonlinear.csv`` (path relative to the working
    directory), declares ``added_fixed1`` and ``added_fixed2`` as transformed
    variables, then fits the model, computes the null log-likelihood and
    prints the summary. Returns nothing.
    """
    data = pd.read_csv("artificial_1b_multi_nonlinear.csv")

    explanatory = [
        'added_fixed1', 'added_fixed2', 'added_fixed3', 'added_fixed4', 'added_fixed5',
        'added_fixed6', 'added_fixed7', 'added_fixed8', 'added_fixed9', 'added_fixed10',
        'nonsig1', 'nonsig2', 'nonsig3', 'nonsig4', 'nonsig5',
        'added_isvar1', 'added_isvar2',
    ]
    individual_specific = ['added_isvar1', 'added_isvar2']
    transformed = ['added_fixed1', 'added_fixed2']

    design = data[explanatory].values
    chosen = data['choice'].values

    model = MultinomialLogit()
    model.setup(
        design,
        chosen,
        ids=data['id'],
        varnames=explanatory,
        isvars=individual_specific,
        transvars=transformed,
        alts=data['alt'],
    )
    model.fit()
    model.get_loglik_null()
    model.summarise()
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
''' ----------------------------------------------------------- '''
|
|
79
|
+
''' SCRIPT. MIXED LOGIT '''
|
|
80
|
+
''' ----------------------------------------------------------- '''
|
|
81
|
+
def fit_mxl_example():
    """Fit a mixed logit model with correlated random parameters on synthetic data.

    Reads ``artificial_1h_mixed_corr_trans.csv`` (path relative to the working
    directory), sets up a ``MixedLogit`` without an intercept using 200 draws,
    then fits it and prints the summary. Returns nothing.
    """
    data = pd.read_csv("artificial_1h_mixed_corr_trans.csv")

    explanatory = [
        'added_fixed1', 'added_fixed2', 'added_fixed3',
        'added_fixed4', 'added_fixed5', 'added_fixed6',
        'nonsig1', 'nonsig2', 'nonsig3', 'nonsig4', 'nonsig5',
        'added_random1', 'added_random2', 'added_random3',
        'added_random4', 'added_random5', 'added_random6', 'added_random7',
    ]

    # Maps each random variable to the distribution code handed to the model.
    random_spec = {
        'added_random1': 'n', 'added_random2': 'n', 'added_random3': 'n',
        'added_random4': 'n', 'added_random5': 'n',
        'added_random6': 'u', 'added_random7': 't',
    }
    correlated = ['added_random1', 'added_random2', 'added_random3']

    # No individual-specific or transformed variables in this example.
    individual_specific = []
    transformed = []

    mxl = MixedLogit()
    mxl.setup(
        X=data[explanatory].values,
        y=data['choice'].values,
        ids=data['choice_id'].values,
        panels=data['ind_id'].values,
        varnames=explanatory,
        isvars=individual_specific,
        transvars=transformed,
        correlated_vars=correlated,
        randvars=random_spec,
        fit_intercept=False,
        alts=data['alt'],
        n_draws=200,
    )
    mxl.fit()
    mxl.summarise()
|
|
108
|
+
|
|
109
|
+
''' ----------------------------------------------------------- '''
|
|
110
|
+
''' SCRIPT. MIXED LOGIT '''
|
|
111
|
+
''' ----------------------------------------------------------- '''
|
|
112
|
+
def fit_mxl_box_example():
    """Fit a mixed logit model with transformed random variables on synthetic data.

    Reads ``artificial_1h_mixed_corr_trans.csv`` (path relative to the working
    directory), adds two Box-Cox transformed columns to the frame, sets up a
    ``MixedLogit`` without an intercept using 200 draws, then fits it,
    computes the null log-likelihood and prints the summary. Returns nothing.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.stats`` is loaded on every SciPy version.
    from scipy import stats

    df = pd.read_csv("artificial_1h_mixed_corr_trans.csv")

    # NOTE(review): these two columns are added to the frame but are not part
    # of `varnames` below, so they never reach the model -- confirm whether
    # they are still needed.
    df['bc_added_random4'] = stats.boxcox(df['added_random4'], 0.01)
    df['bc_added_random5'] = stats.boxcox(df['added_random5'], 0.0)

    varnames = [
        'added_fixed1', 'added_fixed2', 'added_fixed3',
        'added_fixed4', 'added_fixed5', 'added_fixed6',
        'added_random1', 'added_random2', 'added_random3',
        'added_random4', 'added_random5', 'added_random6',
        'added_random7',
    ]

    isvars = []
    transvars = ['added_random4', 'added_random5']
    # Maps each random variable to the distribution code handed to the model.
    randvars = {
        'added_random1': 'n', 'added_random2': 'n', 'added_random3': 'n',
        'added_random4': 'n', 'added_random5': 'n',
        'added_random6': 'u', 'added_random7': 't',
    }
    correlated_vars = ['added_random1', 'added_random2', 'added_random3']

    mxl = MixedLogit()
    mxl.setup(
        X=df[varnames].values,
        y=df['choice'].values,
        ids=df['choice_id'].values,
        panels=df['ind_id'].values,
        varnames=varnames,
        isvars=isvars,
        transvars=transvars,
        correlated_vars=correlated_vars,
        randvars=randvars,
        fit_intercept=False,
        alts=df['alt'],
        n_draws=200,
    )

    mxl.fit()
    mxl.get_loglik_null()
    mxl.summarise()
|
|
142
|
+
|
|
143
|
+
''' ----------------------------------------------------------- '''
|
|
144
|
+
''' SCRIPT. LATENT CLASS '''
|
|
145
|
+
''' ----------------------------------------------------------- '''
|
|
146
|
+
def fit_lc_example():
    """Fit a two-class latent class model on synthetic data and print a summary.

    Reads ``artificial_latent_new.csv`` (path relative to the working
    directory). Class membership is specified with ``income`` and ``age``;
    each of the two classes gets its own list of explanatory variables.
    Returns nothing.
    """
    data = pd.read_csv("artificial_latent_new.csv")

    explanatory = [
        'income', 'age', 'price', 'time', 'conven', 'comfort', 'meals', 'petfr', 'emipp',
        'nonsig1', 'nonsig2', 'nonsig3', 'nonsig4', 'nonsig5',
    ]
    design = data[explanatory].values
    chosen = data['choice'].values

    membership_spec = np.array([['income', 'age']], dtype='object')
    # One row of variable names per latent class (two classes).
    per_class_spec = np.array(
        [
            ['price', 'time', 'conven', 'comfort'],
            ['price', 'time', 'meals', 'petfr', 'emipp'],
        ],
        dtype='object',
    )

    lc_model = LatentClassModel()
    lc_model.setup(
        design,
        chosen,
        varnames=explanatory,
        ids=data['id'],
        num_classes=2,
        class_params_spec=per_class_spec,
        member_params_spec=membership_spec,
        alts=[1, 2, 3],
        ftol_lccm=1e-3,
        gtol=1e-3,
    )
    lc_model.fit()
    lc_model.summarise()
|
|
166
|
+
|
|
167
|
+
''' ----------------------------------------------------------- '''
|
|
168
|
+
''' SCRIPT. LATENT CLASS MIXED '''
|
|
169
|
+
''' ----------------------------------------------------------- '''
|
|
170
|
+
def fit_lcm_example():
    """Fit a three-class latent class mixed model on synthetic data.

    Reads ``synth_latent_mixed_3classes.csv`` (path relative to the working
    directory), sets up a ``LatentClassMixedModel`` with explicit starting
    values for the membership and class coefficients, then fits it and prints
    the summary. Returns nothing.
    """
    df = pd.read_csv("synth_latent_mixed_3classes.csv")

    varnames = ['added_fixed1', 'added_fixed2', 'added_random1', 'added_random2', 'income', 'age']
    X = df[varnames].values
    y = df['choice'].values

    member_params_spec = np.array([['income', 'age'], ['income', 'age']], dtype='object')

    # Three latent classes, all using the same explanatory variables.
    class_params_spec = np.array(
        [
            ['added_fixed1', 'added_fixed2', 'added_random1', 'added_random2'],
            ['added_fixed1', 'added_fixed2', 'added_random1', 'added_random2'],
            ['added_fixed1', 'added_fixed2', 'added_random1', 'added_random2'],
        ],
        dtype='object',
    )

    randvars = {'added_random1': 'n', 'added_random2': 'n'}

    # Starting values passed to the model as `init_class_thetas` / `init_class_betas`.
    init_class_thetas = np.array([-1, 5.6, -7.61381657745904565, 10.5])
    init_class_betas = [
        np.array([.181, -.35, 2.411337674531561, 2.1511169162160617, 0.8752373368149019, 0.7313773222836617]),
        np.array([0.23, 0, -0.6268738608685024, -1.3812810694501136, 0.8591208458201691, 1.2928663669444755]),
        np.array([0, .94, 0.8382701667527453, 1.3112939261751486, 1.0298368042405897, 1.0076129422492865]),
    ]

    model = LatentClassMixedModel()
    model.setup(
        X,
        y,
        panels=df['ind_id'],
        n_draws=200,
        varnames=varnames,
        num_classes=3,
        class_params_spec=class_params_spec,
        member_params_spec=member_params_spec,
        gtol=1e-5,
        init_class_thetas=init_class_thetas,
        init_class_betas=init_class_betas,
        randvars=randvars,
        alts=[1, 2, 3],
    )
    model.fit()
    model.summarise()
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
208
|
+
# META HEURISTIC OPTIMISATION APPROACH
|
|
209
|
+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
210
|
+
|
|
211
|
+
def call_harmony(parameters, init_sol=None):
    """Run a harmony search over `parameters`, optionally from `init_sol`.

    Builds a ``HarmonySearch`` solver, sets its ``max_mem`` to 25 and its
    ``maxiter`` to 500, and runs it. Nothing is returned.
    """
    harmony = HarmonySearch(parameters, init_sol)
    harmony.max_mem, harmony.maxiter = 25, 500
    harmony.run()
|
|
218
|
+
|
|
219
|
+
def call_siman(parameters, init_sol=None, **kwargs):
    """Run simulated annealing and return the solver's best solution.

    Args:
        parameters: Search parameters handed to ``SA``.
        init_sol: Optional starting solution.
        **kwargs: Extra options. ``ctrl`` is a tuple
            ``(tI, tF, max_temp_steps, max_iter)`` and defaults to
            ``(1000, 0.001, 20, 20)``. ``id_num`` is passed to ``SA``
            positionally. All keys other than ``ctrl`` are forwarded to ``SA``
            as keyword arguments.

    Returns:
        Whatever ``SA.return_best()`` returns.
    """
    # 'ctrl' is passed positionally, so it must not also travel in **kwargs.
    ctrl = kwargs.pop('ctrl', (1000, 0.001, 20, 20))
    # NOTE(review): 'id_num' stays in kwargs and is also passed positionally.
    id_num = kwargs.get('id_num', None)

    annealer = SA(parameters, init_sol, ctrl, id_num, **kwargs)
    annealer.run()
    annealer.close_files()
    return annealer.return_best()
|
|
234
|
+
|
|
235
|
+
def call_parsa(parameters, init_sol=None, nthrds=4, **kwargs):
    """Run the ``PARSA`` solver with `nthrds` threads.

    Args:
        parameters: Search parameters handed to ``PARSA``.
        init_sol: Optional starting solution.
        nthrds: Forwarded to ``PARSA`` as ``nthrds``.
        **kwargs: Only ``ctrl`` is read, a tuple
            ``(tI, tF, max_temp_steps, max_iter)`` defaulting to
            ``(10, 0.001, 10, 10)``. Other keys are ignored.

    Nothing is returned.
    """
    # Take 'ctrl' out of kwargs; it is handed to the solver positionally.
    ctrl = kwargs.pop('ctrl', (10, 0.001, 10, 10))

    solver = PARSA(parameters, init_sol, ctrl, nthrds=nthrds)
    solver.run()
|
|
250
|
+
|
|
251
|
+
def call_parcopsa(parameters, init_sol=None, nthrds=8):
    """Run the ``PARCOPSA`` solver with `nthrds` threads.

    The control tuple is fixed at ``(10, 0.001, 10, 10)``, i.e.
    ``(tI, tF, max_temp_steps, max_iter)``. Nothing is returned.
    """
    annealing_ctrl = (10, 0.001, 10, 10)
    solver = PARCOPSA(parameters, init_sol, annealing_ctrl, nthrds=nthrds)
    solver.run()
|
|
264
|
+
|
|
265
|
+
def call_threshold(parameters, init_sol=None, hm=False):
    """Run the ``TA`` solver and close its files.

    The control tuple is fixed at ``(10, 20, 20)``, i.e.
    ``(threshold, max_steps, max_iter)``. The `hm` argument is accepted but
    not used. Nothing is returned.
    """
    threshold_ctrl = (10, 20, 20)
    ta_solver = TA(parameters, init_sol, threshold_ctrl)
    ta_solver.run()
    ta_solver.close_files()
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
''' ----------------------------------------------------------- '''
|
|
276
|
+
''' SCRIPT '''
|
|
277
|
+
''' ----------------------------------------------------------- '''
|
|
278
|
+
|
|
279
|
+
def optimise_synth_latent():
    """Run a simulated-annealing model search on the synthetic latent-class data.

    Reads ``synth_latent_mixed_3classes.csv`` (path relative to the working
    directory), builds a single-objective (log-likelihood) ``Parameters``
    object with latent classes and random parameters enabled, and hands it to
    ``call_siman`` with no starting solution. Returns nothing.
    """
    data = pd.read_csv("synth_latent_mixed_3classes.csv")

    # All variables are treated as alternative-specific; none are individual-specific.
    explanatory = ['added_fixed1', 'added_fixed2', 'added_random1', 'added_random2', 'income', 'age']

    search_params = Parameters(
        criterions=[['loglik', 1]],
        df=data,
        distr=['n', 'u', 't'],          # random distributions to select from
        df_test=None,
        choice_set=['1', '2', '3'],
        alt_var=data['alt'],
        varnames=explanatory,
        isvarnames=[],
        asvarnames=explanatory,
        choices=data['choice'],
        choice_id=data['choice_id'],
        ind_id=data['ind_id'],
        latent_class=True,
        allow_random=True,
        base_alt=None,                  # no reference alternative
        allow_bcvars=False,
        n_draws=200,
    )

    call_siman(search_params, None)
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def optimise_latent_3_phase_search(num_classes = 3, num_of_iterations = 1000, initial_iterations = 200):
    """Run a three-phase simulated-annealing latent-class search on the electricity data.

    Reads ``electricity.csv`` (path relative to the working directory) and
    calls ``call_siman`` three times:

    1. with ``optimise_membership=True``;
    2. with ``optimise_membership=False`` and ``optimise_class=True``, passing
       the phase-1 result as ``fixed_solution``;
    3. a short final run with both flags set, started from the phase-2 result.

    Args:
        num_classes: Used for ``min_classes``, ``max_classes`` and
            ``num_classes`` in ``Parameters``. The per-phase options use it as
            ``min_classes`` and ``num_classes + 1`` as ``max_classes``.
        num_of_iterations: Third entry of the phase-2 ``ctrl`` tuple.
        initial_iterations: Third entry of the phase-1 ``ctrl`` tuple.

    Returns nothing.
    """
    df = pd.read_csv("electricity.csv")
    df_test = None
    varnames = ['pf', 'cl', 'loc', 'wk', 'tod', 'seas']  # all explanatory variables to be included in the model
    asvarnames = varnames  # alternative-specific variables in varnames
    isvarnames = []  # individual-specific variables in varnames

    # Membership variables: every name in varnames except those listed as unwanted.
    # A membership test is required here; comparing a string to a list with
    # `!=` is always true and would never filter anything.
    unwanted_member = ['listofunwantednamesinmember']
    memvarnames = [name for name in varnames if name not in unwanted_member]

    choice_id = df['chid']
    ind_id = df['id']
    choices = df['choice']  # the choice column
    alt_var = df['alt']  # the alternative column
    base_alt = None  # reference alternative
    distr = ['n', 'u', 't']  # random distributions to select from
    choice_set = ['1', '2', '3', '4']

    # Single objective: BIC with sign -1.
    criterions = [['bic', -1]]

    latent_class = True
    parameters = Parameters(criterions=criterions, df=df, distr=distr, df_test=df_test, choice_set=choice_set,
                            alt_var=alt_var, varnames=varnames, isvarnames=isvarnames, asvarnames=asvarnames,
                            mem_vars=memvarnames, choices=choices,
                            choice_id=choice_id, ind_id=ind_id, latent_class=latent_class, allow_random=True,
                            base_alt=base_alt,
                            allow_bcvars=False, n_draws=200, min_classes=num_classes, max_classes=num_classes,
                            num_classes=num_classes, ps_intercept=True, optimise_class=True)

    # Setting up for fixed thetas.
    # NOTE(review): this is an alias, not a copy. The attribute changes below
    # are made on `parameters` itself, before phase 1 runs, so all three
    # phases share one object. Confirm that this is intended.
    parameters_2nd = parameters
    parameters_2nd.fixed_thetas = True
    parameters_2nd.isvarnames = varnames
    parameters_2nd.optimise_class = True

    parameters_3rd = parameters_2nd

    # No user-supplied starting solution.
    init_sol = None

    # Phase 1: optimise membership.
    print("1st Phase, Optimize Membership")
    sa_parms = {'ctrl': (10, 0.001, initial_iterations, 2), 'max_classes': num_classes+1,
                'min_classes': num_classes, 'optimise_membership': True,
                'id_num': f'Elec_c{num_classes}_p1'}
    best_member = call_siman(parameters, init_sol, **sa_parms)

    # Phase 2: optimise the class specifications with the phase-1 solution fixed.
    print("2nd Phase, Optimize Classes")
    sa_parms = {'ctrl': (10, 0.001, num_of_iterations, 10), 'max_classes': num_classes+1,
                'min_classes': num_classes, 'optimise_membership': False, 'optimise_class': True,
                'fixed_solution': best_member, 'id_num': f'Elec_c{num_classes}_p2'}
    best_joint = call_siman(parameters_2nd, init_sol, **sa_parms)

    # Phase 3: short final run, started from the best joint solution.
    print("Final Phase")
    sa_parms = {'ctrl': (10, 0.001, 5, 1), 'max_classes': num_classes+1, 'min_classes': num_classes,
                'optimise_membership': True, 'optimise_class': True,
                'id_num': f'Elec_c{num_classes}_p3'}
    call_siman(parameters_3rd, best_joint, **sa_parms)
|
|
390
|
+
|
|
391
|
+
def optimise_latent_swiss(num_classes = 3, num_of_iterations = 1000, number_of_initials = 200):
    """Run a three-phase simulated-annealing latent-class search on the Swissmetro data.

    Reads ``swissmetro_long_1.csv`` (path relative to the working directory)
    and calls ``call_siman`` three times:

    1. with ``optimise_membership=True``;
    2. with ``optimise_membership=False`` and ``optimise_class=True``, passing
       the phase-1 result as ``fixed_solution``;
    3. a short final run with both flags set, started from the phase-2 result.

    Args:
        num_classes: Used for ``min_classes``, ``max_classes`` and
            ``num_classes`` in ``Parameters``. The per-phase options use it as
            ``min_classes`` and ``num_classes + 1`` as ``max_classes``.
        num_of_iterations: Third entry of the phase-2 ``ctrl`` tuple.
        number_of_initials: Third entry of the phase-1 ``ctrl`` tuple.

    Returns nothing.
    """
    df = pd.read_csv("swissmetro_long_1.csv")
    df_test = None
    varnames = ['TT_SCALED', 'CO_SCALED', 'HE', 'SEATS']  # all explanatory variables to be included in the model
    # NOTE(review): the columns 'AGE', 'MALE', 'INCOME', 'GA', 'WHO', 'FIRST'
    # and 'LUGGAGE' are not passed to the search; membership variables are
    # drawn from `varnames` below. Confirm whether they were meant to be
    # the `mem_vars`.

    asvarnames = varnames  # alternative-specific variables in varnames
    isvarnames = []  # individual-specific variables in varnames

    # Membership variables: every name in varnames except those listed as unwanted.
    # A membership test is required here; comparing a string to a list with
    # `!=` is always true and would never filter anything.
    unwanted_member = ['listofunwantednamesinmember']
    memvarnames = [name for name in varnames if name not in unwanted_member]

    choice_id = df['CHID']
    ind_id = df['ID']
    choices = df['CHOICE']  # the choice column
    alt_var = df['ALT']  # the alternative column
    base_alt = None  # reference alternative
    distr = ['n', 'u', 't']  # random distributions to select from
    choice_set = ['CAR', 'SM', 'TRAIN']

    # Single objective: BIC with sign -1.
    criterions = [['bic', -1]]

    latent_class = True
    parameters = Parameters(criterions=criterions, df=df, distr=distr, df_test=df_test, choice_set=choice_set,
                            alt_var=alt_var, varnames=varnames, isvarnames=isvarnames, asvarnames=asvarnames,
                            mem_vars=memvarnames, choices=choices,
                            choice_id=choice_id, ind_id=ind_id, latent_class=latent_class, allow_random=True,
                            base_alt=base_alt,
                            allow_bcvars=False, n_draws=200, min_classes=num_classes, max_classes=num_classes,
                            num_classes=num_classes, ps_intercept=True, optimise_class=True)

    # Setting up for fixed thetas.
    # NOTE(review): this is an alias, not a copy. The attribute changes below
    # are made on `parameters` itself, before phase 1 runs, so all three
    # phases share one object. Confirm that this is intended.
    parameters_2nd = parameters
    parameters_2nd.fixed_thetas = True
    parameters_2nd.isvarnames = varnames
    parameters_2nd.optimise_class = True

    parameters_3rd = parameters_2nd

    # No user-supplied starting solution.
    init_sol = None

    # Phase 1: optimise membership.
    print("1st Phase, Optimize Membership")
    sa_parms = {'ctrl': (10, 0.001, number_of_initials, 10), 'max_classes': num_classes+1,
                'min_classes': num_classes, 'optimise_membership': True,
                'id_num': f'Swiss_c{num_classes}_p1'}
    best_member = call_siman(parameters, init_sol, **sa_parms)

    # Phase 2: optimise the class specifications with the phase-1 solution fixed.
    print("2nd Phase, Optimize Classes")
    sa_parms = {'ctrl': (10, 0.001, num_of_iterations, 10), 'max_classes': num_classes+1,
                'min_classes': num_classes, 'optimise_membership': False, 'optimise_class': True,
                'fixed_solution': best_member, 'id_num': f'Swiss_c{num_classes}_p2'}
    best_joint = call_siman(parameters_2nd, init_sol, **sa_parms)

    # Phase 3: short final run, started from the best joint solution.
    print("Final Phase")
    sa_parms = {'ctrl': (10, 0.001, 5, 1), 'max_classes': num_classes+1, 'min_classes': num_classes,
                'optimise_membership': True, 'optimise_class': True,
                'id_num': f'Swiss_c{num_classes}_p3'}
    call_siman(parameters_3rd, best_joint, **sa_parms)
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
|
|
473
|
+
''' ----------------------------------------------------------- '''
|
|
474
|
+
''' SCRIPT '''
|
|
475
|
+
''' ----------------------------------------------------------- '''
|
|
476
|
+
'TEST FOR FITTING LATENT CLASS MODEL'
|
|
477
|
+
def latent_synth_4():
    """Fit a two-class, constants-only latent class model on the 4-class synthetic data.

    Reads ``artificial_latent_new_4classes_mnl.csv`` (path relative to the
    working directory), adds a constant ``ones`` column, and fits a
    ``LatentClassModel`` whose two classes each use only ``ones`` and whose
    membership specification is ``'_inter'``. Progress messages are printed
    along the way. Returns nothing.
    """
    print('testing intercept model')
    print('done')

    print('testing synthetic experiment for the laten class, 4 class ')
    varnames = ['price', 'time', 'conven', 'comfort', 'meals', 'petfr', 'emipp', 'income', 'age', 'ones']

    # The file is read once; the 'ones' column provides the constant term.
    df = pd.read_csv("artificial_latent_new_4classes_mnl.csv")
    df = df.assign(ones=1)
    model = LatentClassModel()

    X = df[varnames].values
    y = df['choice'].values
    member_params_spec = np.array([['_inter']], dtype='object')
    class_params_spec = np.array([['ones'],
                                  ['ones']], dtype='object')

    print('do i need to declare intecept')
    model.setup(X, y, panels=df['id'].values, varnames=varnames, num_classes=2,
                class_params_spec=class_params_spec, member_params_spec=member_params_spec,
                alts=[1, 2, 3])
    model.reassign_penalty(0.10)
    model.fit()
    model.summarise()
    print('finished')
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
def MaaS_search(number_of_classes = 3, number_of_iterations = 1000, initial_iterations = 200, **kwargs):
    """Run a three-phase simulated-annealing latent-class search on the MaaS data.

    Reads ``MassLong.csv`` (path relative to the working directory) and calls
    ``call_siman`` three times:

    1. with ``optimise_membership=True``;
    2. with ``optimise_membership=False`` and ``optimise_class=True``, started
       from the phase-1 result and also passing it as ``fixed_solution``;
    3. a short final run started from the phase-2 result.

    Args:
        number_of_classes: Used for ``min_classes``, ``max_classes`` and
            ``num_classes``, both in ``Parameters`` and in each phase.
        number_of_iterations: Third entry of the phase-2 ``ctrl`` tuple.
        initial_iterations: Third entry of the phase-1 ``ctrl`` tuple.
        **kwargs: Only ``multiobjective`` is read. A truthy value selects the
            criteria BIC and MAE; otherwise BIC alone is used.

    Returns nothing.
    """
    df = pd.read_csv('MassLong.csv')
    print('Running Latent Class Search')

    # Explanatory variables offered to the search.
    varnames = ['Price', 'PT', 'Rideshare', 'Ebike', 'Addon', 'Age',
                'Gender', 'Household', 'Education', 'Employment', 'WFH', 'Income',
                'Follow-up', 'Residential', 'Technology',
                'Disability', 'Driving', 'Bike', 'Scooter', 'Multimode', 'Public_Transit',
                'D_walk', 'D_car', 'D_bike', 'Long_w_trips', 'Long_r_trips', 'Long_s_trips',
                'PT_averse', 'LGA_1', 'LGA_3', 'Age_1', 'Age_3', 'Live_alone', 'Fam_nokid', 'Fam_kid',
                'Full_time', 'Part_time',
                'Casual', 'Home_duties', 'Unemployed', 'Full_student', 'Retired',
                'Income_1', 'Income_2', 'Income_3']

    # Search options.
    df_test = None
    isvarnames = []  # individual-specific variables in varnames
    # Names excluded from the class-specific and membership variable lists.
    unwanted_class = ['PT', 'Rideshare', 'Ebike', 'Addon', 'Age', 'Gender',
                      'Driving', 'Bike', 'Scooter', 'Multimode', 'Public_Transit',
                      'D_walk', 'D_car', 'D_bike', 'Long_w_trips', 'Long_r_trips', 'Long_s_trips',
                      'Income_1', 'Income_2', 'Income_3', 'MaaS_1', 'MaaS_2', 'MaaS_3', 'MaaS_4',
                      'Live_alone', 'Unemployed']
    unwanted_member = ['MaaS_1', 'MaaS_2', 'MaaS_3', 'MaaS_4', 'Driving', 'Bike', 'Scooter', 'Multimode',
                       'Public_Transit', 'Price', 'PT']
    memvarnames = [name for name in varnames if name not in unwanted_member]  # member-specific variables
    asvarnames = [name for name in varnames if name not in unwanted_class]  # class-specific variables
    choice_id = df['CHID']
    ind_id = df['ID']  # passed to Parameters as ind_id

    choices = df['CHOICE']  # the choice column
    alt_var = df['alt']  # the alternative column
    base_alt = None  # reference alternative
    distr = ['n', 'u', 't']  # random distributions to select from
    choice_set = ['1', '2', '3', '4']

    # Single- or multi-objective: each criterion is a [KPI, sign] pair.
    if kwargs.get('multiobjective', 0):
        criterions = [['bic', -1], ['mae', -1]]
    else:
        criterions = [['bic', -1]]

    latent_class = True

    parameters = Parameters(criterions=criterions, df=df, distr=distr, df_test=df_test, choice_set=choice_set,
                            alt_var=alt_var, varnames=varnames, isvarnames=isvarnames, asvarnames=asvarnames,
                            mem_vars=memvarnames, choices=choices,
                            choice_id=choice_id, ind_id=ind_id, latent_class=latent_class, allow_random=True,
                            base_alt=base_alt,
                            allow_bcvars=False, n_draws=200, min_classes=number_of_classes,
                            max_classes=number_of_classes, num_classes=number_of_classes, ps_intercept=True,
                            optimise_class=True, ftol_lccm=1e-4, ps_asvars=['Price'])

    # Setting up for fixed thetas.
    # NOTE(review): this is an alias, not a copy. The attribute changes below
    # are made on `parameters` itself, before phase 1 runs, so all three
    # phases share one object. Confirm that this is intended.
    parameters_2nd = parameters
    parameters_2nd.fixed_thetas = True
    parameters_2nd.isvarnames = varnames
    # NOTE(review): the constructor keyword above is `ps_asvars`, but the
    # attribute set here is `ps_vars` -- confirm which name is read.
    parameters_2nd.ps_vars = ['Price']
    parameters_2nd.optimise_class = True

    parameters_3rd = parameters_2nd

    # No user-supplied starting solution.
    init_sol = None

    # Phase 1: optimise membership.
    print("1st Phase, Optimize Membership")
    sa_parms = {'ctrl': (10, 0.001, initial_iterations, 2), 'max_classes': number_of_classes,
                'min_classes': number_of_classes,
                'optimise_membership': True, 'id_num': f'MaaS_c{number_of_classes}_p1'}
    best_member = call_siman(parameters, init_sol, **sa_parms)

    # TODO if perturb randvar, need to add it into one of the classes
    # Phase 2: optimise the class specifications with the phase-1 solution fixed.
    print("2nd Phase, Optimize Classes")
    sa_parms = {'ctrl': (100, 0.001, number_of_iterations, 5), 'max_classes': number_of_classes,
                'min_classes': number_of_classes, 'optimise_membership': False,
                'optimise_class': True, 'fixed_solution': best_member,
                'id_num': f'MaaS_c{number_of_classes}_p2'}
    best_joint = call_siman(parameters_2nd, best_member, **sa_parms)

    # Phase 3: short final run, started from the best joint solution.
    print("Final Phase")
    sa_parms = {'ctrl': (10, 0.001, 5, 1), 'max_classes': number_of_classes,
                'min_classes': number_of_classes, 'id_num': f'MaaS_c{number_of_classes}_p3'}
    call_siman(parameters_3rd, best_joint, **sa_parms)
|
|
633
|
+
|
|
634
|
+
def _fit_akshay_reference_model(df, member_vars, class_vars):
    """Fit and summarise the hand-specified five-class reference model.

    Every membership row uses an intercept (``'_inter'``) plus all of
    ``member_vars``; every class row uses all of ``class_vars``.  Estimation
    starts from the hard-coded coefficient arrays below.

    Args:
        df: Data frame holding the ``member_vars`` and ``class_vars`` columns
            plus the ``CHOICE``, ``CHID`` and ``indID`` columns.
        member_vars: Names of the class-membership variables.
        class_vars: Names of the class-specific variables.
    """
    print('testing against Akshays model')
    num_classes = 5
    varnames = list(member_vars) + list(class_vars)

    X = df[varnames].values
    y = df['CHOICE'].values

    # One row fewer than the number of classes (rule 7 in the RULES text at
    # the end of this file); all rows share the same specification.
    member_row = ['_inter'] + list(member_vars)
    member_params_spec = np.array([member_row] * (num_classes - 1), dtype='object')
    class_params_spec = np.array([list(class_vars)] * num_classes, dtype='object')

    # 48 starting values, i.e. 4 x 12 -- presumably one run of 12 per
    # membership row (intercept + 11 variables); TODO confirm the layout.
    init_class_thetas = np.array([
        -1.321318, -0.254239, -0.137624, -9.159877, 0.009594, 1.189211,
        -0.084255, 0.437849, 0.222736, -2.338727, -0.220732, 0.206103,
        0.293479, 0.17829, -0.293836, -0.499868, -0.336, 0.588949,
        0.0357, 0.393709, -0.215125, -0.28694, -0.264146, -0.871409,
        -1.160788, 0.752398, -0.054771, 0.554518, -0.559022, 0.633359,
        -0.150176, 0.020715, -0.23028, 0.185878, -0.219888, -1.531753,
        -0.833134, -0.168312, -2.27768, 1.136705, 0.093996, 1.672507,
        1.29167, 1.49679, 0.423603, 0.249344, -0.832107, -2.778636,
    ])

    # One array of 19 starting values per class, in ``class_vars`` order.
    init_class_betas = [
        np.array([0.441269, 0.448334, 0.288787, 0.35502, 0.216816, 0.198564, 0.069477,
                  0.346543, 0.233089, 0.323059, 0.333928, 0.149546, 0.124614, 0.0443181,
                  -0.00741137, 0.036144, -0.00298227, 0.140595, 0.046312]),  # Class 1
        np.array([0.801542, 0.483616, 0.546757, 0.498264, 0.206961, 0.367382, 0.00124702,
                  0.587733, 0.398037, 0.5319, 0.369294, 0.246564, -0.100532, -0.141248,
                  -0.019849, 0.038627, -0.104714, 0.173183, 0.0905047]),  # Class 2
        np.array([1.28245, 0.704765, 0.8016, 0.145479, 0.340825, 0.554092, -0.0942558,
                  12.6054, 83.2791, 27.7743, -14.1763, 26.7106, 21.6308, -2.87297,
                  -32.6663, 0.528885, 0.375195, 0.367734, 0.343927]),  # Class 3
        np.array([1.18916, 0.562234, 0.58024, -0.00850272, 0.122827, 0.619118, 0.0330975,
                  0.970455, 0.24954, 0.698946, 0.172871, 0.64793, -0.395843, 0.00472563,
                  -0.425557, 0.157351, 0.0453663, 0.194574, 0.0677801]),  # Class 4
        np.array([0] * 19),  # Class 5 (integer zeros, as in the original literal)
    ]

    model = LatentClassModel()
    model.setup(X, y, ids=df['CHID'], panels=df['indID'],
                varnames=varnames,
                num_classes=num_classes,
                class_params_spec=class_params_spec,
                member_params_spec=member_params_spec,
                init_class_thetas=init_class_thetas,
                init_class_betas=init_class_betas,
                alts=[1, 2],
                ftol_lccm=1e-2,
                gtol=1e-3,
                # verbose = 2
                )
    model.fit()
    model.summarise()
    print('completed Ashkays model')


def ashkay_search(number_of_classes=3, number_of_iterations=1000, initial_iterations=200, *args, **kwargs):
    """Run the three-phase latent class search on ``akshay_long_true.csv``.

    Phase 1 calls ``call_siman`` with ``optimise_membership=True`` and no
    starting solution.  Phase 2 restarts from the phase-1 result with
    ``optimise_membership=False`` / ``optimise_class=True`` and the phase-1
    result passed as ``fixed_solution``.  Phase 3 is a short final run that
    starts from the phase-2 result.

    Args:
        number_of_classes: Used as ``min_classes``, ``max_classes`` and
            ``num_classes`` throughout, so the class count is held fixed.
        number_of_iterations: Third entry of the phase-2 ``ctrl`` tuple.
        initial_iterations: Third entry of the phase-1 ``ctrl`` tuple.
        *args: Accepted for call compatibility; not used.
        **kwargs: ``run_akshay`` (default ``False``) additionally fits the
            hand-specified reference model before the search.  Any other key
            (``main`` forwards every CLI option, e.g. ``run_time``) is
            accepted and ignored.

    Returns:
        Whatever the final ``call_siman`` call returns.  Callers that ignored
        the previous ``None`` return value are unaffected.
    """
    df = pd.read_csv('akshay_long_true.csv')
    df_test = None

    # Class-membership variables.
    memvarnames = ['InnerCity', 'InnerRegional', 'Under30', 'Over65', 'College', 'FullTime',
                   'PartTime', 'Male', 'Children', 'Income', 'NDI']
    # Class-specific variables.
    asvarnames = ['LocalPTPayG', 'LDPTPayG', 'TaxiPayG', 'CarRentalPayG', 'CarsharePayG',
                  'RidesharePayG', 'BikesharePayG',
                  'LocalPTUnl', 'LDPTUnl', 'TaxiUnl', 'CarRentalUnl', 'CarshareUnl',
                  'RideshareUnl', 'BikeshareUnl',
                  'Cost', 'TktInt', 'BkInt', 'RTInf', 'Pers']
    varnames = memvarnames + asvarnames
    isvarnames = []  # no individual-specific variables in phase 1

    if kwargs.get('run_akshay', False):
        _fit_akshay_reference_model(df, memvarnames, asvarnames)

    print('Running Latent Class Search')

    choice_id = df['CHID']
    ind_id = df['indID']  # I believe this is also panels
    choices = df['CHOICE']  # choice column
    alt_var = df['alt']  # alternative column
    base_alt = None  # Reference alternative
    distr = ['n', 'u', 't']  # List of random distributions to select from
    choice_set = ['1', '2', '3', '4']

    # Single objective: a [KPI, sign] pair placed in a list.
    criterions = [['bic', -1]]
    latent_class = True

    parameters = Parameters(criterions=criterions, df=df, distr=distr, df_test=df_test,
                            choice_set=choice_set, alt_var=alt_var, varnames=varnames,
                            isvarnames=isvarnames, asvarnames=asvarnames,
                            mem_vars=memvarnames, choices=choices,
                            choice_id=choice_id, ind_id=ind_id, latent_class=latent_class,
                            allow_random=True, base_alt=base_alt,
                            allow_bcvars=False, n_draws=200,
                            min_classes=number_of_classes, max_classes=number_of_classes,
                            num_classes=number_of_classes, ps_intercept=True,
                            optimise_class=True, ftol_lccm=1e-4)

    init_sol = None

    # ---- Phase 1: optimise class membership ---------------------------------
    print("1st Phase, Optimize Membership")
    sa_parms = {'ctrl': (10, 0.001, initial_iterations, 2),
                'max_classes': number_of_classes, 'min_classes': number_of_classes,
                'optimise_membership': True, 'id_num': f'Ashkay_c{number_of_classes}_p1'}
    best_member = call_siman(parameters, init_sol, **sa_parms)

    # ---- Phase 2: optimise the class-specific part ---------------------------
    # These settings are applied only now, after phase 1 has finished.  All
    # phases share this one Parameters object, so setting them any earlier
    # would also fix the thetas during the membership search above.
    parameters.fixed_thetas = True
    parameters.isvarnames = asvarnames  # adding in asvars
    parameters.optimise_class = True

    # TODO if perturb randvar, need to add it into one of the classes
    print("2nd Phase, Optimize Classes")
    sa_parms = {'ctrl': (10, 0.001, number_of_iterations, 2),
                'max_classes': number_of_classes, 'min_classes': number_of_classes,
                'optimise_membership': False, 'optimise_class': True,
                'fixed_solution': best_member, 'id_num': f'Ashkay_c{number_of_classes}_p2'}
    best_joint = call_siman(parameters, best_member, **sa_parms)

    # ---- Phase 3: final fit, seeded with the best joint solution --------------
    print("Final Phase")
    sa_parms = {'ctrl': (10, 0.001, 5, 1),
                'max_classes': number_of_classes, 'min_classes': number_of_classes,
                'id_num': f'Ashkay_c{number_of_classes}_p3'}
    final_sol = call_siman(parameters, best_joint, **sa_parms)
    return final_sol
|
|
823
|
+
|
|
824
|
+
|
|
825
|
+
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
|
|
829
|
+
|
|
830
|
+
def main(args):
    """Seed NumPy and run the search selected by ``args.model_run_item``.

    Items 1-4 print a banner and start the matching search with the class and
    iteration counts from ``args``; items 1 and 3 also receive every parsed
    option as keyword arguments.  Any other value runs ``ashkay_search`` with
    only the class count.

    Args:
        args: Parsed command-line namespace.  Reads ``model_run_item`` and
            ``num_classes`` always, ``iterations`` / ``iterations_i`` for
            items 1-4.
    """
    # Hard-coded seed, flagged by its author as the one that triggers the
    # exception being investigated.
    # NOTE(review): the parsed --seed option is not consulted here.
    np.random.seed(100)

    # Lambdas keep every lookup lazy: only the selected search's function name
    # and argument attributes are touched.
    dispatch = {
        1: ('askay', lambda: ashkay_search(args.num_classes, args.iterations,
                                           args.iterations_i, **vars(args))),
        2: ('laten', lambda: optimise_latent_3_phase_search(args.num_classes, args.iterations,
                                                            args.iterations_i)),
        3: ('MaaS', lambda: MaaS_search(args.num_classes, args.iterations,
                                        args.iterations_i, **vars(args))),
        4: ('Swiss', lambda: optimise_latent_swiss(args.num_classes, args.iterations,
                                                   args.iterations_i)),
    }

    selected = dispatch.get(args.model_run_item)
    if selected is None:
        # Unrecognised item: fall back to the default search, no banner.
        ashkay_search(args.num_classes)
    else:
        label, run_search = selected
        print(f'running {label} with {args.num_classes}')
        run_search()
    print('Finished...')
|
|
857
|
+
|
|
858
|
+
''' ---------------------------------------------------------- '''
|
|
859
|
+
''' MAIN PROGRAM '''
|
|
860
|
+
''' ----------------------------------------------------------- '''
|
|
861
|
+
|
|
862
|
+
def build_arg_parser():
    """Build the command-line parser for this script.

    Returns:
        argparse.ArgumentParser: Parser exposing ``--seed``, ``--optimise``,
        ``--multiobjective``, ``--num_classes``, ``--model_run_item``,
        ``--iterations``, ``--iterations_i`` and ``--run_time``.
    """
    parser = argparse.ArgumentParser(description='Script for model fitting and optimization.')
    parser.add_argument('--seed', type=int, default=1, help='Random seed for reproducibility')
    parser.add_argument('--optimise', action='store_true', help='Run optimization functions')
    # NOTE(review): no type= here, so a value given on the command line arrives
    # as str while the default is the int 0.
    parser.add_argument('--multiobjective', default=0, help='single or multiobjective search')
    parser.add_argument('--num_classes', type=int, default=2, help='Number of latent classes')
    parser.add_argument('--model_run_item', type=int, default=3, help='run which dataset')
    parser.add_argument('--iterations', type=int, default=200, help='max number of iterations')
    parser.add_argument('--iterations_i', type=int, default=5, help='first phase number of iterations')
    # NOTE(review): the default evaluates to 14,400,000 while the help text
    # says seconds -- confirm the intended unit/value.
    parser.add_argument('--run_time', type=int, default=60000*60*4,
                        help='termination of run with respect to time in seconds.')
    return parser


if __name__ == '__main__':
    #np.random.seed(int(time.time()))
    args = build_arg_parser().parse_args()
    main(args)

    #np.random.seed(1)

    # Testing model fitting:
    #fit_mnl_example()  # Originally ran in 0.1-0.2s
    #fit_mnl_box_example()  # Originally ran in 1s
    #fit_mxl_example()  # Originally ran in about 12s +- 3s
    #fit_mxl_box_example()  # Originally ran in about 20s
    #fit_lc_example()  # Originally ran in about 6s +- 2s
    #synth_3()
    #fit_lcm_example()  # Originally ran in about 160s + 30s
    #fit_electricity_mxl()

    # Optimisation:

    #ashkay_search()

    #optimise_electricity()
    #optimise_latent_3_phase_search()
    #ashkay_search()
    #optimise()
    #run_latent_class_mixed()
    #print('this is for testing')
    #latent_synth_4()
    #print('this is for searching for the model')
    #optimise_latent_3_phase_search()
    #optimise_electricity()
    #optimise_synth_latent()

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # DEBUGGING PARETO FRONT GENERATION
    '''soln = [{'obj1': 45, 'obj2':2}, {'obj1': 64, 'obj2':8}, {'obj1': 21, 'obj2':2},
        {'obj1': 88, 'obj2':7}, {'obj1': 13, 'obj2':5}, {'obj1': 36, 'obj2':5}, {'obj1': 83, 'obj2':1},
        {'obj1': 39, 'obj2':10}, {'obj1': 45, 'obj2':10}, {'obj1': 60, 'obj2':9}]
    fronts = rank_solutions(soln, 'obj1', 'obj2')
    print("Fronts=",fronts)
    crowd = {}
    key = 'obj2'
    max_val = max(soln[i][key] for i in range(len(soln)))  # Compute max value of objective 'key'
    min_val = min(soln[i][key] for i in range(len(soln)))  # Compute min value of objective 'key'
    for front in fronts.values():
        compute_crowding_dist_front(front, soln, crowd, key, max_val, min_val)
    #print(crowd)

    sorted = sort_solutions(fronts, crowd, soln)
    print(sorted)
    '''
|
|
929
|
+
|
|
930
|
+
# RULES:
|
|
931
|
+
# --------------------------------------------------------------------------
|
|
932
|
+
"""
|
|
933
|
+
1. A variable cannot be an isvar and asvar simultaneously.
|
|
934
|
+
2. An isvar or asvar can be a random variable (randvar) – NOTE: as written this conflicts with rule 3 for isvars and still needs to be confirmed.
|
|
935
|
+
3. An isvar cannot be a randvar
|
|
936
|
+
4. A bcvar cannot be a corvar at the same time
|
|
937
|
+
5. corvar should be a list of at least 2 randvars
|
|
938
|
+
6. num_classes (Q) should be > 1, for estimating latent class models
|
|
939
|
+
7. length of member_params_spec should be == Q-1
|
|
940
|
+
8. length of class_params_spec should be == Q
|
|
941
|
+
9. coefficients for member_params_spec cannot be in randvars
|
|
942
|
+
|
|
943
|
+
|
|
944
|
+
Randvars are required for MixedLogit models!
|
|
945
|
+
"""
|