SearchLibrium 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- old_code/__init__.py +8 -0
- old_code/_choice_model.py +1363 -0
- old_code/_device.py +145 -0
- old_code/akshay_test.py +125 -0
- old_code/boxcox_functions.py +116 -0
- old_code/draws.py +128 -0
- old_code/harmony.py +1261 -0
- old_code/latent_class_constrained.py +434 -0
- old_code/latent_class_mixed_model.py +1566 -0
- old_code/latent_class_model.py +1281 -0
- old_code/latent_main.py +945 -0
- old_code/main.py +1880 -0
- old_code/main_ol.py +127 -0
- old_code/misc.py +303 -0
- old_code/mixed_logit.py +1553 -0
- old_code/multinomial_logit.py +559 -0
- old_code/ordered_logit.py +1641 -0
- old_code/ordered_logit_mixed.py +103 -0
- old_code/ordered_logit_multinomial.py +701 -0
- old_code/r_ordered.py +168 -0
- old_code/rrm.py +521 -0
- old_code/search.py +3485 -0
- old_code/siman.py +1023 -0
- old_code/threshold.py +777 -0
- searchlibrium-0.0.1.dist-info/METADATA +21 -0
- searchlibrium-0.0.1.dist-info/RECORD +28 -0
- searchlibrium-0.0.1.dist-info/WHEEL +5 -0
- searchlibrium-0.0.1.dist-info/top_level.txt +1 -0
old_code/main.py
ADDED
|
@@ -0,0 +1,1880 @@
|
|
|
1
|
+
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
|
2
|
+
SOLUTION OF EXAMPLE DISCRETE CHOICE MODELS
|
|
3
|
+
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
|
4
|
+
from tabnanny import verbose
|
|
5
|
+
|
|
6
|
+
#from searchlogit.ordered_logit_mixed import OrderedMixedLogit
|
|
7
|
+
|
|
8
|
+
# NOTE:
|
|
9
|
+
# varnames: All explanatory variables that have been defined
|
|
10
|
+
# isvars: Individual-specific variables. These variables do not vary across alternatives.
|
|
11
|
+
# asvars: Alternative-specific variables. These variables vary across alternatives.
|
|
12
|
+
# alts: Alternatives for each choice. E.g., Choice = transport mode, Alternatives = {car, bus, train}
|
|
13
|
+
# base_alts: The base (a.k.a., reference) alternative
|
|
14
|
+
# transvars: Variables that have transformations applied to them
|
|
15
|
+
# randvars: Random variables
|
|
16
|
+
# corvars: Correlated variables
|
|
17
|
+
# bcvars: Box Cox transformed variables
|
|
18
|
+
|
|
19
|
+
''' ---------------------------------------------------------- '''
|
|
20
|
+
''' LIBRARIES '''
|
|
21
|
+
''' ---------------------------------------------------------- '''
|
|
22
|
+
import scipy
|
|
23
|
+
from harmony import*
|
|
24
|
+
from siman import*
|
|
25
|
+
from threshold import*
|
|
26
|
+
from latent_class_mixed_model import LatentClassMixedModel
|
|
27
|
+
from latent_class_model import LatentClassModel
|
|
28
|
+
from mixed_logit import*
|
|
29
|
+
from multinomial_logit import MultinomialLogit
|
|
30
|
+
import pandas as pd
|
|
31
|
+
import argparse
|
|
32
|
+
import os
|
|
33
|
+
from ordered_logit import OrderedLogit, OrderedLogitLong, MixedOrderedLogit
|
|
34
|
+
#import time
|
|
35
|
+
|
|
36
|
+
'''' ---------------------------------------------------------- '''
|
|
37
|
+
''' SCRIPT. MULTINOMIAL '''
|
|
38
|
+
''' ----------------------------------------------------------- '''
|
|
39
|
+
def fit_mnl_example():
    """Estimate a multinomial logit on the Swissmetro sample data.

    Loads ``Swissmetro_final.csv`` (path relative to the working directory),
    sets up a ``MultinomialLogit`` with an intercept, availability column
    ``AV`` and ``'SM'`` as the base alternative, then fits the model and
    calls ``get_loglik_null()`` and ``summarise()`` on it.
    """
    data = pd.read_csv("Swissmetro_final.csv")

    # All explanatory variables; 'AGE' is the only individual-specific one.
    explanatory = ['COST', 'TIME', 'HEADWAY', 'SEATS', 'AGE']
    individual_specific = ['AGE']

    estimator = MultinomialLogit()
    estimator.setup(
        X=data[explanatory],
        y=data['CHOICE'],
        varnames=explanatory,
        isvars=individual_specific,
        fit_intercept=True,
        alts=data['alt'],
        ids=data['custom_id'],
        avail=data['AV'],
        base_alt='SM',
        gtol=1e-04,
    )
    estimator.fit()
    estimator.get_loglik_null()
    estimator.summarise()
|
|
56
|
+
|
|
57
|
+
'''' ---------------------------------------------------------- '''
|
|
58
|
+
''' SCRIPT. MULTINOMIAL '''
|
|
59
|
+
''' ----------------------------------------------------------- '''
|
|
60
|
+
def fit_mnl_box_example():
    """Estimate a multinomial logit with transformed variables.

    Loads ``artificial_1b_multi_nonlinear.csv`` (path relative to the working
    directory) and fits a ``MultinomialLogit`` in which ``added_fixed1`` and
    ``added_fixed2`` are passed as ``transvars`` and the two ``added_isvar*``
    columns as individual-specific variables. Afterwards it calls
    ``get_loglik_null()`` and ``summarise()`` on the fitted model.
    """
    data = pd.read_csv("artificial_1b_multi_nonlinear.csv")

    # Column names follow a numbered pattern, so generate them.
    fixed_cols = [f'added_fixed{i}' for i in range(1, 11)]
    noise_cols = [f'nonsig{i}' for i in range(1, 6)]
    individual_specific = ['added_isvar1', 'added_isvar2']
    explanatory = fixed_cols + noise_cols + individual_specific

    design = data[explanatory].values
    outcome = data['choice'].values
    transformed = ['added_fixed1', 'added_fixed2']

    estimator = MultinomialLogit()
    estimator.setup(
        design,
        outcome,
        ids=data['id'],
        varnames=explanatory,
        isvars=individual_specific,
        transvars=transformed,
        alts=data['alt'],
    )
    estimator.fit()
    estimator.get_loglik_null()
    estimator.summarise()
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
''' ----------------------------------------------------------- '''
|
|
81
|
+
''' SCRIPT. MIXED LOGIT '''
|
|
82
|
+
''' ----------------------------------------------------------- '''
|
|
83
|
+
def fit_mxl_example():
    """Estimate a mixed logit with correlated random parameters.

    Loads ``artificial_1h_mixed_corr_trans.csv`` (path relative to the working
    directory) and fits a ``MixedLogit`` without intercept using 200 draws.
    Seven ``added_random*`` columns are declared random (distribution codes
    'n', 'u' and 't'), the first three of which are declared correlated.
    No variables are transformed in this example. The fitted model is
    reported through ``summarise()``.
    """
    data = pd.read_csv("artificial_1h_mixed_corr_trans.csv")

    explanatory = (
        [f'added_fixed{i}' for i in range(1, 7)]
        + [f'nonsig{i}' for i in range(1, 6)]
        + [f'added_random{i}' for i in range(1, 8)]
    )

    # Random parameters 1-5 use code 'n'; 6 and 7 use 'u' and 't'.
    random_spec = {f'added_random{i}': 'n' for i in range(1, 6)}
    random_spec['added_random6'] = 'u'
    random_spec['added_random7'] = 't'

    correlated = ['added_random1', 'added_random2', 'added_random3']

    estimator = MixedLogit()
    estimator.setup(
        X=data[explanatory].values,
        y=data['choice'].values,
        ids=data['choice_id'].values,
        panels=data['ind_id'].values,
        varnames=explanatory,
        isvars=[],
        transvars=[],
        correlated_vars=correlated,
        randvars=random_spec,
        fit_intercept=False,
        alts=data['alt'],
        n_draws=200,
    )
    estimator.fit()
    estimator.summarise()
|
|
110
|
+
|
|
111
|
+
''' ----------------------------------------------------------- '''
|
|
112
|
+
''' SCRIPT. MIXED LOGIT '''
|
|
113
|
+
''' ----------------------------------------------------------- '''
|
|
114
|
+
def fit_mxl_box_example():
    """Estimate a mixed logit with transformed random parameters.

    Loads ``artificial_1h_mixed_corr_trans.csv`` (path relative to the working
    directory) and fits a ``MixedLogit`` without intercept using 200 draws.
    ``added_random4`` and ``added_random5`` are passed as ``transvars``; seven
    ``added_random*`` columns are random and the first three are correlated.
    After fitting it calls ``get_loglik_null()`` and ``summarise()``.
    """
    # Import the function explicitly: a bare ``import scipy`` does not
    # guarantee that the ``scipy.stats`` submodule has been loaded.
    from scipy.stats import boxcox

    df = pd.read_csv("artificial_1h_mixed_corr_trans.csv")

    # Box-Cox transformed copies with fixed lambdas (0.01 and 0.0).
    # NOTE(review): these two columns are not listed in ``varnames`` below,
    # so they do not reach the model - confirm whether they are still needed.
    df['bc_added_random4'] = boxcox(df['added_random4'], 0.01)
    df['bc_added_random5'] = boxcox(df['added_random5'], 0.0)

    varnames = (
        [f'added_fixed{i}' for i in range(1, 7)]
        + [f'added_random{i}' for i in range(1, 8)]
    )

    isvars = []
    transvars = ['added_random4', 'added_random5']
    randvars = {
        'added_random1': 'n', 'added_random2': 'n', 'added_random3': 'n',
        'added_random4': 'n', 'added_random5': 'n',
        'added_random6': 'u', 'added_random7': 't',
    }
    correlated_vars = ['added_random1', 'added_random2', 'added_random3']

    mxl = MixedLogit()
    mxl.setup(
        X=df[varnames].values,
        y=df['choice'].values,
        ids=df['choice_id'].values,
        panels=df['ind_id'].values,
        varnames=varnames,
        isvars=isvars,
        transvars=transvars,
        correlated_vars=correlated_vars,
        randvars=randvars,
        fit_intercept=False,
        alts=df['alt'],
        n_draws=200,
    )

    mxl.fit()
    mxl.get_loglik_null()
    mxl.summarise()
|
|
144
|
+
|
|
145
|
+
''' ----------------------------------------------------------- '''
|
|
146
|
+
''' SCRIPT. LATENT CLASS '''
|
|
147
|
+
''' ----------------------------------------------------------- '''
|
|
148
|
+
def fit_lc_example():
    """Estimate a two-class latent class model on artificial data.

    Loads ``artificial_latent_new.csv`` (path relative to the working
    directory). Class membership is specified with ``income`` and ``age``;
    the two classes use different subsets of the alternative attributes.
    The fitted ``LatentClassModel`` is reported through ``summarise()``.
    """
    data = pd.read_csv("artificial_latent_new.csv")

    explanatory = [
        'income', 'age', 'price', 'time', 'conven', 'comfort', 'meals',
        'petfr', 'emipp',
        'nonsig1', 'nonsig2', 'nonsig3', 'nonsig4', 'nonsig5',
    ]
    design = data[explanatory].values
    outcome = data['choice'].values

    membership_spec = np.array([['income', 'age']], dtype='object')
    # One row per latent class; the rows have different lengths.
    per_class_spec = np.array(
        [
            ['price', 'time', 'conven', 'comfort'],
            ['price', 'time', 'meals', 'petfr', 'emipp'],
        ],
        dtype='object',
    )

    estimator = LatentClassModel()
    estimator.setup(
        design,
        outcome,
        varnames=explanatory,
        ids=data['id'],
        num_classes=2,
        class_params_spec=per_class_spec,
        member_params_spec=membership_spec,
        alts=[1, 2, 3],
        ftol_lccm=1e-3,
        gtol=1e-3,
    )
    estimator.fit()
    estimator.summarise()
|
|
168
|
+
|
|
169
|
+
''' ----------------------------------------------------------- '''
|
|
170
|
+
''' SCRIPT. LATENT CLASS MIXED '''
|
|
171
|
+
''' ----------------------------------------------------------- '''
|
|
172
|
+
def fit_lcm_example():
    """Estimate a three-class latent class mixed model on synthetic data.

    Loads ``synth_latent_mixed_3classes.csv`` (path relative to the working
    directory). All three classes share the same four class-specific
    variables, two of which (``added_random1``, ``added_random2``) are random
    with distribution code 'n'. Membership is specified with ``income`` and
    ``age``. Explicit starting values are supplied for the membership thetas
    and the per-class betas. The fitted ``LatentClassMixedModel`` is reported
    through ``summarise()``.
    """
    df = pd.read_csv("synth_latent_mixed_3classes.csv")

    varnames = ['added_fixed1', 'added_fixed2', 'added_random1', 'added_random2', 'income', 'age']
    X = df[varnames].values
    y = df['choice'].values

    member_params_spec = np.array([['income', 'age'], ['income', 'age']], dtype='object')

    # Three latent classes, each with the same class-specific variables.
    class_vars = ['added_fixed1', 'added_fixed2', 'added_random1', 'added_random2']
    class_params_spec = np.array([class_vars, class_vars, class_vars], dtype='object')

    randvars = {'added_random1': 'n', 'added_random2': 'n'}

    # Starting values. An earlier assignment of init_class_thetas that was
    # overwritten immediately (and so never used) has been removed.
    init_class_thetas = np.array([-1, 5.6, -7.61381657745904565, 10.5])
    init_class_betas = [
        np.array([.181, -.35, 2.411337674531561, 2.1511169162160617, 0.8752373368149019, 0.7313773222836617]),
        np.array([0.23, 0, -0.6268738608685024, -1.3812810694501136, 0.8591208458201691, 1.2928663669444755]),
        np.array([0, .94, 0.8382701667527453, 1.3112939261751486, 1.0298368042405897, 1.0076129422492865]),
    ]

    model = LatentClassMixedModel()
    model.setup(
        X,
        y,
        panels=df['ind_id'],
        n_draws=200,
        varnames=varnames,
        num_classes=3,
        class_params_spec=class_params_spec,
        member_params_spec=member_params_spec,
        gtol=1e-5,
        init_class_thetas=init_class_thetas,
        init_class_betas=init_class_betas,
        randvars=randvars,
        alts=[1, 2, 3],
    )
    model.fit()
    model.summarise()
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
210
|
+
# META HEURISTIC OPTIMISATION APPROACH
|
|
211
|
+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
212
|
+
|
|
213
|
+
def call_harmony(parameters, init_sol=None):
    """Run a harmony search with a memory of 25 and 5000 iterations.

    Args:
        parameters: Search configuration handed to ``HarmonySearch``.
        init_sol: Optional starting solution handed to ``HarmonySearch``.
    """
    search = HarmonySearch(parameters, init_sol)
    # Override the solver settings before running.
    for setting, value in (('max_mem', 25), ('maxiter', 5000)):
        setattr(search, setting, value)
    search.run()
|
|
220
|
+
|
|
221
|
+
def call_siman(parameters, init_sol=None, **kwargs):
    """Run simulated annealing and return the solver's best result.

    Args:
        parameters: Search configuration handed to ``SA``.
        init_sol: Optional starting solution handed to ``SA``.
        **kwargs: Extra solver options. ``ctrl`` is the control tuple
            ``(tI, tF, max_temp_steps, max_iter)`` and defaults to
            ``(10000, 0.001, 20, 20000)``. ``id_num`` defaults to ``None``.
            All remaining options are forwarded to ``SA`` unchanged.

    Returns:
        Whatever ``solver.return_best()`` returns.
    """
    # 'ctrl' is passed positionally below, so it must not also remain in
    # kwargs; pop() reads it and removes it in one step.
    control = kwargs.pop('ctrl', (10000, 0.001, 20, 20000))

    # 'id_num' is only read, not removed, so it is passed positionally and
    # also stays in kwargs.
    # NOTE(review): confirm SA's signature accepts both.
    run_id = kwargs.get('id_num', None)

    annealer = SA(parameters, init_sol, control, run_id, **kwargs)
    annealer.run()
    annealer.close_files()
    return annealer.return_best()
|
|
236
|
+
|
|
237
|
+
def call_parsa(parameters, init_sol=None, nthrds=4, **kwargs):
    """Run the ``PARSA`` solver.

    Args:
        parameters: Search configuration handed to ``PARSA``.
        init_sol: Optional starting solution handed to ``PARSA``.
        nthrds: Value forwarded as ``PARSA``'s ``nthrds`` argument.
        **kwargs: Only ``ctrl`` is used - the control tuple
            ``(tI, tF, max_temp_steps, max_iter)``, defaulting to
            ``(10, 0.001, 10, 10)``. Any other option is ignored.
    """
    control = kwargs.pop('ctrl', (10, 0.001, 10, 10))
    runner = PARSA(parameters, init_sol, control, nthrds=nthrds)
    runner.run()
|
|
252
|
+
|
|
253
|
+
def call_parcopsa(parameters, init_sol=None, nthrds=8):
    """Run the ``PARCOPSA`` solver with a fixed control tuple.

    Args:
        parameters: Search configuration handed to ``PARCOPSA``.
        init_sol: Optional starting solution handed to ``PARCOPSA``.
        nthrds: Value forwarded as ``PARCOPSA``'s ``nthrds`` argument.
    """
    # Control tuple layout: (tI, tF, max_temp_steps, max_iter).
    control = (10, 0.001, 10, 10)
    runner = PARCOPSA(parameters, init_sol, control, nthrds=nthrds)
    runner.run()
|
|
266
|
+
|
|
267
|
+
def call_threshold(parameters, init_sol=None, hm=False):
    """Run the ``TA`` solver with a fixed control tuple.

    Args:
        parameters: Search configuration handed to ``TA``.
        init_sol: Optional starting solution handed to ``TA``.
        hm: Accepted but not used by this function.
    """
    # Control tuple layout: (threshold, max_steps, max_iter).
    control = (10, 20, 20)
    acceptor = TA(parameters, init_sol, control)
    acceptor.run()
    acceptor.close_files()
|
|
275
|
+
|
|
276
|
+
def covering_arrays(index=0):
    """Print the annealing control table and return one of its rows.

    Each row is a control tuple ``(tI, tF, max_temp_steps, max_iter)`` drawn
    from the levels tI in {500, 1000, 1500}, tF in {0.001, 0.01, 0.1},
    max_temp_steps in {10, 20, 30} and max_iter in {10, 20, 50}. The table
    is a hand-reduced subset of the full factorial design.

    Args:
        index: Position of the row to return.

    Returns:
        The row at ``index``, or ``None`` when ``index`` is at or beyond the
        end of the table. Negative values index from the end, as with any
        Python list.
    """
    # NOTE(review): with 8 rows not every pair of levels is covered (for
    # example tI=1000 never appears with tF=0.1), so this is not a complete
    # pairwise covering array.
    covering_array = [
        (500, 0.001, 10, 10),
        (500, 0.01, 20, 20),
        (500, 0.1, 30, 50),
        (1000, 0.001, 20, 50),
        (1000, 0.01, 30, 10),
        (1500, 0.001, 30, 20),
        (1500, 0.1, 10, 50),
        (1500, 0.01, 20, 10),
    ]

    print("Covering Array:")
    for row in covering_array:
        print(row)

    if index < len(covering_array):
        return covering_array[index]
    # Out-of-range index: made explicit rather than falling off the end.
    return None
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
''' ----------------------------------------------------------- '''
|
|
307
|
+
''' SCRIPT '''
|
|
308
|
+
''' ----------------------------------------------------------- '''
|
|
309
|
+
|
|
310
|
+
def optimise_synth_latent(index=0):
    """Run a simulated-annealing search for a 3-class latent class model.

    Loads ``data/artificial_latent_3classes_mnl_22.04.2025.csv`` (path
    relative to the working directory), registers three latent classes in a
    ``LatentClassConstrained`` object, builds the search ``Parameters`` with
    BIC as the single criterion, and runs ``call_siman`` with the control
    tuple selected by ``covering_arrays(index)``.

    Args:
        index: Row of the ``covering_arrays`` table to use as the control
            tuple for the annealing run.

    Returns:
        The value returned by ``call_siman`` for the final run.

    Raises:
        KeyError: If a variable collected from the latent classes is not a
            column of the data file.
    """
    # Imported here so the function does not depend on these names leaking
    # in through the wildcard imports at the top of the file.
    import copy
    import pprint

    number_of_classes = 3
    df = pd.read_csv("data/artificial_latent_3classes_mnl_22.04.2025.csv")
    df_test = None

    # NOTE(review): LatentClassConstrained is not imported by name in the
    # visible part of this file - presumably it arrives through one of the
    # wildcard imports; confirm.
    latent_classes = LatentClassConstrained(num_classes=number_of_classes)

    class_asvars = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7',
                    'nonsig1', 'nonsig2', 'nonsig3', 'nonsig4', 'nonsig5',
                    '_int_individual']
    class_memvars = ['z1', 'z2', 'nonsig_isvar1', 'nonsig_isvar2']

    for class_no in range(1, number_of_classes + 1):
        latent_classes.populate_class(
            f"latent_class_{class_no}",
            asvar=list(class_asvars),
            isvars=[],
            randvars=[],
            # The first class cannot have membership variables.
            memvars=[] if class_no == 1 else list(class_memvars),
            req_asvar=[],
            req_isvars=[],
            req_randvars=[],
            req_memvars=[],
        )

    print("Latent Class 1 Data:")
    print(latent_classes.get_class("latent_class_1"))

    print("\nAll Latent Classes:")
    pprint.pprint(latent_classes.get_all_classes())

    varnames_gbl = latent_classes.get_global_asvars_randvars()
    gbl_asvars = varnames_gbl['asvars']
    gbl_isvars = varnames_gbl['isvars']
    gbl_memvars = varnames_gbl['memvars']

    # De-duplicate while keeping first-seen order. list(set(...)) gave an
    # order that could differ between runs because of string-hash
    # randomisation, which made the searches hard to reproduce.
    varnames = list(dict.fromkeys(gbl_asvars + gbl_isvars + gbl_memvars))

    print(gbl_asvars)
    print('Running Latent Class Search')

    # Fail early, as the original column lookup did, if a collected variable
    # is not present in the data.
    missing = [name for name in varnames if name not in df.columns]
    if missing:
        raise KeyError(f"{missing} not in index")

    # Search options.
    asvarnames = gbl_asvars    # class-specific variables
    isvarnames = gbl_isvars    # class-ind specific variables
    memvarnames = gbl_memvars  # class membership variables

    choice_id = df['id']
    ind_id = df['id']
    choices = df['choice']     # choice variable column
    alt_var = df['alt']        # alternative variable column
    base_alt = None            # reference alternative
    distr = ['n', 'u', 't']    # random distributions to select from
    choice_set = ['1', '2', '3']

    # Single objective given as [KPI, sign].
    criterions = [['bic', -1]]

    latent_class = True

    parameters = Parameters(
        criterions=criterions, df=df, distr=distr, df_test=df_test,
        choice_set=choice_set, alt_var=alt_var, varnames=varnames,
        isvarnames=isvarnames, asvarnames=asvarnames,
        mem_vars=memvarnames, choices=choices,
        choice_id=choice_id, ind_id=ind_id, latent_class=latent_class,
        allow_random=False, base_alt=base_alt,
        allow_bcvars=False, n_draws=200,
        min_classes=number_of_classes, max_classes=number_of_classes,
        num_classes=number_of_classes, ps_intercept=False,
        optimise_class=True, ftol_lccm=1e-4, LCR=latent_classes,
    )

    # Work on a copy so the fixed-theta overrides do not touch `parameters`.
    final_parameters = copy.deepcopy(parameters)
    final_parameters.fixed_thetas = True
    final_parameters.isvarnames = asvarnames
    final_parameters.optimise_class = True

    # Final fit, started without an initial solution.
    print("Final Phase")
    cntr_arr = covering_arrays(index)
    sa_parms = {
        'ctrl': cntr_arr,
        'max_classes': number_of_classes,
        'min_classes': number_of_classes,
        'id_num': f'Ashkay_c{number_of_classes}_p3',
    }
    final_sol = call_siman(final_parameters, None, **sa_parms)
    return final_sol
|
|
465
|
+
|
|
466
|
+
''' ----------------------------------------------------------- '''
|
|
467
|
+
''' SCRIPT '''
|
|
468
|
+
''' ----------------------------------------------------------- '''
|
|
469
|
+
def optimise_electricity():
    """Search for a model specification on the electricity supplier data.

    The data describe the choice of electricity supplier, collected in
    California by the Electric Power Research Institute (Goett, 1998). In a
    stated-preference survey, 361 residential customers were asked about
    their preferences for electricity plans. The panel holds 4,308
    observations: each customer faced up to 12 choice scenarios, each
    offering four plans. Scenarios were built from six attributes:

    * ``pf``   - fixed price of the plan (7 or 9 cents/kWh);
    * ``cl``   - contract length during which switching plans is penalised
      (no contract, 1 year or 5 years);
    * ``wk``   - dummy: the supplier is well known;
    * ``tod``  - time-of-day rates (11 cents/kWh from 8AM to 8PM and
      5 cents/kWh from 8PM to 8AM);
    * ``seas`` - seasonal rates (10 cents/kWh in summer, 8 cents/kWh in
      winter and 6 cents/kWh in spring and fall);
    * ``loc``  - dummy: the supplier is local.

    Loads ``electricity.csv`` (path relative to the working directory) and
    runs ``call_siman`` with log-likelihood as the single criterion.
    """
    # ---- problem data -----------------------------------------------------
    data = pd.read_csv("electricity.csv")

    # Every explanatory variable; the same list is offered to the search as
    # both alternative-specific and individual-specific candidates.
    explanatory = ['pf', 'cl', 'loc', 'wk', 'tod', 'seas']

    # ---- objective --------------------------------------------------------
    # Single objective given as [KPI, sign].
    # Options: {mae:-1, bic:-1, aic:-1, loglik:1}.
    # Multi-objective alternatives: [['loglik',1], ['mae',-1]] or
    # [['bic',-1], ['mae',-1]].
    objective = [['loglik', 1]]

    # ---- search parameters ------------------------------------------------
    # latent_class is off here; the original set aside 2 latent classes
    # (choose from {2,3,4,5}) for use when it is switched on.
    # Note: allow_corvars is True by default.
    parameters = Parameters(
        criterions=objective,
        df=data,
        distr=['n', 'u', 't'],          # random distributions to select from
        df_test=None,
        choice_set=['1', '2', '3', '4'],
        alt_var=data['alt'],            # alternative variable column
        varnames=explanatory,
        isvarnames=explanatory,
        asvarnames=explanatory,
        choices=data['choice'],         # choice variable column
        choice_id=data['chid'],
        ind_id=data['id'],
        latent_class=False,
        allow_random=True,
        base_alt=None,                  # reference alternative
        allow_bcvars=True,
        n_draws=200,
        verbose=True,
    )

    # ---- starting solution ------------------------------------------------
    # None lets the solver start without an initial solution. A hand-built
    # start is possible, but the user must know the rules, e.g.:
    #   init_sol = Solution(len(criterions))
    #   init_sol.set_asvar(['cl','wk','tod'])
    #   init_sol.set_randvar(['cl','tod','wk'], ['t','t','u'])
    start = None

    # ---- run the search ---------------------------------------------------
    # Other drivers: call_threshold(parameters, init_sol),
    # call_parsa(parameters, init_sol, 2), call_parcopsa(parameters, init_sol, 2).
    call_siman(parameters, start, ctrl=(10, 0.001, 1000, 10))
|
|
547
|
+
def optimise_latent_3_phase_search(num_classes=3, num_of_iterations=1000, initial_iterations=200):
    """Run a three-phase annealing search for a latent class model.

    Loads ``electricity.csv`` (path relative to the working directory) and
    runs ``call_siman`` three times with BIC as the single criterion:

    1. membership phase (``optimise_membership=True``);
    2. class phase (``optimise_class=True``), with the phase-1 result passed
       as ``fixed_solution``;
    3. final phase with both options on, started from the phase-2 result.

    Args:
        num_classes: Number of latent classes. Used for ``min_classes`` and
            ``num_classes``; the annealing runs get ``max_classes`` of
            ``num_classes + 1``.
        num_of_iterations: Third entry of the phase-2 control tuple
            ``(tI, tF, max_temp_steps, max_iter)``.
        initial_iterations: Third entry of the phase-1 control tuple.

    Returns:
        The value returned by ``call_siman`` for the final phase.
    """
    # Imported here so the function does not depend on `copy` leaking in
    # through the wildcard imports at the top of the file.
    import copy

    df = pd.read_csv("electricity.csv")
    df_test = None
    varnames = ['pf', 'cl', 'loc', 'wk', 'tod', 'seas']  # all explanatory variables
    asvarnames = varnames  # alternative-specific variables in varnames
    isvarnames = []        # individual-specific variables in varnames

    # Membership candidates: every variable except those listed here. The
    # original compared each name with a list using `!=`, which is always
    # true, so nothing was ever excluded; the empty list keeps that result.
    excluded_member_vars = []
    memvarnames = [name for name in varnames if name not in excluded_member_vars]

    choice_id = df['chid']
    ind_id = df['id']
    choices = df['choice']   # choice variable column
    alt_var = df['alt']      # alternative variable column
    base_alt = None          # reference alternative
    distr = ['n', 'u', 't']  # random distributions to select from
    choice_set = ['1', '2', '3', '4']

    # Single objective given as [KPI, sign]. Alternatives: [['loglik', 1]],
    # [['aic',-1]], [['loglik',1], ['mae',-1]], [['bic',-1], ['mae',-1]].
    criterions = [['bic', -1]]

    latent_class = True
    parameters = Parameters(
        criterions=criterions, df=df, distr=distr, df_test=df_test,
        choice_set=choice_set, alt_var=alt_var, varnames=varnames,
        isvarnames=isvarnames, asvarnames=asvarnames,
        mem_vars=memvarnames, choices=choices,
        choice_id=choice_id, ind_id=ind_id, latent_class=latent_class,
        allow_random=True, base_alt=base_alt,
        allow_bcvars=False, n_draws=200,
        min_classes=num_classes, max_classes=num_classes,
        num_classes=num_classes, ps_intercept=True, optimise_class=True,
    )

    # Fixed-theta setup for phases 2 and 3. These must be copies: with a
    # plain assignment the overrides below were applied to `parameters`
    # itself, so phase 1 already ran with the phase-2 settings.
    parameters_2nd = copy.deepcopy(parameters)
    parameters_2nd.fixed_thetas = True
    parameters_2nd.isvarnames = varnames
    parameters_2nd.optimise_class = True

    parameters_3rd = copy.deepcopy(parameters_2nd)

    init_sol = None

    # Phase 1: keep the class-specific effects as given and search over the
    # class membership variables only.
    print("1st Phase, Optimize Membership")
    sa_parms = {
        'ctrl': (10, 0.001, initial_iterations, 2),
        'max_classes': num_classes + 1,
        'min_classes': num_classes,
        'optimise_membership': True,
        'id_num': f'Elec_c{num_classes}_p1',
    }
    best_member = call_siman(parameters, init_sol, **sa_parms)

    # Phase 2: optimise the betas, searching over the classes only.
    print("2nd Phase, Optimize Classes")
    sa_parms = {
        'ctrl': (10, 0.001, num_of_iterations, 10),
        'max_classes': num_classes + 1,
        'min_classes': num_classes,
        'optimise_membership': False,
        'optimise_class': True,
        'fixed_solution': best_member,
        'id_num': f'Elec_c{num_classes}_p2',
    }
    best_joint = call_siman(parameters_2nd, init_sol, **sa_parms)

    # Final fit, started from the best joint solution.
    print("Final Phase")
    sa_parms = {
        'ctrl': (10, 0.001, 5, 1),
        'max_classes': num_classes + 1,
        'min_classes': num_classes,
        'optimise_membership': True,
        'optimise_class': True,
        'id_num': f'Elec_c{num_classes}_p3',
    }
    final_sol = call_siman(parameters_3rd, best_joint, **sa_parms)
    return final_sol
|
|
626
|
+
|
|
627
|
+
def optimise_latent_swiss(num_classes=3, num_of_iterations=1000, number_of_initials=200):
    """Run the three-phase latent-class search on the Swissmetro data set.

    Reads ``swissmetro_long_1.csv`` from the working directory, builds a
    ``Parameters`` object with ``latent_class=True`` and then calls
    ``call_siman`` three times:

    1. with ``optimise_membership=True`` and no starting solution;
    2. with ``optimise_membership=False``, ``optimise_class=True`` and the
       phase-1 result passed as ``fixed_solution``;
    3. a short run started from the phase-2 result.

    Args:
        num_classes: Passed to ``Parameters`` as ``min_classes``,
            ``max_classes`` and ``num_classes``; each annealing call gets
            ``min_classes=num_classes`` and ``max_classes=num_classes + 1``.
        num_of_iterations: Third entry of the phase-2 ``ctrl`` tuple
            (presumably the iteration budget -- confirm against call_siman).
        number_of_initials: Third entry of the phase-1 ``ctrl`` tuple
            (presumably the iteration budget -- confirm against call_siman).

    Returns:
        None. The result of the last ``call_siman`` call is not returned.
    """
    df = pd.read_csv("swissmetro_long_1.csv")
    df_test = None

    # All explanatory variables to be included in the model.
    varnames = ['TT_SCALED', 'CO_SCALED', 'HE', 'SEATS']
    asvarnames = varnames  # alternative-specific variables in varnames
    isvarnames = []  # individual-specific variables in varnames

    # The previous filter compared each name (a str) with a list literal,
    # which is never equal, so it kept every name. Copy the list explicitly.
    # NOTE(review): a second, never-used list ('AGE', 'MALE', 'INCOME', 'GA',
    # 'WHO', 'FIRST', 'LUGGAGE') was defined next to this one; confirm whether
    # it was meant to be the membership variable list instead.
    memvarnames = list(varnames)  # member-specific variables

    choice_id = df['CHID']
    ind_id = df['ID']
    choices = df['CHOICE']  # choice column
    alt_var = df['ALT']  # alternative column
    base_alt = None  # reference alternative
    distr = ['n', 'u', 't']  # random distributions to select from
    choice_set = ['CAR', 'SM', 'TRAIN']  # 1 2 3 recode if broken

    # Single objective: (KPI, sign) pairs placed in a list.
    criterions = [['bic', -1]]

    latent_class = True
    parameters = Parameters(
        criterions=criterions, df=df, distr=distr, df_test=df_test,
        choice_set=choice_set, alt_var=alt_var, varnames=varnames,
        isvarnames=isvarnames, asvarnames=asvarnames, mem_vars=memvarnames,
        choices=choices, choice_id=choice_id, ind_id=ind_id,
        latent_class=latent_class, allow_random=True, base_alt=base_alt,
        allow_bcvars=False, n_draws=200, min_classes=num_classes,
        max_classes=num_classes, num_classes=num_classes, ps_intercept=True,
        optimise_class=True,
    )

    # Setting up for fixed thetas.
    # NOTE(review): these are aliases of `parameters`, not copies, so the
    # attribute assignments below are already in effect when phase 1 runs.
    # Behaviour is kept as-is; confirm whether separate objects were intended.
    parameters_2nd = parameters
    parameters_2nd.fixed_thetas = True
    parameters_2nd.isvarnames = varnames  # adding in asvars
    parameters_2nd.optimise_class = True
    parameters_3rd = parameters_2nd

    # No user-defined starting solution.
    init_sol = None

    # Phase 1: optimise membership.
    print("1st Phase, Optimize Membership")
    sa_parms = {
        'ctrl': (10, 0.001, number_of_initials, 10),
        'max_classes': num_classes + 1,
        'min_classes': num_classes,
        'optimise_membership': True,
        'id_num': f'Swiss_c{num_classes}_p1',
    }
    best_member = call_siman(parameters, init_sol, **sa_parms)

    # Phase 2: optimise the betas, play around with only the classes.
    print("2nd Phase, Optimize Classes")
    sa_parms = {
        'ctrl': (10, 0.001, num_of_iterations, 10),
        'max_classes': num_classes + 1,
        'min_classes': num_classes,
        'optimise_membership': False,
        'optimise_class': True,
        'fixed_solution': best_member,
        'id_num': f'Swiss_c{num_classes}_p2',
    }
    best_joint = call_siman(parameters_2nd, init_sol, **sa_parms)

    # Final fit: inject the best joint solution as the starting point.
    print("Final Phase")
    sa_parms = {
        'ctrl': (10, 0.001, 5, 1),
        'max_classes': num_classes + 1,
        'min_classes': num_classes,
        'optimise_membership': True,
        'optimise_class': True,
        'id_num': f'Swiss_c{num_classes}_p3',
    }
    call_siman(parameters_3rd, best_joint, **sa_parms)
|
|
705
|
+
|
|
706
|
+
''' ----------------------------------------------------------- '''
|
|
707
|
+
''' SCRIPT '''
|
|
708
|
+
''' ----------------------------------------------------------- '''
|
|
709
|
+
def optimise_new_syn():
    """Run the simulated-annealing search on the synthetic MOOF seed-6 data.

    Loads the train and test CSV files from the working directory, adds
    Box-Cox transformed copies of ``added_random4`` and ``added_random5`` to
    both frames, builds a non-latent ``Parameters`` object and hands it to
    ``call_siman`` without a starting solution.
    """
    train = pd.read_csv("New_Syn_MOOF_TRAIN_seed6.csv")
    holdout = pd.read_csv("New_Syn_MOOF_TEST_seed6.csv")

    # Manually transform the variables to avoid estimation of lambda for
    # better convergence.
    # NOTE(review): the 'bc_*' columns are not listed in `asvarnames` below;
    # only the disabled snippet at the end of this function refers to them.
    for frame in (train, holdout):
        frame['bc_added_random4'] = scipy.stats.boxcox(frame['added_random4'], 0.01)
        frame['bc_added_random5'] = scipy.stats.boxcox(frame['added_random5'], 0.05)

    # Identifier / choice / alternative columns for both samples.
    choice_id, test_choice_id = train['choice_id'], holdout['choice_id']
    ind_id, test_ind_id = train['ind_id'], holdout['ind_id']
    alt_var, test_alt_var = train['alt'], holdout['alt']
    choices, test_choices = train['choice'], holdout['choice']

    distr = ['n', 'u', 't']
    choice_set = ['1', '2', '3']

    asvarnames = [
        'added_fixed1', 'added_fixed2', 'added_fixed3', 'added_fixed4',
        'added_fixed5', 'added_fixed6',
        'nonsig1', 'nonsig2', 'nonsig3', 'nonsig4', 'nonsig5',
        'added_random1', 'added_random2', 'added_random3', 'added_random4',
        'added_random5', 'added_random6', 'added_random7',
    ]
    isvarnames = []
    varnames = asvarnames + isvarnames

    # Single objective; the multi-objective variant used elsewhere is
    # [['loglik', 1], ['mae', -1]].
    criterions = [['loglik', 1]]

    parameters = Parameters(
        criterions=criterions,
        df=train,
        distr=distr,
        df_test=holdout,
        choice_set=choice_set,
        alt_var=alt_var,
        test_alt_var=test_alt_var,
        varnames=varnames,
        isvarnames=isvarnames,
        asvarnames=asvarnames,
        choices=choices,
        test_choices=test_choices,
        choice_id=choice_id,
        test_choice_id=test_choice_id,
        ind_id=ind_id,
        test_ind_id=test_ind_id,
        latent_class=False,
        allow_random=True,
        base_alt=None,
        allow_bcvars=False,
        n_draws=200,
    )

    init_sol = None
    call_siman(parameters, init_sol)
    # Alternative search drivers: call_thresold(parameters, init_sol),
    # call_parcopsa(parameters, init_sol)

    # Disabled reference snippet (plain string, never executed): fits a
    # mixed logit on the same data and prints MAE / MAPE.
    '''varnames = ['added_fixed1', 'added_fixed2', 'added_fixed3', 'added_fixed4', 'added_fixed5', 'added_fixed6',
    'added_random1', 'added_random2', 'added_random3',
    'bc_added_random4', 'bc_added_random5', 'added_random6', 'added_random7']

    X = df[varnames].values
    y = df['choice'].values
    av = None
    test_av = None
    weight_var = None
    test_weight_var = None
    isvars = []
    transvars = [] # ['added_random4', 'added_random5']
    randvars = {'added_random1': 'n', 'added_random2': 'n', 'added_random3': 'n',
    'bc_added_random4': 'n', 'bc_added_random5': 'n', 'added_random6': 'u', 'added_random7': 't'}

    correlated_vars = ['added_random1', 'added_random2', 'added_random3']
    model = MixedLogit()
    model.setup(X,y, ids=df['choice_id'].values, panels=df['ind_id'].values, varnames=varnames,
    isvars=isvars, n_draws=200, correlated_vars=correlated_vars, transvars=transvars, randvars=randvars, alts=df['alt'] )
    # gtol=2e-6, ftol=1e-8,method="L-BFGS-B",
    model.fit()
    model.summarise()

    choice_set = [1,2,3]
    def_vals = model.coeff_est
    X_test = df_test[varnames].values
    y_test = df_test['choice'].values


    # Calculating MAE
    # Choice frequecy obtained from estimated model applied on testing sample
    predicted_probabilities_val = model.pred_prob * 100
    obs_freq = model.obs_prob * 100
    MAE = round((1 / len(choice_set)) * (np.sum(abs(predicted_probabilities_val - obs_freq))), 2)
    MAPE = round((1 / len(choice_set)) * (np.sum(abs((predicted_probabilities_val - obs_freq) / obs_freq))))
    print("MAE = ", MAE,"; MAPE = ", MAPE)'''
|
|
811
|
+
|
|
812
|
+
|
|
813
|
+
''' ----------------------------------------------------------- '''
|
|
814
|
+
''' SCRIPT '''
|
|
815
|
+
''' ----------------------------------------------------------- '''
|
|
816
|
+
'TEST FOR FITTING LATENT CLASS MODEL'
|
|
817
|
+
def latent_synth_4():
    """Fit a two-class ``LatentClassModel`` on the synthetic 4-class MNL data.

    Reads ``artificial_latent_new_4classes_mnl.csv`` from the working
    directory, appends a constant ``ones`` column, and fits a model whose
    class specification contains only ``ones`` for each of the two classes
    and whose membership specification contains only ``_inter``. Progress
    messages and the model summary are printed.

    The CSV used to be read twice and ``varnames`` assigned twice, with the
    first value of each discarded; the file is now read once, at the point
    of the original first read.
    """
    print('testing intercept model')
    df = pd.read_csv("artificial_latent_new_4classes_mnl.csv")

    print('testing synthetic experiment for the laten class, 4 class ')
    varnames = [
        'price', 'time', 'conven', 'comfort', 'meals', 'petfr', 'emipp',
        'income', 'age', 'ones',
    ]

    # Constant column referenced by the class specification below.
    df = df.assign(ones=1)
    model = LatentClassModel()

    X = df[varnames].values
    y = df['choice'].values

    # One membership row; '_inter' is the only entry.
    member_params_spec = np.array([['_inter']], dtype='object')
    # One row per class (two classes), each containing only 'ones'.
    class_params_spec = np.array([['ones'], ['ones']], dtype='object')

    print('do i need to declare intecept')
    model.setup(X, y, panels=df['id'].values, varnames=varnames, num_classes=2,
                class_params_spec=class_params_spec,
                member_params_spec=member_params_spec,
                alts=[1, 2, 3])
    model.reassign_penalty(0.10)
    model.fit()
    model.summarise()
    print('finished')
|
|
858
|
+
def synth_3():
    """Fit a three-class ``LatentClassMixedModel`` on synthetic mixed data.

    Reads ``synth_latent_mixed_3classes.csv`` from the working directory,
    sets up the model with two normally distributed random parameters and
    per-class / per-membership variable specifications, then fits it and
    prints the summary.
    """
    print('testing synthetic experiment for the mixed latent class random parameters...')
    data = pd.read_csv("synth_latent_mixed_3classes.csv")

    varnames = [
        'added_fixed1', 'added_fixed2',
        'nonsig1', 'nonsig2', 'nonsig3',
        'added_random1', 'added_random2',
        'income', 'age', 'gender',
    ]
    X = data[varnames].values
    y = data['choice'].values

    # Two membership rows and three class rows, as passed to setup() below.
    member_params_spec = np.array(
        [['income', 'gender'],
         ['income', 'age']],
        dtype='object',
    )
    class_params_spec = np.array(
        [['added_fixed1', 'added_fixed2'],
         ['added_fixed1', 'added_random1'],
         ['added_fixed2', 'added_random2']],
        dtype='object',
    )
    randvars = {'added_random1': 'n', 'added_random2': 'n'}

    # Starting values; currently not passed to setup() (the corresponding
    # keyword argument is disabled).
    init_class_thetas = np.array([0.1, -0.03, -0.1, 0.02])
    init_class_betas = [
        np.array([-1, 2.5, 1.242992317, 2.040125077, 1.02, 0.90]),
        np.array([1.5, -1, 0.74, 0.81, 1.47, 1.36]),
        np.array([-2, 1, 1.20, 1.65, 1.27, 1.07]),
    ]

    setup_options = {
        'panels': data['ind_id'],
        'n_draws': 100,
        'varnames': varnames,
        'num_classes': 3,
        'class_params_spec': class_params_spec,
        'member_params_spec': member_params_spec,
        'gtol': 1e-5,
        'ftol_lccmm': 1e-3,
        'randvars': randvars,
        'alts': [1, 2, 3],
    }
    model = LatentClassMixedModel()
    model.setup(X, y, **setup_options)
    model.fit()
    model.summarise()
|
|
895
|
+
|
|
896
|
+
def Non_Latent_Search_Template():
    """Template: single-phase, non-latent search on ``MassLong.csv``.

    Reads ``MassLong.csv`` from the working directory, builds a
    ``Parameters`` object with ``latent_class=False`` and a BIC objective,
    and runs one ``call_siman`` search without a starting solution.

    The dead ``asvarnames = varnames`` assignment (overwritten before use)
    and the membership-variable lists, which were built but never passed to
    ``Parameters``, have been removed.

    Returns:
        None. The result of ``call_siman`` is not returned.
    """
    df = pd.read_csv('MassLong.csv')
    print('Pleae Change Data Set ')

    varnames = [
        'Price', 'PT', 'Rideshare', 'Ebike', 'Addon', 'Age',
        'Follow-up', 'Residential', 'Technology',
        'Disability', 'Driving', 'Bike', 'Scooter', 'Multimode', 'Public_Transit',
        'D_walk', 'D_car', 'D_bike', 'Long_w_trips', 'Long_r_trips', 'Long_s_trips',
        'Age_2', 'Age_3', 'Live_alone',
        'Income_1', 'Income_2', 'Income_3', 'MaaS_1', 'MaaS_2', 'MaaS_3', 'MaaS_4',
    ]
    isvarnames = []  # individual-specific variables in varnames

    # Names excluded from the class-specific (asvarnames) list.
    unwanted_class = [
        'Price', 'PT', 'Rideshare', 'Ebike', 'Addon', 'Age', 'Gender',
        'Driving', 'Bike', 'Scooter', 'Multimode', 'Public_Transit',
        'D_walk', 'D_car', 'D_bike', 'Long_w_trips', 'Long_r_trips', 'Long_s_trips',
        'Income_1', 'Income_2', 'Income_3', 'MaaS_1', 'MaaS_2', 'MaaS_3', 'MaaS_4',
    ]
    asvarnames = [name for name in varnames if name not in unwanted_class]

    choice_id = df['CHID']
    ind_id = df['ID']  # I believe this is also panels
    choices = df['CHOICE']  # choice column
    alt_var = df['alt']  # alternative column
    base_alt = None  # reference alternative
    distr = ['n', 'u', 't']  # random distributions to select from
    choice_set = ['1', '2', '3', '4']

    # Single objective: (KPI, sign) pairs placed in a list.
    criterions = [['bic', -1]]

    latent_class = False
    df_test = None
    parameters = Parameters(
        criterions=criterions, df=df, distr=distr, df_test=df_test,
        choice_set=choice_set, alt_var=alt_var, varnames=varnames,
        isvarnames=isvarnames, asvarnames=asvarnames, choices=choices,
        choice_id=choice_id, ind_id=ind_id, latent_class=latent_class,
        allow_random=True, base_alt=base_alt, allow_bcvars=False,
        n_draws=200, ps_intercept=True,
    )

    # No user-defined starting solution.
    init_sol = None

    # Final fit.
    print("Final Phase")
    sa_parms = {'ctrl': (10, 0.001, 50, 1), 'id_num': 'MaaS_c_p3'}
    call_siman(parameters, init_sol, **sa_parms)
|
|
970
|
+
|
|
971
|
+
|
|
972
|
+
def optimise_bstm():
    """Run the simulated-annealing search on the BSTM calibration data.

    Reads ``BSTM_HBS_CAL_ALL.csv`` from the working directory, evaluates a
    hand-picked multinomial-logit starting solution, fits the same
    specification once with ``MultinomialLogit`` and then starts
    ``call_siman`` from the evaluated solution.

    Changes relative to the previous version:
      * The validation CSV is no longer read: the frame was immediately
        replaced by ``None``, so the read only cost time and could fail on a
        missing file.
      * ``'TT_CYCLEL1'`` was listed twice in ``varnames``; the duplicate is
        dropped.
      * The repeated ``init_sol['loglik']`` assignment, the always-true
        ``if init_sol is None`` guard, an unused ``MultinomialLogit``
        instance and several unused locals are removed.
    """
    df = pd.read_csv("BSTM_HBS_CAL_ALL.csv")
    df_test = None  # no validation sample is passed to the search

    # NOTE(review): every other 'TT_*L1' entry is followed by its 'L2'
    # counterpart, so the removed duplicate 'TT_CYCLEL1' was probably meant
    # to be 'TT_CYCLEL2' -- add it here if that column exists in the data.
    varnames = [
        'TT', 'TC', 'TT_CAD', 'TT_CAP',
        'TCPC', 'EMPDENS_CAD', 'EMPDENS_PT', 'VEHADUL_CAD', 'VEHADUL_CAP', 'VEHADUL_W2PT',
        'VEHADUL_PR', 'VEHADUL_KR', 'VEHADUL_CYCLE', 'VEHADUL_WALK', 'VEHPER_CAD', 'PC',
        'TT_CADL1', 'TT_CADL2', 'TT_CAPL1', 'TT_CAPL2', 'TT_W2PTL1', 'TT_W2PTL2',
        'TT_KRL1', 'TT_KRL2', 'TT_PRL1', 'TT_PRL2', 'TT_CYCLEL1',
        'TT_WALKL1', 'TT_WALKL2', 'TCPCL1', 'TCPCL2', 'WAT',
    ]
    asvarnames = varnames
    isvarnames = []

    choice_id = df['TRIPID']
    ind_id = df['TRIPID']
    choices = df['Chosen_Mode']  # choice column
    alt_var = df['alt']  # alternative column
    base_alt = 'WALK'  # reference alternative
    distr = ['n', 'u', 't']  # random distributions to select from
    choice_set = ['CAD', 'CAP', 'W2PT', 'PR', 'KR', 'CYCLE', 'WALK']
    criterions = [['bic', -1]]

    parameters = Parameters(
        criterions=criterions, df=df, distr=distr, df_test=df_test,
        choice_set=choice_set, alt_var=alt_var, varnames=varnames,
        isvarnames=isvarnames, asvarnames=asvarnames, choices=choices,
        choice_id=choice_id, ind_id=ind_id, latent_class=False,
        allow_random=True, base_alt=base_alt, allow_bcvars=False, n_draws=200,
    )

    # Hand-picked starting specification.
    # NOTE(review): 'TT_W2PT', 'TT_CYCLE' and 'TT_WALK' are not in the
    # candidate `varnames` list above; confirm that this is intended.
    start_asvars = [
        'TCPC', 'TT_CAD', 'VEHPER_CAD', 'TT_CAP', 'VEHADUL_CAP',
        'TT_W2PT', 'EMPDENS_PT', 'VEHADUL_W2PT', 'TT_CYCLE', 'TT_WALK', 'VEHADUL_WALK',
    ]

    # Build and evaluate the starting solution. evaluate_mnl() is indexed
    # below as [0] -> 'aic', [1] -> 'bic', [2] -> 'loglik'.
    init_sol = Solution(len(criterions))
    init_sol.set_asvar(list(start_asvars))
    init_sol_v = Search(parameters).evaluate_mnl(init_sol)
    init_sol['aic'] = float(init_sol_v[0])
    init_sol['bic'] = init_sol_v[1]
    init_sol['loglik'] = init_sol_v[2]
    init_sol['obj'] = [init_sol_v[1]]
    print(f'inital_solution{init_sol_v[1]}')

    # Fit the same specification directly as a plain multinomial logit.
    mnl = MultinomialLogit()
    mnl.setup(X=df[start_asvars], y=df['Chosen_Mode'], varnames=start_asvars,
              fit_intercept=True, alts=df['alt'], ids=ind_id,
              avail=None, base_alt='WALK', gtol=1e-04)
    mnl.fit()

    sa_parms = {'ctrl': (100, 0.001, 1000, 1), 'id_num': 'bstm'}
    call_siman(parameters, init_sol, **sa_parms)
    # Alternative search drivers: call_thresold(parameters, init_sol),
    # call_parcopsa(parameters, init_sol)
|
|
1060
|
+
|
|
1061
|
+
|
|
1062
|
+
def MaaS_search(number_of_classes=3, number_of_iterations=1000, initial_iterations=200, **kwargs):
    """Run the three-phase latent-class search on ``MassLong.csv``.

    Reads ``MassLong.csv`` from the working directory, builds a
    ``Parameters`` object with ``latent_class=True`` and calls
    ``call_siman`` three times:

    1. with ``optimise_membership=True`` and no starting solution;
    2. with ``optimise_membership=False``, ``optimise_class=True``, started
       from the phase-1 result, which is also passed as ``fixed_solution``;
    3. a short run started from the phase-2 result.

    Args:
        number_of_classes: Passed as min/max/num classes to ``Parameters``
            and as ``min_classes`` / ``max_classes`` to every annealing call.
        number_of_iterations: Third entry of the phase-2 ``ctrl`` tuple
            (presumably the iteration budget -- confirm against call_siman).
        initial_iterations: Third entry of the phase-1 ``ctrl`` tuple
            (presumably the iteration budget -- confirm against call_siman).
        **kwargs: Only ``multiobjective`` is read; a truthy value adds the
            ``['mae', -1]`` criterion next to ``['bic', -1]``.

    Returns:
        None. The result of the last ``call_siman`` call is not returned.
    """
    df = pd.read_csv('MassLong.csv')
    print('Running Latent Class Search')

    # An earlier, longer list was assigned here and immediately overwritten;
    # compared with it, this list omits 'LGA_2', 'Age_2', 'Live_housemate',
    # 'Fam_singl', 'Part_student' and 'MaaS_1' .. 'MaaS_4'.
    varnames = [
        'Price', 'PT', 'Rideshare', 'Ebike', 'Addon', 'Age',
        'Gender', 'Household', 'Education', 'Employment', 'WFH', 'Income',
        'Follow-up', 'Residential', 'Technology',
        'Disability', 'Driving', 'Bike', 'Scooter', 'Multimode', 'Public_Transit',
        'D_walk', 'D_car', 'D_bike', 'Long_w_trips', 'Long_r_trips', 'Long_s_trips',
        'PT_averse', 'LGA_1', 'LGA_3', 'Age_1', 'Age_3', 'Live_alone',
        'Fam_nokid', 'Fam_kid', 'Full_time', 'Part_time',
        'Casual', 'Home_duties', 'Unemployed', 'Full_student', 'Retired',
        'Income_1', 'Income_2', 'Income_3',
    ]

    # Search options.
    df_test = None
    isvarnames = []  # individual-specific variables in varnames
    unwanted_class = [
        'PT', 'Rideshare', 'Ebike', 'Addon', 'Age', 'Gender',
        'Driving', 'Bike', 'Scooter', 'Multimode', 'Public_Transit',
        'D_walk', 'D_car', 'D_bike', 'Long_w_trips', 'Long_r_trips', 'Long_s_trips',
        'Income_1', 'Income_2', 'Income_3', 'MaaS_1', 'MaaS_2', 'MaaS_3', 'MaaS_4',
        'Live_alone', 'Unemployed',
    ]
    unwanted_member = [
        'MaaS_1', 'MaaS_2', 'MaaS_3', 'MaaS_4', 'Driving', 'Bike', 'Scooter',
        'Multimode', 'Public_Transit', 'Price', 'PT',
    ]
    # member-specific variables
    memvarnames = [name for name in varnames if name not in unwanted_member]
    # class-specific variables
    asvarnames = [name for name in varnames if name not in unwanted_class]

    choice_id = df['CHID']
    ind_id = df['ID']  # I believe this is also panels
    choices = df['CHOICE']  # choice column
    alt_var = df['alt']  # alternative column
    base_alt = None  # reference alternative
    distr = ['n', 'u', 't']  # random distributions to select from
    choice_set = ['1', '2', '3', '4']

    # Single objective or multi-objective: (KPI, sign) pairs in a list.
    if kwargs.get('multiobjective', 0):
        criterions = [['bic', -1], ['mae', -1]]
    else:
        criterions = [['bic', -1]]

    latent_class = True
    parameters = Parameters(
        criterions=criterions, df=df, distr=distr, df_test=df_test,
        choice_set=choice_set, alt_var=alt_var, varnames=varnames,
        isvarnames=isvarnames, asvarnames=asvarnames, mem_vars=memvarnames,
        choices=choices, choice_id=choice_id, ind_id=ind_id,
        latent_class=latent_class, allow_random=False, base_alt=base_alt,
        allow_bcvars=False, n_draws=200, min_classes=number_of_classes,
        max_classes=number_of_classes, num_classes=number_of_classes,
        ps_intercept=True, optimise_class=True, ftol_lccm=1e-5,
        ps_asvars=['Price'],
    )

    # Setting up for fixed thetas.
    # NOTE(review): these are aliases of `parameters`, not copies, so the
    # attribute assignments below are already in effect when phase 1 runs.
    # Behaviour is kept as-is; confirm whether separate objects were intended.
    parameters_2nd = parameters
    parameters_2nd.fixed_thetas = True
    parameters_2nd.isvarnames = varnames  # adding in asvars
    # NOTE(review): the constructor keyword above is `ps_asvars`, while this
    # sets `ps_vars`; verify which attribute the search actually reads.
    parameters_2nd.ps_vars = ['Price']
    parameters_2nd.optimise_class = True
    parameters_3rd = parameters_2nd

    # No user-defined starting solution.
    init_sol = None

    # Phase 1: optimise membership.
    print("1st Phase, Optimize Membership")
    sa_parms = {
        'ctrl': (10, 0.001, initial_iterations, 2),
        'max_classes': number_of_classes,
        'min_classes': number_of_classes,
        'optimise_membership': True,
        'id_num': f'MaaS_c{number_of_classes}_p1',
    }
    best_member = call_siman(parameters, init_sol, **sa_parms)

    # TODO if perturb randvar, need to add it into one of the classes
    # Phase 2: optimise the betas, play around with only the classes.
    print("2nd Phase, Optimize Classes")
    sa_parms = {
        'ctrl': (100, 0.001, number_of_iterations, 5),
        'max_classes': number_of_classes,
        'min_classes': number_of_classes,
        'optimise_membership': False,
        'optimise_class': True,
        'fixed_solution': best_member,
        'id_num': f'MaaS_c{number_of_classes}_p2',
    }
    best_joint = call_siman(parameters_2nd, best_member, **sa_parms)

    # Final fit: inject the best joint solution as the starting point.
    print("Final Phase")
    sa_parms = {
        'ctrl': (10, 0.001, 5, 1),
        'max_classes': number_of_classes,
        'min_classes': number_of_classes,
        'id_num': f'MaaS_c{number_of_classes}_p3',
    }
    call_siman(parameters_3rd, best_joint, **sa_parms)
|
|
1175
|
+
|
|
1176
|
+
def ashkay_search(number_of_classes=3, number_of_iterations=1000, initial_iterations=200, *args, **kwargs):
    """Fit a fixed reference latent class model, then run a three-phase search.

    The function reads ``akshay_long_true.csv`` from the working directory,
    estimates a hand-specified 5-class ``LatentClassModel`` from hard-coded
    starting values, and then runs ``call_siman`` three times:

    1. membership variables only (``optimise_membership=True``),
    2. class-specific variables with the phase-1 solution passed as
       ``fixed_solution``,
    3. a short final run started from the phase-2 solution.

    Args:
        number_of_classes: Used as ``min_classes``/``max_classes``/``num_classes``
            for the search and as ``num_classes`` of ``LatentClassConstrained``.
        number_of_iterations: Third entry of the phase-2 ``ctrl`` tuple.
        initial_iterations: Third entry of the phase-1 ``ctrl`` tuple.
        *args: Accepted for call compatibility; ignored.
        **kwargs: Only ``run_time`` is read (see the note on ``max_time``).

    Returns:
        None. Results are only printed / handled inside the called routines.
    """
    # NOTE(review): read but never used below -- presumably meant to cap the
    # run time of the search; confirm and wire it through or drop it.
    max_time = kwargs.get('run_time', 60*60*12)
    df = pd.read_csv('akshay_long_true.csv')

    df_test = None

    # Variable name lists shared by the reference model and the constrained
    # class definitions (previously copy-pasted at every use).
    member_vars = ['InnerCity', 'InnerRegional', 'Under30', 'Over65', 'College', 'FullTime', 'PartTime', 'Male',
                   'Children', 'Income', 'NDI']
    class_vars = ['LocalPTPayG', 'LDPTPayG', 'TaxiPayG', 'CarRentalPayG', 'CarsharePayG', 'RidesharePayG',
                  'BikesharePayG',
                  'LocalPTUnl', 'LDPTUnl', 'TaxiUnl', 'CarRentalUnl', 'CarshareUnl', 'RideshareUnl', 'BikeshareUnl',
                  'Cost', 'TktInt', 'BkInt', 'RTInf', 'Pers']

    RUN_AKSHAY = 1
    if RUN_AKSHAY:
        print('testing against Akshays model')
        model = LatentClassModel()
        varnames = member_vars + class_vars

        X = df[varnames].values
        y = df['CHOICE'].values
        # Four identical membership rows (intercept + socio-demographics) and
        # five identical class rows, matching num_classes=5 below.
        member_params_spec = np.array([['_inter'] + member_vars] * 4, dtype='object')
        class_params_spec = np.array([class_vars] * 5, dtype='object')

        # 48 starting values = 4 membership rows x 12 membership parameters.
        init_class_thetas = np.array(
            [-1.321318, -0.254239, -0.137624, -9.159877, 0.009594, 1.189211, -0.084255, 0.437849, 0.222736, -2.338727,
             -0.220732, 0.206103,
             0.293479, 0.17829, -0.293836, -0.499868, -0.336, 0.588949, 0.0357, 0.393709, -0.215125, -0.28694, -0.264146,
             -0.871409,
             -1.160788, 0.752398, -0.054771, 0.554518, -0.559022, 0.633359, -0.150176, 0.020715, -0.23028, 0.185878,
             -0.219888, -1.531753,
             -0.833134, -0.168312, -2.27768, 1.136705, 0.093996, 1.672507, 1.29167, 1.49679, 0.423603, 0.249344, -0.832107,
             -2.778636])

        init_class_betas = [np.array([0.441269, 0.448334, 0.288787, 0.35502, 0.216816, 0.198564, 0.069477,
                                      0.346543, 0.233089, 0.323059, 0.333928, 0.149546, 0.124614, 0.0443181,
                                      -0.00741137, 0.036144, -0.00298227, 0.140595, 0.046312]),  # Class 1
                            np.array([0.801542, 0.483616, 0.546757, 0.498264, 0.206961, 0.367382, 0.00124702,
                                      0.587733, 0.398037, 0.5319, 0.369294, 0.246564, -0.100532, -0.141248,
                                      -0.019849, 0.038627, -0.104714, 0.173183, 0.0905047]),  # Class 2
                            np.array([1.28245, 0.704765, 0.8016, 0.145479, 0.340825, 0.554092, -0.0942558,
                                      12.6054, 83.2791, 27.7743, -14.1763, 26.7106, 21.6308, -2.87297,
                                      -32.6663, 0.528885, 0.375195, 0.367734, 0.343927]),  # Class 3
                            np.array([1.18916, 0.562234, 0.58024, -0.00850272, 0.122827, 0.619118, 0.0330975,
                                      0.970455, 0.24954, 0.698946, 0.172871, 0.64793, -0.395843, 0.00472563,
                                      -0.425557, 0.157351, 0.0453663, 0.194574, 0.0677801]),  # Class 4
                            np.array([0] * 19)]  # Class 5

        model.setup(X, y, ids=df['CHID'], panels=df['indID'],
                    varnames=varnames,
                    num_classes=5,
                    class_params_spec=class_params_spec,
                    member_params_spec=member_params_spec,
                    init_class_thetas=init_class_thetas,
                    init_class_betas=init_class_betas,
                    alts=[1, 2],
                    ftol_lccm=1e-2,
                    gtol=1e-3,
                    # verbose = 2
                    )
        model.fit()
        model.summarise()
        print('completed Ashkays model')

    from latent_class_constrained import LatentClassConstrained

    # NOTE(review): only three classes are populated below regardless of
    # number_of_classes -- confirm this is intended for other class counts.
    latent_classes = LatentClassConstrained(num_classes=number_of_classes)

    # Each class gets its own copy of the list so the classes never alias.
    latent_classes.populate_class(
        "latent_class_1",
        asvar=list(class_vars),
        isvars=[],
        randvars=[],
        memvars=[],  # cant have a membership here
        req_asvar=["Cost", "BikeshareUnl", "CarshareUnl", "RideshareUnl"],
        req_isvars=[],
        req_randvars=[],
        req_memvars=[],  # cant have a membership here
    )

    latent_classes.populate_class(
        "latent_class_2",
        asvar=list(class_vars),
        isvars=[],
        randvars=[],
        memvars=['InnerCity', 'InnerRegional', 'Under30', 'College', 'FullTime',
                 'PartTime', 'Male', 'Children', 'Income', 'NDI'],
        req_asvar=['Cost', 'TaxiPayG', 'CarRentalPayG'],
        req_isvars=[],
        req_randvars=[],
        req_memvars=['_inter', 'Male', 'FullTime']
    )

    latent_classes.populate_class(
        "latent_class_3",
        asvar=list(class_vars),
        isvars=[],
        randvars=[],
        memvars=['InnerCity', 'InnerRegional', 'Under30', 'College',
                 'PartTime', 'Income'],
        req_asvar=['Cost'],
        req_isvars=[],
        req_randvars=[],
        req_memvars=['_inter', 'PartTime', 'College']
    )

    print("Latent Class 1 Data:")
    print(latent_classes.get_class("latent_class_1"))

    print("\nAll Latent Classes:")
    import pprint
    pprint.pprint(latent_classes.get_all_classes())

    varnames_gbl = latent_classes.get_global_asvars_randvars()
    gbl_asvars = varnames_gbl['asvars']
    gbl_isvars = varnames_gbl['isvars']
    gbl_memvars = varnames_gbl['memvars']
    # De-duplicate while keeping first-seen order. The previous list(set(...))
    # produced a column order that depends on string hash randomisation, so
    # the design matrix layout could change from one process to the next.
    varnames = list(dict.fromkeys(gbl_asvars + gbl_isvars + gbl_memvars))

    print('Running Latent Class Search')
    # NOTE(review): model, X and y are not consumed by the search below; they
    # are kept because indexing df fails early if a column is missing.
    model = LatentClassModel()
    X = df[varnames].values
    y = df['CHOICE'].values

    # Search options.
    asvarnames = gbl_asvars  # class-specific variables
    isvarnames = gbl_isvars  # class-ind specific variables
    memvarnames = gbl_memvars  # class mem specific variables

    choice_id = df['CHID']
    ind_id = df['indID']  # I believe this is also panels

    choices = df['CHOICE']  # the df column containing the choice variable
    alt_var = df['alt']  # the df column containing the alternative variable
    base_alt = None  # Reference alternative
    distr = ['n', 'u', 't']  # List of random distributions to select from
    choice_set = ['1', '2', '3', '4']

    # Single objective: one [KPI, sign] pair.
    criterions = [['bic', -1]]

    latent_class = True

    parameters = Parameters(criterions=criterions, df=df, distr=distr, df_test=df_test, choice_set=choice_set,
                            alt_var=alt_var, varnames=varnames, isvarnames=isvarnames, asvarnames=asvarnames,
                            mem_vars=memvarnames, choices=choices,
                            choice_id=choice_id, ind_id=ind_id, latent_class=latent_class, allow_random=True,
                            base_alt=base_alt,
                            allow_bcvars=False, n_draws=200, min_classes=number_of_classes,
                            max_classes=number_of_classes, num_classes=number_of_classes, ps_intercept=False,
                            optimise_class=True, ftol_lccm=1e-4, LCR=latent_classes)

    # Second-phase parameters: membership thetas held fixed.
    parameters_2nd = copy.deepcopy(parameters)
    parameters_2nd.fixed_thetas = True
    # NOTE(review): assigns the alternative-specific names to isvarnames, as
    # in the original ("adding in asvars") -- confirm this is intended.
    parameters_2nd.isvarnames = asvarnames
    parameters_2nd.optimise_class = True

    parameters_3rd = copy.deepcopy(parameters_2nd)

    # No starting solution for the first phase.
    init_sol = None

    # Phase 1: optimise class membership variables only.
    print(f"1st Phase, Optimize Membership")
    sa_parms = {'ctrl': (10, 0.001, initial_iterations, 2), 'max_classes': number_of_classes,
                'min_classes': number_of_classes, 'optimise_membership': True,
                'id_num': f'Ashkay_c{number_of_classes}_p1'}
    best_member = call_siman(parameters, init_sol, **sa_parms)
    # TODO if perturb randvar, need to add it into one of the classes

    # Phase 2: optimise the class-specific variables, starting from and
    # fixing the phase-1 solution.
    print(f"2nd Phase, Optimize Classes")
    sa_parms = {'ctrl': (10, 0.001, number_of_iterations, 2), 'max_classes': number_of_classes,
                'min_classes': number_of_classes, 'optimise_membership': False,
                'optimise_class': True, 'fixed_solution': best_member,
                'id_num': f'Ashkay_c{number_of_classes}_p2'}
    best_joint = call_siman(parameters_2nd, best_member, **sa_parms)

    # Final phase: short run seeded with the best joint solution.
    print(f"Final Phase")
    sa_parms = {'ctrl': (10, 0.001, 5, 1), 'max_classes': number_of_classes, 'min_classes': number_of_classes,
                'id_num': f'Ashkay_c{number_of_classes}_p3'}
    final_sol = call_siman(parameters_3rd, best_joint, **sa_parms)
|
|
1424
|
+
|
|
1425
|
+
|
|
1426
|
+
|
|
1427
|
+
''' ----------------------------------------------------------- '''
|
|
1428
|
+
''' SCRIPT. Testing mixed logit with correlated vars '''
|
|
1429
|
+
''' ----------------------------------------------------------- '''
|
|
1430
|
+
def fit_electricity_mxl():
    """Fit a mixed logit with correlated random parameters on the electricity data.

    The CSV is looked up as ``electricity.csv`` first and, if that file does
    not exist, as ``data/electricity.csv``. All variables except ``seas`` get
    a normal ('n') random coefficient; ``pf`` and ``wk`` are declared
    correlated. The fitted model summary is printed.
    """
    model = MixedLogit()
    try:
        df = pd.read_csv("electricity.csv")
    except FileNotFoundError:
        # Only fall back when the file is missing; a bare ``except`` would
        # also hide parse errors and KeyboardInterrupt.
        df = pd.read_csv("data/electricity.csv")
    varnames = ['pf', 'cl', 'loc', 'wk', 'tod', 'seas']
    isvars = ['seas']
    X = df[varnames].values
    y = df['choice'].values
    transvars = []
    randvars = {'pf': 'n', 'cl': 'n', 'loc': 'n', 'wk': 'n', 'tod': 'n'}
    # correlated_vars = True
    correlated_vars = ['pf', 'wk']  # Optional
    model.setup(X, y, ids=df['chid'].values, panels=df['id'].values, varnames=varnames,
                isvars=isvars, transvars=transvars, correlated_vars=correlated_vars, randvars=randvars,
                fit_intercept=False, alts=df['alt'], n_draws=200, mnl_init=True)
    model.fit()
    model.get_loglik_null()
    model.summarise()
|
|
1452
|
+
|
|
1453
|
+
def optimise_synth_1a():
    """Run the ``call_siman`` search on ``data/artificial_1a_multi_many.csv``.

    Builds a non-latent-class ``Parameters`` object with BIC as the single
    criterion and starts the search without an initial solution.
    """
    print('file')
    cwd = os.getcwd()

    # Relative data paths below are resolved against this directory.
    print("Current Working Directory:", cwd)
    data = pd.read_csv("data/artificial_1a_multi_many.csv")

    # added_fixed1..10, nonsig1..5, cat_var1..3 -- in that order.
    alt_specific = (
        [f'added_fixed{k}' for k in range(1, 11)]
        + [f'nonsig{k}' for k in range(1, 6)]
        + [f'cat_var{k}' for k in range(1, 4)]
    )
    ind_specific = ['added_isvar1', 'added_isvar2']

    search_config = dict(
        criterions=[['bic', 1]],
        df=data,
        distr=['n', 'u', 't'],  # random distributions to select from
        df_test=None,
        choice_set=['1', '2', '3'],
        alt_var=data['alt'],  # column holding the alternative
        varnames=alt_specific + ind_specific,
        isvarnames=ind_specific,
        asvarnames=alt_specific,
        choices=data['choice'],  # column holding the choice indicator
        choice_id=data['id'],
        ind_id=None,
        latent_class=False,
        allow_random=True,
        base_alt=None,  # no reference alternative
        allow_bcvars=False,
        n_draws=200,
        verbose=True,
    )
    parameters = Parameters(**search_config)

    # No starting solution is supplied.
    call_siman(parameters, None)
|
|
1510
|
+
|
|
1511
|
+
# call_thresold(parameters, init_sol)
|
|
1512
|
+
|
|
1513
|
+
# call_parcopsa(parameters, init_sol)
|
|
1514
|
+
|
|
1515
|
+
# call_harmony(parameters, init_sol)
|
|
1516
|
+
|
|
1517
|
+
|
|
1518
|
+
def estimate_init_mnls():
    """Run the ``call_siman`` search on ``artificial_latent_new.csv``.

    Uses BIC as the single criterion, allows random and correlated
    parameters, and disables latent classes and Box-Cox variables.
    """
    here = os.getcwd()
    print(f'current directory is {here}')

    frame = pd.read_csv("artificial_latent_new.csv")

    alt_specific = ['price', 'time', 'conven', 'comfort', 'meals', 'petfr', 'emipp']
    alt_specific += [f'nonsig{k}' for k in range(1, 6)]
    ind_specific = ['income', 'age', 'nonsig_isvar1', 'nonsig_isvar2']
    every_var = alt_specific + ind_specific

    parameters = Parameters(
        criterions=[['bic', 1]],
        df=frame,
        distr=['n', 'u', 't'],  # random distributions to select from
        df_test=None,
        choice_set=['1', '2', '3'],
        alt_var=frame['alt'],  # column holding the alternative
        varnames=every_var,
        isvarnames=ind_specific,
        asvarnames=alt_specific,
        choices=frame['choice'],  # column holding the choice indicator
        choice_id=frame['id'],
        ind_id=None,
        latent_class=False,
        allow_random=True,
        base_alt=None,  # no reference alternative
        allow_bcvars=False,
        allow_corvars=True,
        n_draws=200,
    )

    # Start the search without an initial solution.
    start = None
    call_siman(parameters, start)
|
|
1548
|
+
#call_thresold(parameters, init_sol)
|
|
1549
|
+
#call_parcopsa(parameters, init_sol)
|
|
1550
|
+
#call_harmony(parameters, init_sol)
|
|
1551
|
+
|
|
1552
|
+
# }
|
|
1553
|
+
def optimise_orderered():
    """Fit ordered-logit variants on the diamonds data set.

    Reads ``ord_log_data/diamonds.csv``, integer-codes ``color``, ``clarity``
    and ``cut``, adds a ``vol`` column (x * y * z) and uses ``cut_int`` as
    the ordinal outcome. ``FIT`` selects optional reference fits; with its
    current value none of them run. The data is then reshaped to long format
    and a ``MixedOrderedLogit`` with normal random coefficients on ``carat``
    and ``vol`` is fitted and reported.

    NOTE(review): the source this was recovered from had lost its
    indentation; the extent of the ``if``/``elif`` bodies below was inferred
    from which names each statement needs -- confirm against the real file.
    """
    from ordered_logit_multinomial import OrderedLogitML
    from ordered_logit import OrderedLogitLong

    print('optimising ordered')
    df = pd.read_csv("ord_log_data/diamonds.csv")

    color = ['D', 'E', 'F', 'G', 'H', 'I', 'J']
    df['color'] = pd.Categorical(df['color'], categories=color, ordered=True)
    df['color'] = df['color'].cat.codes

    # NOTE(review): clarity and cut categories are listed alphabetically,
    # which may not match the usual quality ordering of these grades (e.g.
    # Fair < Good < Very Good < Premium < Ideal for cut). Since the codes
    # feed an *ordered* model, confirm the intended order.
    clarity = ['I1', 'SI1', 'SI2', 'VS1', 'VS2', 'VVS1', 'VVS2']
    df['clarity'] = pd.Categorical(df['clarity'], categories=clarity, ordered=True)
    df['clarity'] = df['clarity'].cat.codes

    df['vol'] = np.array(df['x'] * df['y'] * df['z'])

    cut = ['Fair', 'Good', 'Ideal', 'Premium', 'Very Good']
    df['cut'] = pd.Categorical(df['cut'], categories=cut, ordered=True)
    df['cut_int'] = df['cut'].cat.codes  # Values in {0,1,2,3,4}

    X = df[['carat', 'vol', 'price']]  # Independent variables
    y = df['cut_int']  # Dependent variable
    ncat = 5

    FIT = 'fit ignore'  # other values: 'fit robs', 'fit stats', 'fit long zeke'
    if FIT == 'fit robs':
        mod = OrderedLogit(X=X, y=y, J=ncat, distr='logit', start=None, normalize=False, fit_intercept=False)
        mod.fit()
        mod.report()
    elif FIT == 'fit stats':
        import statsmodels.api as sm
        from statsmodels.miscmodels.ordinal_model import OrderedModel
        model = OrderedModel(y, X, distr='logit')
        result = model.fit()

        # Display the results
        print(result.summary())
        print('finished ordered logit')
        num_of_thresholds = 4
        print(model.transform_threshold_params(result.params[-num_of_thresholds:]))

    print('now do a multinomial logit fit trying to get in the ordered logit')
    df['ids'] = np.arange(len(df))
    df_long = misc.wide_to_long(df, id_col='ids', alt_list=cut, alt_name='alt')
    # Choice indicator: True on the row whose alternative equals the observed cut.
    df_long['choice'] = df_long['cut'] == df_long['alt']
    y = df_long['choice'].values
    df_long['ones'] = 1

    # Alternative label of each long-format row.
    alt_var = df_long['alt'].values

    ids = df_long['ids']
    varnames = ['carat', 'vol', 'price']
    X = df_long[varnames].values

    print('long form implementation of the ordered logit')
    if FIT == 'fit long zeke':
        moll = OrderedLogitLong(X=X,
                                y=y,
                                varnames=varnames,
                                ids=ids,
                                J=ncat,
                                distr='logit',
                                start=None,
                                normalize=False,
                                fit_intercept=False)
        moll.fit(method='BFGS')
        moll.report()

    print('now I want to do OrderedLogitMixed')

    print('long form implementation of the ordered logit')
    randvars = {'carat': 'n', 'vol': 'n'}
    mol = MixedOrderedLogit(X=X,
                            y=y,
                            varnames=varnames,
                            ids=ids,
                            J=ncat,
                            alts=alt_var,
                            randvars=randvars,
                            distr='logit',
                            start=None,
                            normalize=False,
                            fit_intercept=False)
    mol.fit()
    mol.report()
    print('success')
|
|
1668
|
+
#mol.setup(X=X, y=y, ids=ids, varnames=varnames, isvars=isvars, alts=alt_var, fit_intercept=False)
|
|
1669
|
+
|
|
1670
|
+
|
|
1671
|
+
|
|
1672
|
+
def Medhi():
    """Fit a multinomial logit and then a mixed logit on ``dummy_parking.csv``.

    Both models use the same six explanatory variables; the mixed logit adds
    normal ('n') random coefficients for ``No_info``, ``ParkMeter`` and
    ``No_Remind``, uses ``ID`` as the panel identifier and 200 Halton draws.
    """
    print('test')
    parking = pd.read_csv("dummy_parking.csv")

    trip_ids = parking['CHID']
    person_ids = parking['ID']

    # All explanatory variables included in the models.
    attributes = ['Automatic', 'ParkMeter', 'Price', 'No_info', 'Tap', 'No_Remind']
    ind_specific = []  # no individual-specific variables
    random_coeffs = {'No_info': 'n', 'ParkMeter': 'n', 'No_Remind': 'n'}

    chosen = parking['Choice']  # column holding the choice indicator
    alternatives = parking['ALT']  # column holding the alternative
    availability = None  # no availability column
    reference_alt = None  # no reference alternative

    mnl = MultinomialLogit()
    mnl.setup(
        X=parking[attributes],
        y=chosen,
        isvars=ind_specific,
        varnames=attributes,
        alts=alternatives,
        ids=trip_ids,
        avail=availability,
        fit_intercept=False,
        base_alt=reference_alt,
    )
    mnl.fit()
    mnl.summarise()

    mxl = MixedLogit()
    mxl.setup(
        X=parking[attributes],
        y=chosen,
        varnames=attributes,
        alts=alternatives,
        isvars=ind_specific,
        ids=trip_ids,
        panels=person_ids,
        avail=availability,
        randvars=random_coeffs,
        n_draws=200,
        halton=True,
    )
    mxl.fit()
    mxl.summarise()
|
|
1707
|
+
|
|
1708
|
+
def Mario():
    """Run the ``call_siman`` search on the xlogit long-format electricity data.

    The CSV is downloaded from the xlogit GitHub repository, so this needs
    network access. All six variables are treated as alternative-specific.
    """
    url = "https://raw.githubusercontent.com/arteagac/xlogit/master/examples/data/electricity_long.csv"
    frame = pd.read_csv(url)

    print(frame.shape)
    every_var = ["pf", "cl", "loc", "wk", "tod", "seas"]
    alternatives_seen = np.unique(frame['alt'])
    alt_specific = ["pf", "cl", "loc", "wk", "tod", "seas"]
    ind_specific = []

    # NOTE(review): this list is built but never passed to Parameters.
    distr = ['n', 'u', 't', 'tn']  # random distributions to select from

    # NOTE(review): 'pst_intercept' differs from the 'ps_intercept' keyword
    # used elsewhere in this file -- one of the two may be a typo.
    settings = {
        'criterions': [['bic', -1]],
        'df': frame,
        'choice_set': alternatives_seen,
        'choice_id': frame['id'],
        'alt_var': frame['alt'],  # column holding the alternative
        'varnames': every_var,
        'isvarnames': ind_specific,
        'asvarnames': alt_specific,
        'choices': frame['choice'],  # column holding the choice indicator
        'ind_id': frame['id'],
        'base_alt': None,  # no reference alternative
        'allow_random': True,
        'allow_corvars': False,
        'allow_bcvars': True,
        'latent_class': False,
        'allow_latent_random': False,
        'allow_latent_bcvars': False,
        'pst_intercept': True,
        'n_draws': 200,
    }
    parameters = Parameters(**settings)

    # No starting solution; the returned search result is not used further.
    search = call_siman(parameters, None)
|
|
1731
|
+
|
|
1732
|
+
|
|
1733
|
+
|
|
1734
|
+
def RRM_f():
    """Fit and report a ``RandomRegret`` model on ``rrm_cran_2016_long.csv``."""
    print('RRM Search')
    from rrm import RandomRegret

    long_data = pd.read_csv("rrm_cran_2016_long.csv")
    # short=False / normalize=True are passed through unchanged.
    regret_model = RandomRegret(df=long_data, short=False, normalize=True)
    regret_model.fit()
    regret_model.report()
|
|
1741
|
+
#RRM(df, False) # short = False
|
|
1742
|
+
|
|
1743
|
+
|
|
1744
|
+
|
|
1745
|
+
def main(args):
    """Run the currently selected experiments, then exit.

    As written, this runs ``Mario()``, ``RRM_f()`` and
    ``optimise_orderered()`` and then terminates the process. The dispatch on
    ``args.model_run_item`` further down is therefore never reached.

    Args:
        args: Parsed command-line namespace; the (currently unreachable)
            dispatch reads ``model_run_item``, ``num_classes``,
            ``iterations`` and ``iterations_i`` from it.
    """
    # sys.exit() replaces the bare exit(): exit is injected by the ``site``
    # module for interactive use and is not guaranteed to exist in programs.
    import sys

    Mario()
    RRM_f()
    np.random.seed(100)  # THIS SEED CAUSES THE EXCEPTION.
    optimise_orderered()
    sys.exit()

    # NOTE(review): everything below is unreachable because of the
    # unconditional exit above. Remove that debugging prelude to re-enable
    # the argument-driven dispatch.
    optimise_electricity()

    # Call other functions based on the arguments
    if args.model_run_item == 1:
        print(f'running askay with {args.num_classes}')
        ashkay_search(args.num_classes, args.iterations, args.iterations_i, **vars(args))
    elif args.model_run_item == 2:
        print(f'running laten with {args.num_classes}')
        optimise_latent_3_phase_search(args.num_classes, args.iterations, args.iterations_i)
    elif args.model_run_item == 3:
        print(f'running MaaS with {args.num_classes}')
        MaaS_search(args.num_classes, args.iterations, args.iterations_i, **vars(args))
    elif args.model_run_item == 4:
        print(f'running Swiss with {args.num_classes}')
        optimise_latent_swiss(args.num_classes, args.iterations, args.iterations_i)
    elif args.model_run_item == 5:
        print('Model Estimation: Non Latent')
        optimise_bstm()
    elif args.model_run_item == 6:
        print('exiting code')
        sys.exit()
    else:
        ashkay_search(args.num_classes)
    print('Finished...')
|
|
1791
|
+
|
|
1792
|
+
'''' ---------------------------------------------------------- '''
|
|
1793
|
+
''' MAIN PROGRAM '''
|
|
1794
|
+
''' ----------------------------------------------------------- '''
|
|
1795
|
+
|
|
1796
|
+
if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(description='Script for model fitting and optimization.')
    # NOTE(review): --seed is parsed but nothing in this block applies it.
    # The help text previously contained stray pasted shell text.
    parser.add_argument('--seed', type=int, default=1, help='Random seed for reproducibility')
    parser.add_argument('--optimise', action='store_true', help='Run optimization functions')
    parser.add_argument('--index', type=int, default=0, help='Index for the covering arrays')
    parser.add_argument('--multiobjective', default=0, help='single or multiobjective search')
    parser.add_argument('--num_classes', type=int, default=3, help='Number of latent classes')
    parser.add_argument('--model_run_item', type=int, default=6, help='run which dataset')
    parser.add_argument('--iterations', type=int, default=2000, help='max number of iterations')
    parser.add_argument('--iterations_i', type=int, default=50, help='first phase number of iterations')
    # NOTE(review): the default evaluates to 14,400,000; if the unit really
    # is seconds that is roughly 166 days -- possibly 60*60*4 was intended.
    parser.add_argument('--run_time', type=int, default=60000*60*4, help='termination of run with respect to time in seconds.')

    args = parser.parse_args()
    main(args)
|
|
1812
|
+
|
|
1813
|
+
|
|
1814
|
+
|
|
1815
|
+
#np.random.seed(1)
|
|
1816
|
+
|
|
1817
|
+
# Testing model fitting:
|
|
1818
|
+
#fit_mnl_example() # Originally ran in 0.1-0.2s
|
|
1819
|
+
#fit_mnl_box_example() # Originally ran in 1s
|
|
1820
|
+
#fit_mxl_example() # Originally ran in about 12s +- 3s
|
|
1821
|
+
#fit_mxl_box_example() # Originally ran in about 20s
|
|
1822
|
+
#fit_lc_example() # Originally ran in about 6s +- 2s
|
|
1823
|
+
#synth_3()
|
|
1824
|
+
#fit_lcm_example() # Originally ran in about 160s + 30s
|
|
1825
|
+
#fit_electricity_mxl()
|
|
1826
|
+
|
|
1827
|
+
# Optimisation:
|
|
1828
|
+
|
|
1829
|
+
|
|
1830
|
+
#ashkay_search()
|
|
1831
|
+
|
|
1832
|
+
#optimise_electricity()
|
|
1833
|
+
#optimise_latent_3_phase_search()
|
|
1834
|
+
#ashkay_search()
|
|
1835
|
+
#optimise()
|
|
1836
|
+
#run_latent_class_mixed()
|
|
1837
|
+
#print('this is for testing')
|
|
1838
|
+
#latent_synth_4()
|
|
1839
|
+
#print('this is for searching for the model')
|
|
1840
|
+
#optimise_latent_3_phase_search()
|
|
1841
|
+
#optimise_electricity()
|
|
1842
|
+
#optimise_synth_latent()
|
|
1843
|
+
|
|
1844
|
+
|
|
1845
|
+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
1846
|
+
# DEBUGGING PARETO FRONT GENERATION
|
|
1847
|
+
'''soln = [{'obj1': 45, 'obj2':2}, {'obj1': 64, 'obj2':8}, {'obj1': 21, 'obj2':2},
|
|
1848
|
+
{'obj1': 88, 'obj2':7}, {'obj1': 13, 'obj2':5}, {'obj1': 36, 'obj2':5}, {'obj1': 83, 'obj2':1},
|
|
1849
|
+
{'obj1': 39, 'obj2':10}, {'obj1': 45, 'obj2':10}, {'obj1': 60, 'obj2':9}]
|
|
1850
|
+
fronts = rank_solutions(soln, 'obj1', 'obj2')
|
|
1851
|
+
print("Fronts=",fronts)
|
|
1852
|
+
crowd = {}
|
|
1853
|
+
key = 'obj2'
|
|
1854
|
+
max_val = max(soln[i][key] for i in range(len(soln))) # Compute max value of objective 'key'
|
|
1855
|
+
min_val = min(soln[i][key] for i in range(len(soln))) # Compute min value of objective 'key'
|
|
1856
|
+
for front in fronts.values():
|
|
1857
|
+
compute_crowding_dist_front(front, soln, crowd, key, max_val, min_val)
|
|
1858
|
+
#print(crowd)
|
|
1859
|
+
|
|
1860
|
+
sorted = sort_solutions(fronts, crowd, soln)
|
|
1861
|
+
print(sorted)
|
|
1862
|
+
'''
|
|
1863
|
+
# }
|
|
1864
|
+
|
|
1865
|
+
# RULES:
|
|
1866
|
+
# --------------------------------------------------------------------------
|
|
1867
|
+
"""
|
|
1868
|
+
1. A variable cannot be an isvar and asvar simultaneously.
|
|
1869
|
+
2. An isvar or asvar can be a random variable – TODO: this appears to conflict with rule 3; confirm which rule holds.
|
|
1870
|
+
3. An isvar cannot be a randvar
|
|
1871
|
+
4. A bcvar cannot be a corvar at the same time
|
|
1872
|
+
5. corvar should be a list of at least 2 randvars
|
|
1873
|
+
6. num_classes (Q) should be > 1, for estimating latent class models
|
|
1874
|
+
7. length of member_params_spec should be == Q-1
|
|
1875
|
+
8. length of class_params_spec should be == Q
|
|
1876
|
+
9. coefficients for member_params_spec cannot be in randvars
|
|
1877
|
+
|
|
1878
|
+
|
|
1879
|
+
Randvars are required for MixedLogit models!
|
|
1880
|
+
"""
|