SearchLibrium 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
old_code/main.py ADDED
@@ -0,0 +1,1880 @@
1
+ """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
2
+ SOLUTION OF EXAMPLE DISCRETE CHOICE MODELS
3
+ """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
4
+ from tabnanny import verbose
5
+
6
+ #from searchlogit.ordered_logit_mixed import OrderedMixedLogit
7
+
8
+ # NOTE:
9
+ # varnames: All explanatory variables that have been defined
10
+ # isvars: Individual-specific variables. These variables do not vary across alternatives.
11
+ # asvars: Alternative-specific variables. These variables vary across alternatives.
12
+ # alts: Alternatives for each choice. E.g., Choice = transport mode, Alternatives = {car, bus, train}
13
+ # base_alts: The base (a.k.a., reference) alternative
14
+ # transvars: Variables that have transformations applied to them
15
+ # randvars: Random variables
16
+ # corvars: Correlated variables
17
+ # bcvars: Box Cox transformed variables
18
+
19
+ ''' ---------------------------------------------------------- '''
20
+ ''' LIBRARIES '''
21
+ ''' ---------------------------------------------------------- '''
22
+ import scipy
23
+ from harmony import*
24
+ from siman import*
25
+ from threshold import*
26
+ from latent_class_mixed_model import LatentClassMixedModel
27
+ from latent_class_model import LatentClassModel
28
+ from mixed_logit import*
29
+ from multinomial_logit import MultinomialLogit
30
+ import pandas as pd
31
+ import argparse
32
+ import os
33
+ from ordered_logit import OrderedLogit, OrderedLogitLong, MixedOrderedLogit
34
+ #import time
35
+
36
+ '''' ---------------------------------------------------------- '''
37
+ ''' SCRIPT. MULTINOMIAL '''
38
+ ''' ----------------------------------------------------------- '''
39
def fit_mnl_example():
    """Fit a multinomial logit model to the Swissmetro data and print a summary.

    Reads ``Swissmetro_final.csv`` from the current working directory, sets up
    a ``MultinomialLogit`` with an intercept and ``'SM'`` as the base
    alternative, fits it, computes the null log-likelihood and summarises.
    Returns nothing.
    """
    data = pd.read_csv("Swissmetro_final.csv")

    explanatory = ['COST', 'TIME', 'HEADWAY', 'SEATS', 'AGE']
    # AGE is declared individual-specific, i.e. it does not vary across alternatives.
    individual_specific = ['AGE']

    estimator = MultinomialLogit()
    estimator.setup(
        X=data[explanatory],
        y=data['CHOICE'],
        varnames=explanatory,
        isvars=individual_specific,
        fit_intercept=True,
        alts=data['alt'],
        ids=data['custom_id'],
        avail=data['AV'],
        base_alt='SM',
        gtol=1e-04,
    )
    estimator.fit()
    estimator.get_loglik_null()
    estimator.summarise()
56
+
57
+ '''' ---------------------------------------------------------- '''
58
+ ''' SCRIPT. MULTINOMIAL '''
59
+ ''' ----------------------------------------------------------- '''
60
def fit_mnl_box_example():
    """Fit a multinomial logit with transformed variables on synthetic data.

    Reads ``artificial_1b_multi_nonlinear.csv`` from the current working
    directory. ``added_fixed1`` and ``added_fixed2`` are passed as ``transvars``
    (variables that have transformations applied to them) and the two
    ``added_isvar*`` columns as individual-specific variables. Returns nothing.
    """
    data = pd.read_csv("artificial_1b_multi_nonlinear.csv")

    fixed_names = [f'added_fixed{k}' for k in range(1, 11)]
    nonsig_names = [f'nonsig{k}' for k in range(1, 6)]
    individual_specific = ['added_isvar1', 'added_isvar2']
    explanatory = fixed_names + nonsig_names + individual_specific

    design = data[explanatory].values
    chosen = data['choice'].values
    transformed = ['added_fixed1', 'added_fixed2']

    estimator = MultinomialLogit()
    estimator.setup(
        design,
        chosen,
        ids=data['id'],
        varnames=explanatory,
        isvars=individual_specific,
        transvars=transformed,
        alts=data['alt'],
    )
    estimator.fit()
    estimator.get_loglik_null()
    estimator.summarise()
78
+
79
+
80
+ ''' ----------------------------------------------------------- '''
81
+ ''' SCRIPT. MIXED LOGIT '''
82
+ ''' ----------------------------------------------------------- '''
83
def fit_mxl_example():
    """Fit a mixed logit with correlated random parameters on synthetic data.

    Reads ``artificial_1h_mixed_corr_trans.csv`` from the current working
    directory. Seven ``added_random*`` variables are random: the first five
    use distribution code ``'n'``, the sixth ``'u'`` and the seventh ``'t'``.
    The first three are additionally passed as correlated variables. No
    intercept is fitted and 200 draws are used. Returns nothing.
    """
    data = pd.read_csv("artificial_1h_mixed_corr_trans.csv")

    random_names = [f'added_random{k}' for k in range(1, 8)]
    explanatory = (
        [f'added_fixed{k}' for k in range(1, 7)]
        + [f'nonsig{k}' for k in range(1, 6)]
        + random_names
    )

    # Distribution code per random variable.
    distributions = {name: 'n' for name in random_names[:5]}
    distributions['added_random6'] = 'u'
    distributions['added_random7'] = 't'

    estimator = MixedLogit()
    estimator.setup(
        X=data[explanatory].values,
        y=data['choice'].values,
        ids=data['choice_id'].values,
        panels=data['ind_id'].values,
        varnames=explanatory,
        isvars=[],
        transvars=[],
        correlated_vars=random_names[:3],
        randvars=distributions,
        fit_intercept=False,
        alts=data['alt'],
        n_draws=200,
    )
    estimator.fit()
    estimator.summarise()
110
+
111
+ ''' ----------------------------------------------------------- '''
112
+ ''' SCRIPT. MIXED LOGIT '''
113
+ ''' ----------------------------------------------------------- '''
114
def fit_mxl_box_example():
    """Fit a mixed logit with transformed random variables on synthetic data.

    Reads ``artificial_1h_mixed_corr_trans.csv`` from the current working
    directory. ``added_random4`` and ``added_random5`` are passed as
    ``transvars``; the ``nonsig*`` columns are left out of the model. No
    intercept is fitted and 200 draws are used. Returns nothing.
    """
    data = pd.read_csv("artificial_1h_mixed_corr_trans.csv")

    # NOTE(review): these pre-transformed columns are added to the frame but
    # are not listed in the variable names passed to the model below --
    # confirm whether they are still needed.
    for column, lam in (('added_random4', 0.01), ('added_random5', 0.0)):
        data['bc_' + column] = scipy.stats.boxcox(data[column], lam)

    random_names = [f'added_random{k}' for k in range(1, 8)]
    explanatory = [f'added_fixed{k}' for k in range(1, 7)] + random_names

    # Distribution code per random variable.
    distributions = dict.fromkeys(random_names[:5], 'n')
    distributions.update({'added_random6': 'u', 'added_random7': 't'})

    estimator = MixedLogit()
    estimator.setup(
        X=data[explanatory].values,
        y=data['choice'].values,
        ids=data['choice_id'].values,
        panels=data['ind_id'].values,
        varnames=explanatory,
        isvars=[],
        transvars=['added_random4', 'added_random5'],
        correlated_vars=['added_random1', 'added_random2', 'added_random3'],
        randvars=distributions,
        fit_intercept=False,
        alts=data['alt'],
        n_draws=200,
    )
    estimator.fit()
    estimator.get_loglik_null()
    estimator.summarise()
144
+
145
+ ''' ----------------------------------------------------------- '''
146
+ ''' SCRIPT. LATENT CLASS '''
147
+ ''' ----------------------------------------------------------- '''
148
def fit_lc_example():
    """Fit a two-class latent class model on synthetic data and summarise it.

    Reads ``artificial_latent_new.csv`` from the current working directory.
    Class membership is specified with ``income`` and ``age``; each of the two
    classes has its own list of class-specific variables. Returns nothing.
    """
    data = pd.read_csv("artificial_latent_new.csv")

    explanatory = [
        'income', 'age', 'price', 'time', 'conven', 'comfort', 'meals',
        'petfr', 'emipp',
    ] + [f'nonsig{k}' for k in range(1, 6)]

    design = data[explanatory].values
    chosen = data['choice'].values

    membership_spec = np.array([['income', 'age']], dtype='object')
    # One row of class-specific variables per latent class (two classes).
    per_class_spec = np.array(
        [
            ['price', 'time', 'conven', 'comfort'],
            ['price', 'time', 'meals', 'petfr', 'emipp'],
        ],
        dtype='object',
    )

    estimator = LatentClassModel()
    estimator.setup(
        design,
        chosen,
        varnames=explanatory,
        ids=data['id'],
        num_classes=2,
        class_params_spec=per_class_spec,
        member_params_spec=membership_spec,
        alts=[1, 2, 3],
        ftol_lccm=1e-3,
        gtol=1e-3,
    )
    estimator.fit()
    estimator.summarise()
168
+
169
+ ''' ----------------------------------------------------------- '''
170
+ ''' SCRIPT. LATENT CLASS MIXED '''
171
+ ''' ----------------------------------------------------------- '''
172
def fit_lcm_example():
    """Fit a three-class latent class mixed model on synthetic data.

    Reads ``synth_latent_mixed_3classes.csv`` from the current working
    directory. All three classes share the same class-specific variables;
    ``added_random1`` and ``added_random2`` are random with distribution code
    ``'n'``. Explicit starting values are supplied for the membership
    parameters (thetas) and the per-class betas. Returns nothing.
    """
    df = pd.read_csv("synth_latent_mixed_3classes.csv")

    varnames = ['added_fixed1', 'added_fixed2', 'added_random1', 'added_random2', 'income', 'age']
    X = df[varnames].values
    y = df['choice'].values

    member_params_spec = np.array([['income', 'age'], ['income', 'age']], dtype='object')

    # Three latent classes, each with the same class-specific variables.
    class_vars = ['added_fixed1', 'added_fixed2', 'added_random1', 'added_random2']
    class_params_spec = np.array([list(class_vars) for _ in range(3)], dtype='object')

    randvars = {'added_random1': 'n', 'added_random2': 'n'}

    # Starting values for the membership parameters. An earlier candidate
    # (0.41381657745904565, -0.19457547164109434, -0.41381657745904565,
    # 0.3891509432821887) used to be assigned first and was immediately
    # overwritten; only the values that actually took effect are kept.
    init_class_thetas = np.array([-1, 5.6, -7.61381657745904565, 10.5])
    init_class_betas = [
        np.array([.181, -.35, 2.411337674531561, 2.1511169162160617, 0.8752373368149019, 0.7313773222836617]),
        np.array([0.23, 0, -0.6268738608685024, -1.3812810694501136, 0.8591208458201691, 1.2928663669444755]),
        np.array([0, .94, 0.8382701667527453, 1.3112939261751486, 1.0298368042405897, 1.0076129422492865]),
    ]

    model = LatentClassMixedModel()
    model.setup(X, y, panels=df['ind_id'], n_draws=200, varnames=varnames, num_classes=3,
                class_params_spec=class_params_spec, member_params_spec=member_params_spec,
                gtol=1e-5, init_class_thetas=init_class_thetas, init_class_betas=init_class_betas,
                randvars=randvars, alts=[1, 2, 3])
    model.fit()
    model.summarise()
207
+
208
+
209
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
210
+ # META HEURISTIC OPTIMISATION APPROACH
211
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
212
+
213
def call_harmony(parameters, init_sol=None):
    """Run a ``HarmonySearch`` with ``max_mem=25`` and ``maxiter=5000``.

    Args:
        parameters: Search parameters forwarded to ``HarmonySearch``.
        init_sol: Optional starting solution forwarded to ``HarmonySearch``.

    Returns nothing; the solver's result is not propagated.
    """
    harmony = HarmonySearch(parameters, init_sol)
    harmony.max_mem, harmony.maxiter = 25, 5000
    harmony.run()
220
+
221
def call_siman(parameters, init_sol=None, **kwargs):
    """Run simulated annealing (``SA``) and return its best solution.

    Args:
        parameters: Search parameters forwarded to ``SA``.
        init_sol: Optional starting solution forwarded to ``SA``.
        **kwargs: Extra options. ``ctrl`` is the control tuple
            ``(tI, tF, max_temp_steps, max_iter)`` and defaults to
            ``(10000, 0.001, 20, 20000)``. ``id_num`` is passed positionally
            and, if present, is also left in the forwarded keyword arguments.

    Returns:
        Whatever ``SA.return_best()`` returns.
    """
    # 'ctrl' is handed to SA positionally, so it must be removed from kwargs
    # to avoid supplying the same argument twice.
    control = kwargs.pop('ctrl', (10000, 0.001, 20, 20000))
    run_id = kwargs.get('id_num')

    annealer = SA(parameters, init_sol, control, run_id, **kwargs)
    annealer.run()
    annealer.close_files()
    return annealer.return_best()
236
+
237
def call_parsa(parameters, init_sol=None, nthrds=4, **kwargs):
    """Run the ``PARSA`` solver.

    Args:
        parameters: Search parameters forwarded to ``PARSA``.
        init_sol: Optional starting solution forwarded to ``PARSA``.
        nthrds: Forwarded to ``PARSA`` as ``nthrds``.
        **kwargs: Only ``ctrl`` is read -- the control tuple
            ``(tI, tF, max_temp_steps, max_iter)``, default
            ``(10, 0.001, 10, 10)``. Other keys are not forwarded.

    Returns nothing.
    """
    control = kwargs.pop('ctrl', (10, 0.001, 10, 10))
    solver = PARSA(parameters, init_sol, control, nthrds=nthrds)
    solver.run()
252
+
253
def call_parcopsa(parameters, init_sol=None, nthrds=8):
    """Run the ``PARCOPSA`` solver with a fixed control tuple.

    Args:
        parameters: Search parameters forwarded to ``PARCOPSA``.
        init_sol: Optional starting solution forwarded to ``PARCOPSA``.
        nthrds: Forwarded to ``PARCOPSA`` as ``nthrds``.

    Returns nothing.
    """
    # (tI, tF, max_temp_steps, max_iter)
    control = (10, 0.001, 10, 10)
    solver = PARCOPSA(parameters, init_sol, control, nthrds=nthrds)

    # Optional: each entry of solver.solvers can be given its own initial
    # temperature through revise_tI(...) before run() is called.
    solver.run()
266
+
267
def call_threshold(parameters, init_sol=None, hm=False):
    """Run the ``TA`` solver with a fixed control tuple and close its files.

    Args:
        parameters: Search parameters forwarded to ``TA``.
        init_sol: Optional starting solution forwarded to ``TA``.
        hm: Accepted for interface compatibility; not used by this function.

    Returns nothing.
    """
    # (threshold, max_steps, max_iter)
    control = (10, 20, 20)
    accepter = TA(parameters, init_sol, control)
    accepter.run()
    accepter.close_files()
275
+
276
def covering_arrays(index=0):
    """Return one row of a hand-reduced covering array of annealing controls.

    Each row is a tuple ``(tI, tF, max_temp_steps, max_iter)`` drawn from the
    levels tI in {500, 1000, 1500}, tF in {0.001, 0.01, 0.1},
    max_temp_steps in {10, 20, 30} and max_iter in {10, 20, 50}. The array is
    a manual reduction of the full factorial design; a pairwise generator
    such as ``allpairspy`` could replace it.

    The whole array is printed on every call.

    Args:
        index: Row to return. Negative values index from the end, as with any
            Python sequence.

    Returns:
        The selected tuple, or ``None`` when ``index`` is not below the number
        of rows.
    """
    covering_array = [
        (500, 0.001, 10, 10),
        (500, 0.01, 20, 20),
        (500, 0.1, 30, 50),
        (1000, 0.001, 20, 50),
        (1000, 0.01, 30, 10),
        (1500, 0.001, 30, 20),
        (1500, 0.1, 10, 50),
        (1500, 0.01, 20, 10),
    ]

    print("Covering Array:")
    for row in covering_array:
        print(row)

    if index < len(covering_array):
        return covering_array[index]
    # Out-of-range request: callers receive None rather than an exception.
    return None
304
+
305
+
306
+ ''' ----------------------------------------------------------- '''
307
+ ''' SCRIPT '''
308
+ ''' ----------------------------------------------------------- '''
309
+
310
def optimise_synth_latent(index=0):
    """Search for a 3-class latent class specification on synthetic data.

    Loads ``data/artificial_latent_3classes_mnl_22.04.2025.csv``, registers
    the candidate variables of each latent class in a
    ``LatentClassConstrained`` container, builds the search ``Parameters``
    (BIC criterion, no random or Box-Cox variables) and runs simulated
    annealing through ``call_siman``.

    Args:
        index: Row of ``covering_arrays`` to use as the annealing control
            tuple ``(tI, tF, max_temp_steps, max_iter)``.

    Returns nothing; the solution returned by ``call_siman`` is discarded.
    """
    import pprint

    number_of_classes = 3
    df = pd.read_csv("data/artificial_latent_3classes_mnl_22.04.2025.csv")
    df_test = None

    latent_classes = LatentClassConstrained(num_classes=number_of_classes)
    candidate_asvars = ['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7',
                        'nonsig1', 'nonsig2', 'nonsig3', 'nonsig4', 'nonsig5',
                        '_int_individual']
    candidate_memvars = ['z1', 'z2', 'nonsig_isvar1', 'nonsig_isvar2']

    for class_no in range(1, number_of_classes + 1):
        latent_classes.populate_class(
            f"latent_class_{class_no}",
            asvar=list(candidate_asvars),
            isvars=[],
            randvars=[],
            # The first class cannot have membership variables.
            memvars=[] if class_no == 1 else list(candidate_memvars),
            req_asvar=[],
            req_isvars=[],
            req_randvars=[],
            req_memvars=[],
        )

    print("Latent Class 1 Data:")
    print(latent_classes.get_class("latent_class_1"))

    print("\nAll Latent Classes:")
    pprint.pprint(latent_classes.get_all_classes())

    varnames_gbl = latent_classes.get_global_asvars_randvars()
    gbl_asvars = varnames_gbl['asvars']
    gbl_isvars = varnames_gbl['isvars']
    gbl_memvars = varnames_gbl['memvars']

    # De-duplicate while keeping first-seen order. list(set(...)) yields an
    # order that changes with string hash randomisation between runs, which
    # made the variable order passed to the search non-reproducible.
    varnames = list(dict.fromkeys(gbl_asvars + gbl_isvars + gbl_memvars))

    print(gbl_asvars)
    print('Running Latent Class Search')

    # NOTE(review): X is not used below. The lookup is kept because it raises
    # immediately if a selected variable is missing from the CSV -- confirm
    # whether it can be dropped.
    X = df[varnames].values

    # Search options.
    asvarnames = gbl_asvars    # class-specific variables
    isvarnames = gbl_isvars    # class individual-specific variables
    memvarnames = gbl_memvars  # class membership variables

    choice_id = df['id']
    ind_id = df['id']
    choices = df['choice']     # column holding the choice variable
    alt_var = df['alt']        # column holding the alternative variable
    base_alt = None            # reference alternative
    distr = ['n', 'u', 't']    # random distributions to select from
    choice_set = ['1', '2', '3']
    criterions = [['bic', -1]]  # single objective: [KPI, sign]

    parameters = Parameters(criterions=criterions, df=df, distr=distr, df_test=df_test, choice_set=choice_set,
                            alt_var=alt_var, varnames=varnames, isvarnames=isvarnames, asvarnames=asvarnames,
                            mem_vars=memvarnames, choices=choices,
                            choice_id=choice_id, ind_id=ind_id, latent_class=True, allow_random=False,
                            base_alt=base_alt,
                            allow_bcvars=False, n_draws=200, min_classes=number_of_classes,
                            max_classes=number_of_classes, num_classes=number_of_classes, ps_intercept=False,
                            optimise_class=True, ftol_lccm=1e-4, LCR=latent_classes)

    # Work on an independent copy with fixed thetas, where the class-specific
    # variables are also offered as individual-specific candidates.
    search_parameters = copy.deepcopy(parameters)
    search_parameters.fixed_thetas = True
    search_parameters.isvarnames = asvarnames
    search_parameters.optimise_class = True

    print("Final Phase")
    cntr_arr = covering_arrays(index)
    sa_parms = {'ctrl': cntr_arr, 'max_classes': number_of_classes, 'min_classes': number_of_classes,
                'id_num': f'Ashkay_c{number_of_classes}_p3'}
    call_siman(search_parameters, None, **sa_parms)
465
+
466
+ ''' ----------------------------------------------------------- '''
467
+ ''' SCRIPT '''
468
+ ''' ----------------------------------------------------------- '''
469
def optimise_electricity():
    """Run a simulated-annealing model search on the electricity data set.

    The data are the choice-of-electricity-supplier data collected in
    California by the Electric Power Research Institute (Goett, 1998). A
    stated-preference survey of 361 residential customers yielded a panel of
    4,308 observations; each customer faced up to 12 choice scenarios with
    four plans to choose from. The six attributes are: fixed price (pf, 7 or
    9 cents/kWh); contract length (cl: no contract, 1 year or 5 years) during
    which switching is penalised; a well-known-supplier dummy (wk);
    time-of-day rates (tod: 11 cents/kWh 8AM-8PM, 5 cents/kWh 8PM-8AM);
    seasonal rates (seas: 10 cents/kWh summer, 8 winter, 6 spring and fall);
    and a local-supplier dummy (loc).

    Reads ``electricity.csv`` from the current working directory and
    maximises log-likelihood. Returns nothing.
    """
    data = pd.read_csv("electricity.csv")

    # Every attribute is offered both as an alternative-specific and as an
    # individual-specific candidate; the same list object is passed for all
    # three roles.
    attributes = ['pf', 'cl', 'loc', 'wk', 'tod', 'seas']

    choice_id, ind_id, choices, alt_var = (
        data[column] for column in ('chid', 'id', 'choice', 'alt')
    )

    # Single objective as [KPI, sign]. Options: {mae:-1, bic:-1, aic:-1, loglik:1};
    # a multi-objective search lists several pairs, e.g. loglik with mae.
    objectives = [['loglik', 1]]

    # allow_corvars is True by default.
    search_setup = Parameters(
        criterions=objectives,
        df=data,
        distr=['n', 'u', 't'],
        df_test=None,
        choice_set=['1', '2', '3', '4'],
        alt_var=alt_var,
        varnames=attributes,
        isvarnames=attributes,
        asvarnames=attributes,
        choices=choices,
        choice_id=choice_id,
        ind_id=ind_id,
        latent_class=False,
        allow_random=True,
        base_alt=None,
        allow_bcvars=True,
        n_draws=200,
        verbose=True,
    )

    # No starting solution. One can be supplied by building a Solution with
    # len(criterions) criteria and calling set_asvar / set_randvar on it; the
    # user must know the rules a valid solution has to obey.
    starting_solution = None

    # Alternatives to call_siman: call_threshold, call_parsa, call_parcopsa.
    annealing_options = {'ctrl': (10, 0.001, 1000, 10)}
    call_siman(search_setup, starting_solution, **annealing_options)
547
def optimise_latent_3_phase_search(num_classes=3, num_of_iterations=1000, initial_iterations=200):
    """Run a three-phase latent class search on the electricity data set.

    Phase 1 optimises class membership, phase 2 optimises the class-specific
    part with the phase-1 result passed as ``fixed_solution``, and the final
    phase runs a short joint search started from the phase-2 result. All
    phases use ``call_siman`` and the BIC criterion.

    Args:
        num_classes: Number of latent classes (also the minimum passed to the
            annealer; the maximum passed is ``num_classes + 1``).
        num_of_iterations: Third entry of the phase-2 control tuple.
        initial_iterations: Third entry of the phase-1 control tuple.

    Returns nothing; the final solution is discarded.
    """
    import copy  # local import so the block does not rely on star imports

    df = pd.read_csv("electricity.csv")
    df_test = None
    varnames = ['pf', 'cl', 'loc', 'wk', 'tod', 'seas']  # all explanatory variables
    asvarnames = varnames  # alternative-specific variables
    isvarnames = []        # individual-specific variables

    # Membership candidates: every variable not explicitly excluded. The
    # original compared each name to a list with `!=`, which is always true;
    # a membership test expresses the intended filter.
    excluded_from_membership = ['listofunwantednamesinmember']
    memvarnames = [name for name in varnames if name not in excluded_from_membership]

    choice_id = df['chid']
    ind_id = df['id']
    choices = df['choice']  # column holding the choice variable
    alt_var = df['alt']     # column holding the alternative variable
    base_alt = None         # reference alternative
    distr = ['n', 'u', 't']  # random distributions to select from
    choice_set = ['1', '2', '3', '4']

    # Single objective as [KPI, sign]; alternatives include loglik (+1),
    # aic (-1) or multi-objective pairs with mae (-1).
    criterions = [['bic', -1]]

    parameters = Parameters(criterions=criterions, df=df, distr=distr, df_test=df_test, choice_set=choice_set,
                            alt_var=alt_var, varnames=varnames, isvarnames=isvarnames, asvarnames=asvarnames,
                            mem_vars=memvarnames, choices=choices,
                            choice_id=choice_id, ind_id=ind_id, latent_class=True, allow_random=True,
                            base_alt=base_alt,
                            allow_bcvars=False, n_draws=200, min_classes=num_classes, max_classes=num_classes,
                            num_classes=num_classes, ps_intercept=True, optimise_class=True)

    # Phase-2 settings go on an independent copy. A plain assignment would
    # alias `parameters`, so fixed_thetas / isvarnames would already be
    # changed when phase 1 runs.
    parameters_2nd = copy.deepcopy(parameters)
    parameters_2nd.fixed_thetas = True
    parameters_2nd.isvarnames = varnames
    parameters_2nd.optimise_class = True

    parameters_3rd = copy.deepcopy(parameters_2nd)

    init_sol = None

    # Phase 1: only the class membership variables are searched.
    print("1st Phase, Optimize Membership")
    sa_parms = {'ctrl': (10, 0.001, initial_iterations, 2), 'max_classes': num_classes + 1,
                'min_classes': num_classes, 'optimise_membership': True,
                'id_num': f'Elec_c{num_classes}_p1'}
    best_member = call_siman(parameters, init_sol, **sa_parms)

    # Phase 2: membership is held at the phase-1 result; only classes are searched.
    print("2nd Phase, Optimize Classes")
    sa_parms = {'ctrl': (10, 0.001, num_of_iterations, 10), 'max_classes': num_classes + 1,
                'min_classes': num_classes, 'optimise_membership': False, 'optimise_class': True,
                'fixed_solution': best_member, 'id_num': f'Elec_c{num_classes}_p2'}
    best_joint = call_siman(parameters_2nd, init_sol, **sa_parms)

    # Final phase: short joint search started from the best phase-2 solution.
    print("Final Phase")
    sa_parms = {'ctrl': (10, 0.001, 5, 1), 'max_classes': num_classes + 1, 'min_classes': num_classes,
                'optimise_membership': True, 'optimise_class': True,
                'id_num': f'Elec_c{num_classes}_p3'}
    call_siman(parameters_3rd, best_joint, **sa_parms)
626
+
627
def optimise_latent_swiss(num_classes=3, num_of_iterations=1000, number_of_initials=200):
    """Run a three-phase latent class search on the Swissmetro (long) data.

    Phase 1 optimises class membership, phase 2 optimises the class-specific
    part with the phase-1 result passed as ``fixed_solution``, and the final
    phase runs a short joint search started from the phase-2 result. All
    phases use ``call_siman`` and the BIC criterion.

    Args:
        num_classes: Number of latent classes (also the minimum passed to the
            annealer; the maximum passed is ``num_classes + 1``).
        num_of_iterations: Third entry of the phase-2 control tuple.
        number_of_initials: Third entry of the phase-1 control tuple.

    Returns nothing; the final solution is discarded.
    """
    import copy  # local import so the block does not rely on star imports

    df = pd.read_csv("swissmetro_long_1.csv")
    df_test = None
    varnames = ['TT_SCALED', 'CO_SCALED', 'HE', 'SEATS']  # all explanatory variables
    # NOTE(review): the original also defined an unused list
    # ['AGE', 'MALE', 'INCOME', 'GA', 'WHO', 'FIRST', 'LUGGAGE'] named `memer`.
    # Membership candidates are taken from `varnames` instead -- confirm
    # which was intended.

    asvarnames = varnames  # alternative-specific variables
    isvarnames = []        # individual-specific variables

    # Membership candidates: every variable not explicitly excluded. The
    # original compared each name to a list with `!=`, which is always true;
    # a membership test expresses the intended filter.
    excluded_from_membership = ['listofunwantednamesinmember']
    memvarnames = [name for name in varnames if name not in excluded_from_membership]

    choice_id = df['CHID']
    ind_id = df['ID']
    choices = df['CHOICE']  # column holding the choice variable
    alt_var = df['ALT']     # column holding the alternative variable
    base_alt = None         # reference alternative
    distr = ['n', 'u', 't']  # random distributions to select from
    choice_set = ['CAR', 'SM', 'TRAIN']

    # Single objective as [KPI, sign]; alternatives include loglik (+1),
    # aic (-1) or multi-objective pairs with mae (-1).
    criterions = [['bic', -1]]

    parameters = Parameters(criterions=criterions, df=df, distr=distr, df_test=df_test, choice_set=choice_set,
                            alt_var=alt_var, varnames=varnames, isvarnames=isvarnames, asvarnames=asvarnames,
                            mem_vars=memvarnames, choices=choices,
                            choice_id=choice_id, ind_id=ind_id, latent_class=True, allow_random=True,
                            base_alt=base_alt,
                            allow_bcvars=False, n_draws=200, min_classes=num_classes, max_classes=num_classes,
                            num_classes=num_classes, ps_intercept=True, optimise_class=True)

    # Phase-2 settings go on an independent copy. A plain assignment would
    # alias `parameters`, so fixed_thetas / isvarnames would already be
    # changed when phase 1 runs.
    parameters_2nd = copy.deepcopy(parameters)
    parameters_2nd.fixed_thetas = True
    parameters_2nd.isvarnames = varnames
    parameters_2nd.optimise_class = True

    parameters_3rd = copy.deepcopy(parameters_2nd)

    init_sol = None

    # Phase 1: only the class membership variables are searched.
    print("1st Phase, Optimize Membership")
    sa_parms = {'ctrl': (10, 0.001, number_of_initials, 10), 'max_classes': num_classes + 1,
                'min_classes': num_classes, 'optimise_membership': True,
                'id_num': f'Swiss_c{num_classes}_p1'}
    best_member = call_siman(parameters, init_sol, **sa_parms)

    # Phase 2: membership is held at the phase-1 result; only classes are searched.
    print("2nd Phase, Optimize Classes")
    sa_parms = {'ctrl': (10, 0.001, num_of_iterations, 10), 'max_classes': num_classes + 1,
                'min_classes': num_classes, 'optimise_membership': False, 'optimise_class': True,
                'fixed_solution': best_member, 'id_num': f'Swiss_c{num_classes}_p2'}
    best_joint = call_siman(parameters_2nd, init_sol, **sa_parms)

    # Final phase: short joint search started from the best phase-2 solution.
    print("Final Phase")
    sa_parms = {'ctrl': (10, 0.001, 5, 1), 'max_classes': num_classes + 1, 'min_classes': num_classes,
                'optimise_membership': True, 'optimise_class': True,
                'id_num': f'Swiss_c{num_classes}_p3'}
    call_siman(parameters_3rd, best_joint, **sa_parms)
705
+
706
+ ''' ----------------------------------------------------------- '''
707
+ ''' SCRIPT '''
708
+ ''' ----------------------------------------------------------- '''
709
def optimise_new_syn():
    """Search for a model specification on the synthetic MOOF seed-6 data.

    Reads the training and testing CSV files, appends Box-Cox transformed
    copies of ``added_random4`` (lambda 0.01) and ``added_random5``
    (lambda 0.05) to both frames, wraps the data in a ``Parameters`` object
    whose single criterion is the log-likelihood, and starts ``call_siman``
    without an initial solution.  Nothing is returned.
    """
    train_df = pd.read_csv("New_Syn_MOOF_TRAIN_seed6.csv")
    test_df = pd.read_csv("New_Syn_MOOF_TEST_seed6.csv")

    # Transform manually with a fixed lambda instead of estimating lambda,
    # for better convergence.  The resulting ``bc_*`` columns are added to
    # the frames but are not listed among the candidate variables below.
    for frame in (train_df, test_df):
        for column, lmbda in (('added_random4', 0.01), ('added_random5', 0.05)):
            frame['bc_' + column] = scipy.stats.boxcox(frame[column], lmbda)

    # Candidate alternative-specific variables: six fixed, five
    # non-significant and seven random synthetic attributes.
    asvarnames = (
        [f'added_fixed{k}' for k in range(1, 7)]
        + [f'nonsig{k}' for k in range(1, 6)]
        + [f'added_random{k}' for k in range(1, 8)]
    )
    isvarnames = []
    varnames = asvarnames + isvarnames

    # Single-objective search; a multi-objective alternative would be
    # [['loglik', 1], ['mae', -1]].
    criterions = [['loglik', 1]]

    search_config = dict(
        criterions=criterions,
        df=train_df,
        distr=['n', 'u', 't'],
        df_test=test_df,
        choice_set=['1', '2', '3'],
        alt_var=train_df['alt'],
        test_alt_var=test_df['alt'],
        varnames=varnames,
        isvarnames=isvarnames,
        asvarnames=asvarnames,
        choices=train_df['choice'],
        test_choices=test_df['choice'],
        choice_id=train_df['choice_id'],
        test_choice_id=test_df['choice_id'],
        ind_id=train_df['ind_id'],
        test_ind_id=test_df['ind_id'],
        latent_class=False,
        allow_random=True,
        base_alt=None,
        allow_bcvars=False,
        n_draws=200,
    )
    parameters = Parameters(**search_config)

    # No starting solution is supplied to the annealer.
    call_siman(parameters, None)
    # Alternative drivers: call_thresold(parameters, init_sol),
    # call_parcopsa(parameters, init_sol)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # FIT MIXED LOGIT (disabled reference example)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # varnames = ['added_fixed1', 'added_fixed2', 'added_fixed3', 'added_fixed4', 'added_fixed5', 'added_fixed6',
    #             'added_random1', 'added_random2', 'added_random3',
    #             'bc_added_random4', 'bc_added_random5', 'added_random6', 'added_random7']
    #
    # X = df[varnames].values
    # y = df['choice'].values
    # av = None
    # test_av = None
    # weight_var = None
    # test_weight_var = None
    # isvars = []
    # transvars = []  # ['added_random4', 'added_random5']
    # randvars = {'added_random1': 'n', 'added_random2': 'n', 'added_random3': 'n',
    #             'bc_added_random4': 'n', 'bc_added_random5': 'n', 'added_random6': 'u', 'added_random7': 't'}
    #
    # correlated_vars = ['added_random1', 'added_random2', 'added_random3']
    # model = MixedLogit()
    # model.setup(X, y, ids=df['choice_id'].values, panels=df['ind_id'].values, varnames=varnames,
    #             isvars=isvars, n_draws=200, correlated_vars=correlated_vars, transvars=transvars,
    #             randvars=randvars, alts=df['alt'])
    # # gtol=2e-6, ftol=1e-8, method="L-BFGS-B",
    # model.fit()
    # model.summarise()
    #
    # choice_set = [1, 2, 3]
    # def_vals = model.coeff_est
    # X_test = df_test[varnames].values
    # y_test = df_test['choice'].values
    #
    # # Calculating MAE
    # # Choice frequency obtained from estimated model applied on testing sample
    # predicted_probabilities_val = model.pred_prob * 100
    # obs_freq = model.obs_prob * 100
    # MAE = round((1 / len(choice_set)) * (np.sum(abs(predicted_probabilities_val - obs_freq))), 2)
    # MAPE = round((1 / len(choice_set)) * (np.sum(abs((predicted_probabilities_val - obs_freq) / obs_freq))))
    # print("MAE = ", MAE, "; MAPE = ", MAPE)
811
+
812
+
813
+ ''' ----------------------------------------------------------- '''
814
+ ''' SCRIPT '''
815
+ ''' ----------------------------------------------------------- '''
816
+ 'TEST FOR FITTING LATENT CLASS MODEL'
817
def latent_synth_4():
    """Fit a two-class latent class model using only a constant regressor.

    Loads ``artificial_latent_new_4classes_mnl.csv``, adds a column ``ones``
    filled with 1, and fits a ``LatentClassModel`` with ``num_classes=2``
    where each class uses only ``ones`` and class membership uses only the
    ``_inter`` term.  The summary is printed; nothing is returned.

    The data file is read once; an earlier duplicate read and a superseded
    ``varnames`` list were removed because their results were never used.
    """
    print('testing intercept model')
    print('testing synthetic experiment for the laten class, 4 class ')

    # Columns passed to the model as X; 'ones' is the constant created below.
    # Further candidates that are currently left out: 'nonsig1'..'nonsig5',
    # 'nonsig_isvar1', 'nonsig_isvar2'.
    varnames = ['price', 'time', 'conven', 'comfort', 'meals', 'petfr', 'emipp', 'income', 'age', 'ones']

    df = pd.read_csv("artificial_latent_new_4classes_mnl.csv")
    df = df.assign(ones=1)
    model = LatentClassModel()

    X = df[varnames].values
    y = df['choice'].values

    # One membership specification row and two class specification rows,
    # matching num_classes=2 below.
    member_params_spec = np.array([['_inter']], dtype='object')
    class_params_spec = np.array([['ones'],
                                  ['ones']], dtype='object')

    print('do i need to declare intecept')
    model.setup(X, y, panels=df['id'].values, varnames=varnames, num_classes=2,
                class_params_spec=class_params_spec, member_params_spec=member_params_spec,
                alts=[1, 2, 3])
    model.reassign_penalty(0.10)
    model.fit()
    model.summarise()
    print('finished')
858
def synth_3():
    """Fit a three-class latent class mixed model on synthetic data.

    Loads ``synth_latent_mixed_3classes.csv`` and estimates a
    ``LatentClassMixedModel`` with ``num_classes=3``, 100 draws and two
    normally distributed random parameters (``added_random1`` and
    ``added_random2``).  The summary is printed; nothing is returned.
    """
    print('testing synthetic experiment for the mixed latent class random parameters...')
    data = pd.read_csv("synth_latent_mixed_3classes.csv")
    model = LatentClassMixedModel()

    # Further candidates that are currently left out: 'nonsig4', 'nonsig5',
    # 'nonsig_isvar1', 'nonsig_isvar2'.
    varnames = [
        'added_fixed1', 'added_fixed2', 'nonsig1', 'nonsig2', 'nonsig3',
        'added_random1', 'added_random2',
        'income', 'age', 'gender',
    ]

    design = data[varnames].values
    chosen = data['choice'].values

    # Two membership rows and three class rows for the three classes.
    membership_rows = [['income', 'gender'],
                       ['income', 'age']]
    class_rows = [['added_fixed1', 'added_fixed2'],
                  ['added_fixed1', 'added_random1'],
                  ['added_fixed2', 'added_random2']]
    member_params_spec = np.array(membership_rows, dtype='object')
    class_params_spec = np.array(class_rows, dtype='object')

    randvars = dict.fromkeys(('added_random1', 'added_random2'), 'n')

    # Starting values kept for experimentation; neither is passed to
    # setup() at the moment (the init_class_betas argument is disabled).
    init_class_thetas = np.array([0.1, -0.03, -0.1, 0.02])
    init_class_betas = [
        np.array([-1, 2.5, 1.242992317, 2.040125077, 1.02, 0.90]),
        np.array([1.5, -1, 0.74, 0.81, 1.47, 1.36]),
        np.array([-2, 1, 1.20, 1.65, 1.27, 1.07]),
    ]

    model.setup(
        design, chosen,
        panels=data['ind_id'],
        n_draws=100,
        varnames=varnames,
        num_classes=3,
        class_params_spec=class_params_spec,
        member_params_spec=member_params_spec,
        # ftol=1e-3,
        gtol=1e-5,
        ftol_lccmm=1e-3,
        # init_class_betas=init_class_betas,
        randvars=randvars,
        alts=[1, 2, 3],
    )
    # model.reassign_penalty(0.1)
    model.fit()
    model.summarise()
895
+
896
def Non_Latent_Search_Template():
    """Template for a single-objective, non-latent-class search.

    Loads ``MassLong.csv``, builds a ``Parameters`` object with
    ``latent_class=False`` and BIC as the only criterion, and runs
    ``call_siman`` with no starting solution under the run identifier
    ``MaaS_c_p3``.  Nothing is returned.
    """
    data = pd.read_csv('MassLong.csv')
    print('Pleae Change Data Set ')

    varnames = [
        'Price', 'PT', 'Rideshare', 'Ebike', 'Addon', 'Age',
        'Follow-up', 'Residential', 'Technology',
        'Disability', 'Driving', 'Bike', 'Scooter', 'Multimode', 'Public_Transit',
        'D_walk', 'D_car', 'D_bike', 'Long_w_trips', 'Long_r_trips', 'Long_s_trips',
        'Age_2', 'Age_3', 'Live_alone',
        'Income_1', 'Income_2', 'Income_3', 'MaaS_1', 'MaaS_2', 'MaaS_3', 'MaaS_4',
    ]
    isvarnames = []  # individual-specific variables in varnames

    # Names excluded from the class-specific / membership candidate lists.
    excluded_from_class = {
        'Price', 'PT', 'Rideshare', 'Ebike', 'Addon', 'Age', 'Gender',
        'Driving', 'Bike', 'Scooter', 'Multimode', 'Public_Transit',
        'D_walk', 'D_car', 'D_bike', 'Long_w_trips', 'Long_r_trips', 'Long_s_trips',
        'Income_1', 'Income_2', 'Income_3', 'MaaS_1', 'MaaS_2', 'MaaS_3', 'MaaS_4',
    }
    excluded_from_member = {
        'MaaS_1', 'MaaS_2', 'MaaS_3', 'MaaS_4', 'Driving', 'Bike', 'Scooter', 'Multimode',
        'Public_Transit',
        'Price', 'PT',
    }
    # Membership candidates are derived here but not passed on below.
    memvarnames = [v for v in varnames if v not in excluded_from_member]
    asvarnames = [v for v in varnames if v not in excluded_from_class]

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # CHOOSE SINGLE OBJECTIVE OR MULTI-OBJECTIVE:
    # each criterion is a [KPI, sign] pair placed in a list.
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    criterions = [['bic', -1]]

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # DEFINE PARAMETERS FOR THE SEARCH
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    parameters = Parameters(
        criterions=criterions,
        df=data,
        distr=['n', 'u', 't'],          # random distributions to select from
        df_test=None,
        choice_set=['1', '2', '3', '4'],
        alt_var=data['alt'],            # column holding the alternative
        varnames=varnames,
        isvarnames=isvarnames,
        asvarnames=asvarnames,
        choices=data['CHOICE'],         # column holding the choice
        choice_id=data['CHID'],
        ind_id=data['ID'],
        latent_class=False,
        allow_random=True,
        base_alt=None,                  # no reference alternative
        allow_bcvars=False,
        n_draws=200,
        ps_intercept=True,
    )

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # RUN THE SEARCH (no starting solution)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    print("Final Phase")
    annealing_options = dict(ctrl=(10, 0.001, 50, 1), id_num='MaaS_c_p3')
    final_sol = call_siman(parameters, None, **annealing_options)
970
+
971
+
972
def optimise_bstm():
    """Run the simulated-annealing search on the BSTM calibration data.

    Loads ``BSTM_HBS_CAL_ALL.csv``, builds a ``Parameters`` object with BIC
    as the only criterion and ``WALK`` as the base alternative, evaluates a
    hand-picked multinomial-logit starting solution, fits the same
    specification once with ``MultinomialLogit``, and finally calls
    ``call_siman`` seeded with that starting solution.  Nothing is returned.

    No test frame is used: ``df_test`` is ``None``.  The validation file
    ``BSTM_HBS_VAL_ALL.csv`` is therefore no longer read, since its contents
    were discarded immediately after loading.
    """
    df = pd.read_csv("BSTM_HBS_CAL_ALL.csv")
    df_test = None

    # Candidate variables for the search.  The original list contained
    # 'TT_CYCLEL1' twice; the duplicate entry has been dropped.
    # NOTE(review): every other mode has both an L1 and an L2 term, so the
    # second entry was presumably meant to be 'TT_CYCLEL2' -- confirm that
    # column exists in the data before adding it.
    varnames = ['TT', 'TC', 'TT_CAD', 'TT_CAP',
                'TCPC', 'EMPDENS_CAD', 'EMPDENS_PT', 'VEHADUL_CAD', 'VEHADUL_CAP', 'VEHADUL_W2PT',
                'VEHADUL_PR', 'VEHADUL_KR', 'VEHADUL_CYCLE', 'VEHADUL_WALK', 'VEHPER_CAD', 'PC',
                'TT_CADL1', 'TT_CADL2', 'TT_CAPL1', 'TT_CAPL2', 'TT_W2PTL1', 'TT_W2PTL2', 'TT_KRL1', 'TT_KRL2',
                'TT_PRL1', 'TT_PRL2', 'TT_CYCLEL1',
                'TT_WALKL1', 'TT_WALKL2', 'TCPCL1', 'TCPCL2', 'WAT']
    # A shorter alternative list that was tried:
    # ['TT', 'TC', 'TT_CAD', 'TT_CAP',
    #  'TCPC', 'EMPDENS_PT', 'VEHADUL_CAD', 'VEHADUL_CAP', 'VEHADUL_W2PT',
    #  'VEHADUL_PR', 'VEHADUL_CYCLE', 'VEHPER_CAD', 'PC',
    #  'TT_CADL1', 'TT_CADL2', 'TT_CAPL1', 'TT_W2PTL1', 'TT_KRL2', 'TT_PRL2', 'TT_CYCLEL1',
    #  'TT_WALKL1', 'TCPCL1', 'TCPCL2', 'WAT']

    asvarnames = varnames
    isvarnames = []

    choice_id = df['TRIPID']
    ind_id = df['TRIPID']
    choices = df['Chosen_Mode']  # the df column name containing the choice variable
    alt_var = df['alt']          # the df column name containing the alternative variable
    base_alt = 'WALK'            # Reference alternative
    distr = ['n', 'u', 't']      # List of random distributions to select from
    choice_set = ['CAD', 'CAP', 'W2PT', 'PR', 'KR', 'CYCLE', 'WALK']
    criterions = [['bic', -1]]
    # criterions = [['loglik',1], ['mae',-1]]

    parameters = Parameters(criterions=criterions, df=df, distr=distr, df_test=df_test, choice_set=choice_set,
                            alt_var=alt_var, varnames=varnames, isvarnames=isvarnames, asvarnames=asvarnames,
                            choices=choices,
                            choice_id=choice_id, ind_id=ind_id, latent_class=False, allow_random=True,
                            base_alt=base_alt,
                            allow_bcvars=False, n_draws=200)

    # Alternative-specific variables of the hand-picked starting model; the
    # same list seeds the search and is fitted directly further down.
    start_asvars = ['TCPC', 'TT_CAD', 'VEHPER_CAD', 'TT_CAP', 'VEHADUL_CAP',
                    'TT_W2PT', 'EMPDENS_PT', 'VEHADUL_W2PT', 'TT_CYCLE', 'TT_WALK', 'VEHADUL_WALK']

    # Build the starting solution and score it.  evaluate_mnl's result is
    # indexed as [0] -> stored under 'aic', [1] -> 'bic', [2] -> 'loglik'.
    init_sol = Solution(len(criterions))
    init_sol.set_asvar(list(start_asvars))
    init_sol_v = Search(parameters).evaluate_mnl(init_sol)
    init_sol['aic'] = float(init_sol_v[0])
    init_sol['loglik'] = init_sol_v[2]
    init_sol['bic'] = init_sol_v[1]
    init_sol['obj'] = [init_sol_v[1]]
    print(f'inital_solution{init_sol_v[1]}')
    # init_sol.set_randvar(['cl', 'tod', 'wk'], ['t', 't', 'u'])

    # Fit the starting specification once as a plain multinomial logit.
    mnl = MultinomialLogit()
    mnl.setup(X=df[start_asvars], y=df['Chosen_Mode'], varnames=start_asvars,
              fit_intercept=True, alts=df['alt'], ids=ind_id,
              avail=None, base_alt='WALK', gtol=1e-04)
    mnl.fit()
    # mnl.summarise()

    sa_parms = {'ctrl': (100, 0.001, 1000, 1),
                'id_num': 'bstm'}
    call_siman(parameters, init_sol, **sa_parms)
    # call_thresold(parameters, init_sol)
    # call_parcopsa(parameters, init_sol)
1060
+
1061
+
1062
def MaaS_search(number_of_classes = 3, number_of_iterations = 1000, initial_iterations = 200, **kwargs):
    """Run the three-phase latent class search on ``MassLong.csv``.

    Phase 1 searches with ``optimise_membership=True``, phase 2 searches the
    class specifications with the phase-1 result passed as
    ``fixed_solution``, and a short final phase restarts from the phase-2
    result.  Each phase is a ``call_siman`` run.  Nothing is returned.

    Args:
        number_of_classes: used for ``min_classes``, ``max_classes`` and
            ``num_classes`` alike.
        number_of_iterations: third ``ctrl`` entry of the phase-2 run.
        initial_iterations: third ``ctrl`` entry of the phase-1 run.
        **kwargs: only ``multiobjective`` is read; when truthy the criteria
            are BIC and MAE, otherwise BIC alone.

    The phase-2 and phase-3 parameter objects are deep copies.  Previously
    they were plain aliases of the phase-1 object, so the phase-2 settings
    (``fixed_thetas``, ``isvarnames``, ``ps_vars``) were silently applied to
    phase 1 as well; ``ashkay_search`` already copies in the same situation.
    """
    import copy  # local import: only needed for the per-phase copies

    df = pd.read_csv('MassLong.csv')
    print('Running Latent Class Search')

    # Candidate variables.  An earlier, longer list that also contained
    # 'LGA_2', 'Age_2', 'Live_housemate', 'Fam_singl', 'Part_student' and
    # 'MaaS_1'..'MaaS_4' was superseded by this one and has been removed.
    varnames = ['Price', 'PT', 'Rideshare', 'Ebike', 'Addon', 'Age',
                'Gender', 'Household', 'Education', 'Employment', 'WFH', 'Income',
                'Follow-up', 'Residential', 'Technology',
                'Disability', 'Driving', 'Bike', 'Scooter', 'Multimode', 'Public_Transit',
                'D_walk', 'D_car', 'D_bike', 'Long_w_trips', 'Long_r_trips', 'Long_s_trips',
                'PT_averse', 'LGA_1', 'LGA_3', 'Age_1', 'Age_3', 'Live_alone', 'Fam_nokid', 'Fam_kid',
                'Full_time', 'Part_time',
                'Casual', 'Home_duties', 'Unemployed', 'Full_student', 'Retired',
                'Income_1', 'Income_2', 'Income_3']

    # Search options.
    df_test = None
    isvarnames = []  # individual-specific variables in varnames
    unwanted_class = ['PT', 'Rideshare', 'Ebike', 'Addon', 'Age', 'Gender',
                      'Driving', 'Bike', 'Scooter', 'Multimode', 'Public_Transit',
                      'D_walk', 'D_car', 'D_bike', 'Long_w_trips', 'Long_r_trips', 'Long_s_trips',
                      'Income_1', 'Income_2', 'Income_3', 'MaaS_1', 'MaaS_2', 'MaaS_3', 'MaaS_4',
                      'Live_alone', 'Unemployed']
    unwanted_member = ['MaaS_1', 'MaaS_2', 'MaaS_3', 'MaaS_4', 'Driving', 'Bike', 'Scooter', 'Multimode',
                       'Public_Transit',
                       'Price', 'PT']
    memvarnames = [name for name in varnames if name not in unwanted_member]  # member-specific variables
    asvarnames = [name for name in varnames if name not in unwanted_class]    # class-specific variables
    choice_id = df['CHID']
    ind_id = df['ID']

    choices = df['CHOICE']   # the df column name containing the choice variable
    alt_var = df['alt']      # the df column name containing the alternative variable
    base_alt = None          # Reference alternative
    distr = ['n', 'u', 't']  # List of random distributions to select from
    choice_set = ['1', '2', '3', '4']

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # CHOOSE SINGLE OBJECTIVE OR MULTI-OBJECTIVE:
    # each criterion is a [KPI, sign] pair placed in a list.
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if kwargs.get('multiobjective', 0):
        criterions = [['bic', -1], ['mae', -1]]
    else:
        criterions = [['bic', -1]]

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # DEFINE PARAMETERS FOR THE SEARCH
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    latent_class = True

    parameters = Parameters(criterions=criterions, df=df, distr=distr, df_test=df_test, choice_set=choice_set,
                            alt_var=alt_var, varnames=varnames, isvarnames=isvarnames, asvarnames=asvarnames,
                            mem_vars=memvarnames, choices=choices,
                            choice_id=choice_id, ind_id=ind_id, latent_class=latent_class, allow_random=False,
                            base_alt=base_alt,
                            allow_bcvars=False, n_draws=200, min_classes=number_of_classes,
                            max_classes=number_of_classes, num_classes=number_of_classes, ps_intercept=True,
                            optimise_class=True, ftol_lccm=1e-5, ps_asvars=['Price'])

    # Phase-2 settings live on an independent copy so that phase 1 keeps
    # the parameters exactly as constructed above.
    parameters_2nd = copy.deepcopy(parameters)
    parameters_2nd.fixed_thetas = True
    parameters_2nd.isvarnames = varnames
    # NOTE(review): the constructor keyword above is 'ps_asvars' while this
    # attribute is 'ps_vars' -- confirm which name the search reads.
    parameters_2nd.ps_vars = ['Price']
    parameters_2nd.optimise_class = True

    parameters_3rd = copy.deepcopy(parameters_2nd)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # DEFINE THE STARTING SOLUTION
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    init_sol = None

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # RUN THE SEARCH
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Phase 1: optimise membership.
    print("1st Phase, Optimize Membership")
    sa_parms = {'ctrl': (10, 0.001, initial_iterations, 2), 'max_classes': number_of_classes,
                'min_classes': number_of_classes,
                'optimise_membership': True, 'id_num': f'MaaS_c{number_of_classes}_p1'}
    best_member = call_siman(parameters, init_sol, **sa_parms)
    # TODO if perturb randvar, need to add it into one of the classes

    # Phase 2: optimise the class specifications, with the phase-1 result
    # supplied both as the starting point and as the fixed solution.
    print("2nd Phase, Optimize Classes")
    sa_parms = {'ctrl': (100, 0.001, number_of_iterations, 5), 'max_classes': number_of_classes,
                'min_classes': number_of_classes, 'optimise_membership': False,
                'optimise_class': True, 'fixed_solution': best_member,
                'id_num': f'MaaS_c{number_of_classes}_p2'}
    # best_joint = call_harmony(parameters_2nd, best_member)
    best_joint = call_siman(parameters_2nd, best_member, **sa_parms)

    # Final phase: short run started from the best joint solution.
    print("Final Phase")
    sa_parms = {'ctrl': (10, 0.001, 5, 1), 'max_classes': number_of_classes,
                'min_classes': number_of_classes, 'id_num': f'MaaS_c{number_of_classes}_p3'}
    final_sol = call_siman(parameters_3rd, best_joint, **sa_parms)
1175
+
1176
def ashkay_search(number_of_classes = 3, number_of_iterations = 1000, initial_iterations = 200, *args, **kwargs):
    """Fit a fixed five-class reference model, then run a constrained search.

    The function first estimates a ``LatentClassModel`` with five classes
    and hard-coded starting values on ``akshay_long_true.csv`` and prints
    its summary.  It then describes three latent classes with
    ``LatentClassConstrained`` (candidate and required variables per class)
    and runs the three-phase ``call_siman`` search: membership, class
    specifications, and a short final run.  Nothing is returned.

    Args:
        number_of_classes: passed to ``LatentClassConstrained`` and used
            for ``min_classes``, ``max_classes`` and ``num_classes``.
        number_of_iterations: third ``ctrl`` entry of the phase-2 run.
        initial_iterations: third ``ctrl`` entry of the phase-1 run.
        *args, **kwargs: accepted but not used.  A ``run_time`` keyword was
            previously read into a local that nothing consumed, so it has
            no effect.

    The combined variable list is de-duplicated in first-seen order.  It
    used to be built with ``list(set(...))``, whose order for strings can
    change from one interpreter run to the next.
    """
    import copy
    import pprint
    from latent_class_constrained import LatentClassConstrained

    df = pd.read_csv('akshay_long_true.csv')
    df_test = None

    # Variables shared by every class / membership specification below.
    member_vars = ['InnerCity', 'InnerRegional', 'Under30', 'Over65', 'College', 'FullTime', 'PartTime', 'Male',
                   'Children', 'Income', 'NDI']
    class_vars = ['LocalPTPayG', 'LDPTPayG', 'TaxiPayG', 'CarRentalPayG', 'CarsharePayG', 'RidesharePayG',
                  'BikesharePayG',
                  'LocalPTUnl', 'LDPTUnl', 'TaxiUnl', 'CarRentalUnl', 'CarshareUnl', 'RideshareUnl', 'BikeshareUnl',
                  'Cost', 'TktInt', 'BkInt', 'RTInf', 'Pers']

    RUN_AKSHAY = 1
    if RUN_AKSHAY:
        print('testing against Akshays model')
        model = LatentClassModel()
        varnames = member_vars + class_vars

        X = df[varnames].values
        y = df['CHOICE'].values

        # Four identical membership rows and five identical class rows for
        # the five-class model.
        member_row = ['_inter'] + member_vars
        member_params_spec = np.array([list(member_row) for _ in range(4)], dtype='object')
        class_params_spec = np.array([list(class_vars) for _ in range(5)], dtype='object')

        # Starting values: 48 membership coefficients (4 rows of 12).
        init_class_thetas = np.array(
            [-1.321318, -0.254239, -0.137624, -9.159877, 0.009594, 1.189211, -0.084255, 0.437849, 0.222736,
             -2.338727, -0.220732, 0.206103,
             0.293479, 0.17829, -0.293836, -0.499868, -0.336, 0.588949, 0.0357, 0.393709, -0.215125, -0.28694,
             -0.264146, -0.871409,
             -1.160788, 0.752398, -0.054771, 0.554518, -0.559022, 0.633359, -0.150176, 0.020715, -0.23028,
             0.185878, -0.219888, -1.531753,
             -0.833134, -0.168312, -2.27768, 1.136705, 0.093996, 1.672507, 1.29167, 1.49679, 0.423603, 0.249344,
             -0.832107, -2.778636])

        # Starting values: 19 class coefficients for each of the 5 classes.
        init_class_betas = [
            np.array([0.441269, 0.448334, 0.288787, 0.35502, 0.216816, 0.198564, 0.069477,
                      0.346543, 0.233089, 0.323059, 0.333928, 0.149546, 0.124614, 0.0443181,
                      -0.00741137, 0.036144, -0.00298227, 0.140595, 0.046312]),  # Class 1
            np.array([0.801542, 0.483616, 0.546757, 0.498264, 0.206961, 0.367382, 0.00124702,
                      0.587733, 0.398037, 0.5319, 0.369294, 0.246564, -0.100532, -0.141248,
                      -0.019849, 0.038627, -0.104714, 0.173183, 0.0905047]),  # Class 2
            np.array([1.28245, 0.704765, 0.8016, 0.145479, 0.340825, 0.554092, -0.0942558,
                      12.6054, 83.2791, 27.7743, -14.1763, 26.7106, 21.6308, -2.87297,
                      -32.6663, 0.528885, 0.375195, 0.367734, 0.343927]),  # Class 3
            np.array([1.18916, 0.562234, 0.58024, -0.00850272, 0.122827, 0.619118, 0.0330975,
                      0.970455, 0.24954, 0.698946, 0.172871, 0.64793, -0.395843, 0.00472563,
                      -0.425557, 0.157351, 0.0453663, 0.194574, 0.0677801]),  # Class 4
            np.array([0, 0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0, 0, 0,
                      0, 0, 0, 0, 0]),  # Class 5
        ]

        model.setup(X, y, ids=df['CHID'], panels=df['indID'],
                    varnames=varnames,
                    num_classes=5,
                    class_params_spec=class_params_spec,
                    member_params_spec=member_params_spec,
                    init_class_thetas=init_class_thetas,
                    init_class_betas=init_class_betas,
                    alts=[1, 2],
                    ftol_lccm=1e-2,
                    gtol=1e-3,
                    # verbose = 2
                    )
        model.fit()
        model.summarise()
        print('completed Ashkays model')

    # Describe the per-class candidate and required variables.
    latent_classes = LatentClassConstrained(num_classes=number_of_classes)

    latent_classes.populate_class(
        "latent_class_1",
        asvar=list(class_vars),
        isvars=[],
        randvars=[],
        memvars=[],  # cant have a membership here
        req_asvar=["Cost", "BikeshareUnl", "CarshareUnl", "RideshareUnl"],
        req_isvars=[],
        req_randvars=[],
        req_memvars=[],  # cant have a membership here
    )

    latent_classes.populate_class(
        "latent_class_2",
        asvar=list(class_vars),
        isvars=[],
        randvars=[],
        memvars=['InnerCity', 'InnerRegional', 'Under30', 'College', 'FullTime',
                 'PartTime', 'Male', 'Children', 'Income', 'NDI'],
        req_asvar=['Cost', 'TaxiPayG', 'CarRentalPayG'],
        req_isvars=[],
        req_randvars=[],
        req_memvars=['_inter', 'Male', 'FullTime'],
    )

    latent_classes.populate_class(
        "latent_class_3",
        asvar=list(class_vars),
        isvars=[],
        randvars=[],
        memvars=['InnerCity', 'InnerRegional', 'Under30', 'College',
                 'PartTime', 'Income'],
        req_asvar=['Cost'],
        req_isvars=[],
        req_randvars=[],
        req_memvars=['_inter', 'PartTime', 'College'],
    )

    # Retrieve and print data for latent_class_1
    print("Latent Class 1 Data:")
    print(latent_classes.get_class("latent_class_1"))

    # Retrieve and print all latent classes
    print("\nAll Latent Classes:")
    pprint.pprint(latent_classes.get_all_classes())

    varnames_gbl = latent_classes.get_global_asvars_randvars()
    gbl_asvars = varnames_gbl['asvars']
    gbl_isvars = varnames_gbl['isvars']
    gbl_memvars = varnames_gbl['memvars']
    # Union of all names, de-duplicated in first-seen order so the variable
    # order is identical on every run.
    varnames = list(dict.fromkeys(gbl_asvars + gbl_isvars + gbl_memvars))

    print('Running Latent Class Search')

    # Search options.
    asvarnames = gbl_asvars    # class-specific variables
    isvarnames = gbl_isvars    # class-ind specific variables
    memvarnames = gbl_memvars  # class mem specific variables

    choice_id = df['CHID']
    ind_id = df['indID']

    choices = df['CHOICE']   # the df column name containing the choice variable
    alt_var = df['alt']      # the df column name containing the alternative variable
    base_alt = None          # Reference alternative
    distr = ['n', 'u', 't']  # List of random distributions to select from
    # NOTE(review): four alternatives are listed here while the reference
    # model above is set up with alts=[1, 2] -- confirm which is correct.
    choice_set = ['1', '2', '3', '4']

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # CHOOSE SINGLE OBJECTIVE OR MULTI-OBJECTIVE:
    # each criterion is a [KPI, sign] pair placed in a list.
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    criterions = [['bic', -1]]

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # DEFINE PARAMETERS FOR THE SEARCH
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    latent_class = True

    parameters = Parameters(criterions=criterions, df=df, distr=distr, df_test=df_test, choice_set=choice_set,
                            alt_var=alt_var, varnames=varnames, isvarnames=isvarnames, asvarnames=asvarnames,
                            mem_vars=memvarnames, choices=choices,
                            choice_id=choice_id, ind_id=ind_id, latent_class=latent_class, allow_random=True,
                            base_alt=base_alt,
                            allow_bcvars=False, n_draws=200, min_classes=number_of_classes,
                            max_classes=number_of_classes, num_classes=number_of_classes, ps_intercept=False,
                            optimise_class=True, ftol_lccm=1e-4, LCR=latent_classes)

    # Phase-2 settings are applied to an independent copy.
    parameters_2nd = copy.deepcopy(parameters)
    parameters_2nd.fixed_thetas = True
    parameters_2nd.isvarnames = asvarnames
    parameters_2nd.optimise_class = True

    parameters_3rd = copy.deepcopy(parameters_2nd)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # DEFINE THE STARTING SOLUTION
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    init_sol = None

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # RUN THE SEARCH
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Phase 1: optimise membership.
    print("1st Phase, Optimize Membership")
    sa_parms = {'ctrl': (10, 0.001, initial_iterations, 2), 'max_classes': number_of_classes,
                'min_classes': number_of_classes, 'optimise_membership': True,
                'id_num': f'Ashkay_c{number_of_classes}_p1'}
    best_member = call_siman(parameters, init_sol, **sa_parms)
    # TODO if perturb randvar, need to add it into one of the classes

    # Phase 2: optimise the class specifications, with the phase-1 result
    # supplied both as the starting point and as the fixed solution.
    print("2nd Phase, Optimize Classes")
    sa_parms = {'ctrl': (10, 0.001, number_of_iterations, 2), 'max_classes': number_of_classes,
                'min_classes': number_of_classes, 'optimise_membership': False,
                'optimise_class': True, 'fixed_solution': best_member,
                'id_num': f'Ashkay_c{number_of_classes}_p2'}
    best_joint = call_siman(parameters_2nd, best_member, **sa_parms)

    # Final phase: short run started from the best joint solution.
    print("Final Phase")
    sa_parms = {'ctrl': (10, 0.001, 5, 1), 'max_classes': number_of_classes,
                'min_classes': number_of_classes, 'id_num': f'Ashkay_c{number_of_classes}_p3'}
    final_sol = call_siman(parameters_3rd, best_joint, **sa_parms)
1424
+
1425
+
1426
+
1427
+ ''' ----------------------------------------------------------- '''
1428
+ ''' SCRIPT. Testing mixed logit with correlated vars '''
1429
+ ''' ----------------------------------------------------------- '''
1430
def fit_electricity_mxl():
    """Fit a mixed logit with correlated random parameters on the electricity data.

    The data is read from ``electricity.csv`` in the working directory and,
    when that file does not exist, from ``data/electricity.csv``. Five of the
    six explanatory variables get normally distributed random coefficients,
    ``pf`` and ``wk`` are declared correlated, and ``seas`` is treated as
    individual specific. The model is set up with 200 draws and MNL
    initialisation, fitted, and summarised.
    """
    model = MixedLogit()
    try:
        df = pd.read_csv("electricity.csv")
    except FileNotFoundError:
        # Only a missing file triggers the fallback location; malformed data
        # or an interrupt must surface instead of being masked.
        df = pd.read_csv("data/electricity.csv")

    varnames = ['pf', 'cl', 'loc', 'wk', 'tod', 'seas']
    isvars = ['seas']
    X = df[varnames].values
    y = df['choice'].values
    transvars = []
    randvars = {'pf': 'n', 'cl': 'n', 'loc': 'n', 'wk': 'n', 'tod': 'n'}
    # correlated_vars = True
    correlated_vars = ['pf', 'wk']  # Optional

    model.setup(X, y, ids=df['chid'].values, panels=df['id'].values, varnames=varnames,
                isvars=isvars, transvars=transvars, correlated_vars=correlated_vars, randvars=randvars,
                fit_intercept=False, alts=df['alt'], n_draws=200, mnl_init=True)
    model.fit()
    model.get_loglik_null()
    model.summarise()
1452
+
1453
def optimise_synth_1a():
    """Run a simulated-annealing specification search on synthetic data set 1a.

    Prints the working directory, loads ``data/artificial_1a_multi_many.csv``,
    wraps the search configuration (no latent classes, random parameters
    allowed, Box-Cox disabled, 200 draws, verbose) in a ``Parameters`` object
    and passes it to ``call_siman`` without a starting solution.
    """
    print('file')
    cwd = os.getcwd()

    # Print the current working directory
    print("Current Working Directory:", cwd)
    data = pd.read_csv("data/artificial_1a_multi_many.csv")

    # Candidate variables that vary across alternatives.
    alt_specific = (
        ['added_fixed1', 'added_fixed2', 'added_fixed3', 'added_fixed4', 'added_fixed5',
         'added_fixed6', 'added_fixed7', 'added_fixed8', 'added_fixed9', 'added_fixed10']
        + ['nonsig1', 'nonsig2', 'nonsig3', 'nonsig4', 'nonsig5']
        + ['cat_var1', 'cat_var2', 'cat_var3']
    )
    # Candidate variables that are constant across alternatives.
    ind_specific = ['added_isvar1', 'added_isvar2']

    search_config = dict(
        # NOTE(review): other searches in this file pair 'bic' with -1;
        # confirm that +1 is the intended sign here.
        criterions=[['bic', 1]],  # alternative: [['loglik', 1], ['mae', -1]]
        df=data,
        distr=['n', 'u', 't'],  # List of random distributions to select from
        df_test=None,
        choice_set=['1', '2', '3'],
        alt_var=data['alt'],  # column holding the alternative of each row
        varnames=alt_specific + ind_specific,
        isvarnames=ind_specific,
        asvarnames=alt_specific,
        choices=data['choice'],  # column holding the choice indicator
        choice_id=data['id'],
        ind_id=None,
        latent_class=False,
        allow_random=True,
        base_alt=None,  # Reference alternative
        allow_bcvars=False,
        n_draws=200,
        verbose=True,
    )
    parameters = Parameters(**search_config)

    # No starting solution is supplied to the search.
    call_siman(parameters, None)

    # Alternative search drivers, currently disabled:
    # call_thresold(parameters, init_sol)
    # call_parcopsa(parameters, init_sol)
    # call_harmony(parameters, init_sol)
1516
+
1517
+
1518
def estimate_init_mnls():
    """Run a simulated-annealing specification search on ``artificial_latent_new.csv``.

    Prints the working directory, loads the data, builds a ``Parameters``
    object (no latent classes, random and correlated parameters allowed,
    Box-Cox disabled, 200 draws) and passes it to ``call_siman`` without a
    starting solution.
    """
    cwd = os.getcwd()
    print(f'current directory is {cwd}')
    data = pd.read_csv("artificial_latent_new.csv")

    # Candidate variables that vary across alternatives.
    alt_specific = ['price', 'time', 'conven', 'comfort', 'meals', 'petfr', 'emipp']
    alt_specific += ['nonsig1', 'nonsig2', 'nonsig3', 'nonsig4', 'nonsig5']
    # Candidate variables that are constant across alternatives.
    ind_specific = ['income', 'age', 'nonsig_isvar1', 'nonsig_isvar2']

    search_config = dict(
        # NOTE(review): other searches in this file pair 'bic' with -1;
        # confirm that +1 is the intended sign here.
        criterions=[['bic', 1]],  # alternative: [['loglik', 1], ['mae', -1]]
        df=data,
        distr=['n', 'u', 't'],  # List of random distributions to select from
        df_test=None,
        choice_set=['1', '2', '3'],
        alt_var=data['alt'],  # column holding the alternative of each row
        varnames=alt_specific + ind_specific,
        isvarnames=ind_specific,
        asvarnames=alt_specific,
        choices=data['choice'],  # column holding the choice indicator
        choice_id=data['id'],
        ind_id=None,
        latent_class=False,
        allow_random=True,
        base_alt=None,  # Reference alternative
        allow_bcvars=False,
        allow_corvars=True,
        n_draws=200,
    )
    parameters = Parameters(**search_config)

    # No starting solution is supplied to the search.
    call_siman(parameters, None)

    # Alternative search drivers, currently disabled:
    # call_thresold(parameters, init_sol)
    # call_parcopsa(parameters, init_sol)
    # call_harmony(parameters, init_sol)
1553
def optimise_orderered(fit_mode='fit ignore'):
    """Exercise the ordered-logit estimators on the diamonds data set.

    The data is read from ``ord_log_data/diamonds.csv``; ``color``,
    ``clarity`` and ``cut`` are converted to integer category codes and a
    ``vol`` column (``x * y * z``) is added. The response is the ``cut``
    code and the regressors are ``carat``, ``vol`` and ``price``.

    Args:
        fit_mode: Selects the optional fits that run before the mixed model.
            ``'fit robs'`` fits the in-house ``OrderedLogit`` on the wide
            data, ``'fit stats'`` fits statsmodels' ``OrderedModel`` on the
            wide data, and ``'fit long zeke'`` fits ``OrderedLogitLong`` on
            the long data. Any other value (the default ``'fit ignore'``)
            skips all three. ``MixedOrderedLogit`` is always fitted last.
    """
    print('optimising ordered')
    df = pd.read_csv("ord_log_data/diamonds.csv")

    # Categories are listed in the order that defines the integer codes;
    # values absent from a list are coded -1 by pandas.
    color = ['D', 'E', 'F', 'G', 'H', 'I', 'J']
    df['color'] = pd.Categorical(df['color'], categories=color, ordered=True)
    df['color'] = df['color'].cat.codes

    clarity = ['I1', 'SI1', 'SI2', 'VS1', 'VS2', 'VVS1', 'VVS2']
    df['clarity'] = pd.Categorical(df['clarity'], categories=clarity, ordered=True)
    df['clarity'] = df['clarity'].cat.codes

    df['vol'] = np.array(df['x'] * df['y'] * df['z'])

    # NOTE(review): this ordering is alphabetical, not a quality ranking;
    # confirm it is the ordinal scale intended for the response.
    cut = ['Fair', 'Good', 'Ideal', 'Premium', 'Very Good']
    df['cut'] = pd.Categorical(df['cut'], categories=cut, ordered=True)
    df['cut_int'] = df['cut'].cat.codes  # Values in {0,1,2,3,4}

    varnames = ['carat', 'vol', 'price']  # Independent variables
    ncat = len(cut)

    # ---- Optional wide-format fits -------------------------------------
    if fit_mode == 'fit robs':
        mod = OrderedLogit(X=df[varnames], y=df['cut_int'], J=ncat, distr='logit',
                           start=None, normalize=False, fit_intercept=False)
        mod.fit()
        mod.report()
    elif fit_mode == 'fit stats':
        from statsmodels.miscmodels.ordinal_model import OrderedModel

        model = OrderedModel(df['cut_int'], df[varnames], distr='logit')
        result = model.fit()

        # Display the results
        # NOTE(review): nesting assumed; the threshold printout needs
        # `model` and `result`, so it is kept inside this branch.
        print(result.summary())
        print('finished ordered logit')
        num_of_thresholds = ncat - 1
        print(model.transform_threshold_params(result.params[-num_of_thresholds:]))

    # ---- Long-format data ----------------------------------------------
    print('now do a multinomial logit fit trying to get in the ordered logit')
    df['ids'] = np.arange(len(df))
    df_long = misc.wide_to_long(df, id_col='ids', alt_list=cut, alt_name='alt')
    # add the choice variable: True on the row whose alternative is the observed cut
    df_long['choice'] = df_long['cut'] == df_long['alt']

    y = df_long['choice'].values
    alt_var = df_long['alt'].values  # alternative label of every long row
    ids = df_long['ids']
    X = df_long[varnames].values

    print('long form implementation of the ordered logit')
    if fit_mode == 'fit long zeke':
        moll = OrderedLogitLong(X=X,
                                y=y,
                                varnames=varnames,
                                ids=ids,
                                J=ncat,
                                distr='logit',
                                start=None,
                                normalize=False,
                                fit_intercept=False)
        moll.fit(method='BFGS')
        moll.report()

    # ---- Mixed ordered logit (always run) ------------------------------
    print('now I want to do OrderedLogitMixed')

    print('long form implementation of the ordered logit')
    randvars = {'carat': 'n', 'vol': 'n'}
    mol = MixedOrderedLogit(X=X,
                            y=y,
                            varnames=varnames,
                            ids=ids,
                            J=ncat,
                            alts=alt_var,
                            randvars=randvars,
                            distr='logit',
                            start=None,
                            normalize=False,
                            fit_intercept=False)
    mol.fit()
    mol.report()
    print('success')
1669
+
1670
+
1671
+
1672
def Medhi():
    """Fit a multinomial logit and then a mixed logit on ``dummy_parking.csv``.

    Both models use the same six explanatory variables, all treated as
    alternative specific. The mixed logit additionally uses the individual
    id as panel id, puts normal random coefficients on ``No_info``,
    ``ParkMeter`` and ``No_Remind``, and uses 200 Halton draws.
    """
    print('test')
    parking = pd.read_csv("dummy_parking.csv")

    situation_ids = parking['CHID']
    person_ids = parking['ID']

    # all explanatory variables to be included in the model
    # (not used: 'Gender','Age', 'Education','Income','Drv_Exp','Drv_Frq','Prk_Frq')
    explanatory = ['Automatic', 'ParkMeter', 'Price', 'No_info', 'Tap', 'No_Remind']
    alt_specific = explanatory  # alternative-specific variables in varnames
    ind_specific = []  # individual-specific variables in varnames
    alternatives = ['1', '2', '3']  # list of alternatives in the choice set

    random_coeffs = {'No_info': 'n', 'ParkMeter': 'n', 'No_Remind': 'n'}

    chosen = parking['Choice']  # the df column containing the choice variable
    alt_labels = parking['ALT']  # the df column containing the alternative variable
    # NOTE(review): this flag is set but both setups below hard-code
    # fit_intercept=False / omit it; confirm which is intended.
    want_intercept = True  # if intercept needs to be estimated or not (default is False)
    availability = None  # the df column containing the alternatives' availability
    weights = None  # the df column containing the weights
    reference_alt = None  # reference alternative

    design = parking[explanatory]

    mnl = MultinomialLogit()
    mnl.setup(
        X=design,
        y=chosen,
        isvars=ind_specific,
        varnames=explanatory,
        alts=alt_labels,
        ids=situation_ids,
        avail=availability,
        fit_intercept=False,
        base_alt=reference_alt,
    )
    mnl.fit()
    mnl.summarise()

    mxl = MixedLogit()
    mxl.setup(
        X=parking[explanatory],
        y=chosen,
        varnames=explanatory,
        alts=alt_labels,
        isvars=ind_specific,
        ids=situation_ids,
        panels=person_ids,
        avail=availability,
        randvars=random_coeffs,
        n_draws=200,
        halton=True,
    )  # ,init_coeff=np.repeat(.1, 11))
    mxl.fit()
    mxl.summarise()
1707
+
1708
def Mario():
    """Run a simulated-annealing specification search on xlogit's electricity data.

    Downloads the long-format electricity data set from the xlogit
    repository, prints its shape, and searches over mixed-logit
    specifications (random parameters and Box-Cox transforms allowed,
    correlation and latent classes disabled, 200 draws) minimising BIC via
    ``call_siman`` with no starting solution.
    """
    df = pd.read_csv("https://raw.githubusercontent.com/arteagac/xlogit/master/examples/data/electricity_long.csv")

    print(df.shape)
    varnames = ["pf", "cl", "loc", "wk", "tod", "seas"]
    choice_set = np.unique(df['alt'])
    asvarnames = ["pf", "cl", "loc", "wk", "tod", "seas"]
    isvarnames = []
    # 'chid' identifies a choice situation and 'id' the respondent (panel).
    # Using 'id' for both would merge every choice situation of a respondent
    # into one.
    choice_id = df['chid']
    ind_id = df['id']
    choices = df['choice']  # the df column containing the choice variable
    alt_var = df['alt']  # the df column containing the alternative variable
    base_alt = None  # Reference alternative
    distr = ['n', 'u', 't', 'tn']  # List of random distributions to select from
    criterions = [['bic', -1]]

    # NOTE(review): 'pst_intercept' is spelled 'ps_intercept' in another
    # Parameters call in this file; confirm which keyword Parameters reads.
    parameters = Parameters(criterions=criterions, df=df, choice_set=choice_set, choice_id=choice_id,
                            alt_var=alt_var, varnames=varnames, isvarnames=isvarnames, asvarnames=asvarnames,
                            choices=choices,
                            ind_id=ind_id, base_alt=base_alt, allow_random=True, allow_corvars=False,
                            allow_bcvars=True, latent_class=False, allow_latent_random=False,
                            allow_latent_bcvars=False, pst_intercept=True, n_draws=200)
    init_sol = None

    call_siman(parameters, init_sol)
1731
+
1732
+
1733
+
1734
def RRM_f():
    """Fit a ``RandomRegret`` model on ``rrm_cran_2016_long.csv`` and report it."""
    print('RRM Search')
    # Imported here so the dependency is only needed when this example runs.
    from rrm import RandomRegret

    regret_model = RandomRegret(
        df=pd.read_csv("rrm_cran_2016_long.csv"),
        short=False,
        normalize=True,
    )
    regret_model.fit()
    regret_model.report()
    # RRM(df, False) # short = False
1742
+
1743
+
1744
+
1745
def main(args):
    """Run the currently selected example functions.

    Args:
        args: Parsed command-line namespace. ``model_run_item``,
            ``num_classes``, ``iterations`` and ``iterations_i`` are read by
            the dispatcher at the bottom of this function.

    As written, the function runs ``Mario``, ``RRM_f`` and
    ``optimise_orderered`` and then terminates the process, so the
    dispatcher below the early exit is never reached.
    """
    # Local import: ``exit()`` is an interactive helper installed by the
    # ``site`` module and is not guaranteed to exist; ``sys.exit`` is.
    import sys

    Mario()
    # optimise_synth_1a()
    # Medhi()
    # estimate_init_mnls()
    # fit_mnl_example()  # Runs 0.1-0.2
    RRM_f()
    np.random.seed(100)  # THIS SEED CAUSES THE EXCEPTION.
    optimise_orderered()
    sys.exit()

    # NOTE(review): everything below is unreachable because of the
    # unconditional exit above; remove that exit to re-enable the dispatcher.
    # fit_electricity_mxl()
    optimise_electricity()
    # optimise_synth_latent(args.index)
    # true_model_1a()
    # true_model_mxl_1a()
    # optimise_synth_1a()  # Runs 0.1-0.2s

    # Replace the following with the specific function you want to run
    # ashkay_search(args.num_classes)
    # fit_lc_example()
    # fit_lcm_example()

    # Call other functions based on the arguments
    if args.model_run_item == 1:
        print(f'running askay with {args.num_classes}')
        ashkay_search(args.num_classes, args.iterations, args.iterations_i, **vars(args))
    elif args.model_run_item == 2:
        print(f'running laten with {args.num_classes}')
        optimise_latent_3_phase_search(args.num_classes, args.iterations, args.iterations_i)
    elif args.model_run_item == 3:
        print(f'running MaaS with {args.num_classes}')
        MaaS_search(args.num_classes, args.iterations, args.iterations_i, **vars(args))
    elif args.model_run_item == 4:
        print(f'running Swiss with {args.num_classes}')
        optimise_latent_swiss(args.num_classes, args.iterations, args.iterations_i)
    elif args.model_run_item == 5:
        print('Model Estimation: Non Latent')
        optimise_bstm()
        # Non_Latent_Search_Template()
    elif args.model_run_item == 6:
        print('exiting code')
        sys.exit()
    else:
        ashkay_search(args.num_classes)
    print('Finished...')
1791
+
1792
+ '''' ---------------------------------------------------------- '''
1793
+ ''' MAIN PROGRAM '''
1794
+ ''' ----------------------------------------------------------- '''
1795
+
1796
if __name__ == '__main__':
    # Command-line entry point: parse the options and hand them to main().
    # np.random.seed(int(time.time()))
    parser = argparse.ArgumentParser(description='Script for model fitting and optimization.')
    parser.add_argument('--seed', type=int, default=1, help='Random seed for reproducibility')
    parser.add_argument('--optimise', action='store_true', help='Run optimization functions')
    parser.add_argument('--index', type=int, default=0, help='Index for the covering arrays')
    parser.add_argument('--multiobjective', default=0, help='single or multiobjective search')
    parser.add_argument('--num_classes', type=int, default=3, help='Number of latent classes')
    parser.add_argument('--model_run_item', type=int, default=6, help='run which dataset')
    parser.add_argument('--iterations', type=int, default=2000, help='max number of iterations')
    parser.add_argument('--iterations_i', type=int, default=50, help='first phase number of iterations')
    # NOTE(review): the default is 60000*60*4 although the help text says
    # seconds; confirm the intended unit.
    parser.add_argument('--run_time', type=int, default=60000*60*4,
                        help='termination of run with respect to time in seconds.')

    args = parser.parse_args()
    main(args)
1812
+
1813
+
1814
+
1815
+ #np.random.seed(1)
1816
+
1817
+ # Testing model fitting:
1818
+ #fit_mnl_example() # Originally ran in 0.1-0.2s
1819
+ #fit_mnl_box_example() # Originally ran in 1s
1820
+ #fit_mxl_example() # Originally ran in about 12s +- 3s
1821
+ #fit_mxl_box_example() # Originally ran in about 20s
1822
+ #fit_lc_example() # Originally ran in about 6s +- 2s
1823
+ #synth_3()
1824
+ #fit_lcm_example() # Originally ran in about 160s + 30s
1825
+ #fit_electricity_mxl()
1826
+
1827
+ # Optimisation:
1828
+
1829
+
1830
+ #ashkay_search()
1831
+
1832
+ #optimise_electricity()
1833
+ #optimise_latent_3_phase_search()
1834
+ #ashkay_search()
1835
+ #optimise()
1836
+ #run_latent_class_mixed()
1837
+ #print('this is for testing')
1838
+ #latent_synth_4()
1839
+ #print('this is for searching for the model')
1840
+ #optimise_latent_3_phase_search()
1841
+ #optimise_electricity()
1842
+ #optimise_synth_latent()
1843
+
1844
+
1845
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1846
+ # DEBUGGING PARETO FRONT GENERATION
1847
+ '''soln = [{'obj1': 45, 'obj2':2}, {'obj1': 64, 'obj2':8}, {'obj1': 21, 'obj2':2},
1848
+ {'obj1': 88, 'obj2':7}, {'obj1': 13, 'obj2':5}, {'obj1': 36, 'obj2':5}, {'obj1': 83, 'obj2':1},
1849
+ {'obj1': 39, 'obj2':10}, {'obj1': 45, 'obj2':10}, {'obj1': 60, 'obj2':9}]
1850
+ fronts = rank_solutions(soln, 'obj1', 'obj2')
1851
+ print("Fronts=",fronts)
1852
+ crowd = {}
1853
+ key = 'obj2'
1854
+ max_val = max(soln[i][key] for i in range(len(soln))) # Compute max value of objective 'key'
1855
+ min_val = min(soln[i][key] for i in range(len(soln))) # Compute min value of objective 'key'
1856
+ for front in fronts.values():
1857
+ compute_crowding_dist_front(front, soln, crowd, key, max_val, min_val)
1858
+ #print(crowd)
1859
+
1860
+ sorted = sort_solutions(fronts, crowd, soln)
1861
+ print(sorted)
1862
+ '''
1863
+ # }
1864
+
1865
+ # RULES:
1866
+ # --------------------------------------------------------------------------
1867
+ """
1868
+ 1. A variable cannot be an isvar and asvar simultaneously.
1869
+ 2. An isvar or asvar can be a random variable – I don’t understand this?
1870
+ 3. An isvar cannot be a randvar
1871
+ 4. A bcvar cannot be a corvar at the same time
1872
+ 5. corvar should be a list of at least 2 randvars
1873
+ 6. num_classes (Q) should be > 1, for estimating latent class models
1874
+ 7. length of member_params_spec should be == Q-1
1875
+ 8. length of class_params_spec should be == Q
1876
+ 9. coefficients for member_params_spec cannot be in randvars
1877
+
1878
+
1879
+ Randvars are required for MixedLogit models!
1880
+ """