SearchLibrium 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1363 @@
1
+ """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
2
+ IMPLEMENTATION: BASE CLASS FOR LOGIT MODELS
3
+ """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
4
+
5
+ """
6
+ BACKGROUND - Choice Modelling
7
+
8
+ ISVARS: In choice modeling, an individual-specific variable refers to a characteristic or attribute
9
+ of an individual that is specific to that individual and influences their decision-making process.
10
+ These variables are typically included in choice models to capture heterogeneity or differences
11
+ among individuals in their preferences or behavior.
12
+
13
+ Individual-specific variables are used in choice modeling to account for variations in preferences
14
+ or behaviors that cannot be explained solely by the attributes of the alternatives being considered.
15
+
16
+ ASVARS: In choice modeling, an alternative-specific variable refers to a characteristic or attribute
17
+ of a specific alternative that may influence the decision-making process of individuals when choosing
18
+ among available options. These variables are included in choice models to capture the effects of
19
+ attributes that vary across alternatives and affect individuals' preferences for those alternatives.
20
+
21
+ Examples of alternative-specific variables in choice modeling may include attributes such as price,
22
+ brand, product features, location, or availability. These variables can represent both observable
23
+ characteristics of alternatives (e.g., price) and unobservable characteristics that may influence
24
+ preferences (e.g., brand reputation).
25
+
26
+ """""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
27
+
28
+ ''' ----------------------------------------------------------- '''
29
+ ''' MAIN PARAMETERS: '''
30
+ ''' ----------------------------------------------------------- '''
31
+ # N : Number of choice situations
32
+ # P : Number of observations per panel
33
+ # J : Number of alternatives
34
+ # K : Number of variables (Kf: fixed, non-trans, Kr: random, non-trans,
35
+ # Kftrans: fixed, trans, Krtrans: random, trans)
36
+
37
+ ''' ---------------------------------------------------------- '''
38
+ ''' UNUSED LIBRARIES '''
39
+ ''' ---------------------------------------------------------- '''
40
+ #import logging
41
+
42
+ ''' ---------------------------------------------------------- '''
43
+ ''' LIBRARIES '''
44
+ ''' ---------------------------------------------------------- '''
45
+ import warnings
46
+ from abc import ABC #, abstractmethod
47
+ from time import time
48
+ import numpy as np
49
+ import pandas as pd
50
+ import scipy.stats as ss
51
+
52
+
53
+
54
+ try:
55
+ from .boxcox_functions import boxcox_param_deriv, boxcox_transformation, truncate, truncate_lower
56
+ from ._device import device as dev
57
+
58
+ except ImportError:
59
+ from boxcox_functions import boxcox_param_deriv, boxcox_transformation, truncate, truncate_lower
60
+ from _device import device as dev
61
+
62
+
63
+ #library for keeping track of variables
64
+ #from watchpoints import watch
65
+
66
+ ''' ---------------------------------------------------------- '''
67
+ ''' CLASS FOR ESTIMATION OF DISCRETE CHOICE MODEL '''
68
+ ''' ---------------------------------------------------------- '''
69
+ class DiscreteChoiceModel(ABC):
70
+ # {
71
+ """ Docstring """
72
+
73
+ # ===================
74
+ # CLASS PARAMETERS
75
+ # ===================
76
+
77
+ """
78
+ coeff_est: Coefficient estimates
79
+ """
80
+
81
+ # ===================
82
+ # CLASS FUNCTIONS
83
+ # ===================
84
+
85
+ """
86
+ 1. void __init__(self);
87
+ 2. void fit(self);
88
+ 3. void reset_attributes(self);
89
+ 4. set_asarray(self, X, y, varnames, alts, isvars, transvars, ids, weights, panels, avail);
90
+ 5. void pre_process(self, alts, varnames, isvars, transvars, base_alt, fit_intercept, transformation,
91
+ maxiter, panels=None, correlated_vars=None, randvars=None);
92
+ 6. void post_process(self, result, coeff_names, sample_size, hess_inv=None);
93
+ 7. X, names <-- setup_design_matrix(self, X);
94
+ 8. check_long_format_consistency(self, ids, alts, sorted_idx);
95
+ 9. X, y, panels <-- arrange_long_format(self, X, y, ids, alts, panels=None);
96
+ 10. void validate_inputs(self, X, y, alts, varnames, isvars, ids, weights,
97
+ panels, base_alt, fit_intercept, maxiter);
98
+ 11. loglik <-- get_loglik_null(self);
99
+ 12. void summarise(self);
100
+ 13. void print_matrix(self, str_mat, descr);
101
+ 14. str_mat <-- setup_print(self, mat);
102
+ 15. void print_mat(self, mat, descr);
103
+ 16. pch_res <-- fitted(self, type="parameters");
104
+ 17. void print_stdev(self, stdevs, names);
105
+ 18. void compute_stddev(self);
106
+ """
107
+
108
+ ''' ---------------------------------------------------------- '''
109
+ ''' Function '''
110
+ ''' ---------------------------------------------------------- '''
111
+ def reset_attributes(self): # {
112
+ self.coeff_names, self.coeff_est = None, None
113
+ self.stderr, self.zvalues = None, None
114
+ self.pvalues, self.loglik = None, None
115
+ self.total_fun_eval = 0
116
+ # }
117
+
118
+ ''' ---------------------------------------------------------- '''
119
+ ''' Function '''
120
+ ''' ---------------------------------------------------------- '''
121
+ def __init__(self):
122
+ # {
123
+ self.is_latent_class = False
124
+ self.reset_attributes()
125
+
126
+ self.reg_penalty = 0.00 # Define a penalty for regularization.
127
+ self.pval_penalty = 0
128
+ # NOTE: The reg_penalty value is tricky to define. If too high, convergence is restricted.
129
+ # Set to zero to turn off. A value of 1 seems too high.
130
+
131
+ # Variables used in derived classes and were previously not defined in this class:
132
+ self.num_classes, self.obs_prob = 0, 0
133
+ self.Xnames, self.member_params_spec = None, None
134
+ self.ordered_varnames = None
135
+ self.covariance_matrix, self.betas = None, None
136
+
137
+ # Define constants:
138
+ self.ftol, self.gtol = 1e-7, 1e-5
139
+ self.maxiter = 2000
140
+
141
+ # Define boolean flags:
142
+ self.converged = False
143
+ self.return_grad, self.return_hess, fit_intercept = True, True, False
144
+ self.scipy_optimisation = True
145
+ self.method, self.transformation = "bfgs", "boxcox"
146
+
147
+ self.trans_func = None # NEW. CHECK VALIDITY!
148
+ self.varnames, self.isvars, self.transvars = None, None, None
149
+ self.base_alt, self.alts, self.panels = None, None, None
150
+ self.bic, self.aic, self.mae = None, None, None # Metrics
151
+ self.loglik = None
152
+
153
+ # Initialise empty arrays
154
+ self.fxidx, self.fxtransidx = [], []
155
+ self.X, self.y = [], []
156
+ self.X_original, self.y_original = [], []
157
+ self.weights, self.avail = [], []
158
+ self.init_coeff = []
159
+
160
+ self.descr = ""
161
+ # }
162
+
163
+ ''' ---------------------------------------------------------- '''
164
+ ''' Function. Virtual '''
165
+ ''' ---------------------------------------------------------- '''
166
+ #@abstractmethod
167
def fit(self):
    """Placeholder estimation hook; the base class does nothing here."""
    return None
170
+
171
+ ''' ---------------------------------------------------------- '''
172
+ ''' Function. Convert to numpy arrays '''
173
+ ''' ---------------------------------------------------------- '''
174
def set_asarray(self, X, y, varnames, alts, isvars, transvars, ids, weights, panels, avail):
    """Convert the supplied inputs to NumPy arrays.

    ``X`` and ``y`` are always converted. Every other argument is converted
    only when it is not ``None``; a missing ``transvars`` becomes an empty
    list while the remaining missing arguments stay ``None``. The name
    arguments (``varnames``, ``isvars``, ``transvars``) use dtype ``<U64``.

    Returns the ten values as a tuple, in the order of the parameters.
    """
    def _optional(values, fallback=None, **array_kwargs):
        # Leave absent inputs as their fallback, convert everything else.
        if values is None:
            return fallback
        return np.asarray(values, **array_kwargs)

    name_dtype = "<U64"
    return (
        np.asarray(X),
        np.asarray(y),
        _optional(varnames, dtype=name_dtype),
        _optional(alts),
        _optional(isvars, dtype=name_dtype),
        _optional(transvars, fallback=[], dtype=name_dtype),
        _optional(ids),
        _optional(weights),
        _optional(panels),
        _optional(avail),
    )
187
+
188
+ ''' ---------------------------------------------------------- '''
189
+ ''' Function to assign the penalty of the regularisation '''
190
+ ''' ---------------------------------------------------------- '''
191
+ def reassign_penalty(self, penalty = 0):
192
+ self.reg_penalty = penalty
193
+
194
+ ''' ---------------------------------------------------------- '''
195
+ ''' Function. Initialise member variables '''
196
+ ''' ---------------------------------------------------------- '''
197
+ def pre_process(self, alts, varnames, isvars, transvars, base_alt, fit_intercept, transformation,
198
+ maxiter, panels=None, correlated_vars=None, randvars=None):
199
+ # {
200
+ self.reset_attributes()
201
+ self.fit_start_time = time() # Set the start time for runtime calculation
202
+ self.isvars = [] if isvars is None else isvars
203
+ self.transvars = [] if transvars is None else transvars
204
+ self.randvars = [] if randvars is None else randvars
205
+
206
+
207
+ self.asvars = [v for v in varnames if ((v not in self.isvars) and
208
+ #(v not in self.transvars) and
209
+ (v not in self.randvars))]
210
+ # old definition of asvars used to make datasets
211
+ self.asvars_construct_matrix = [v for v in varnames if v not in self.isvars]
212
+ self.randtransvars, self.fixedtransvars = [], []
213
+ self.alts = np.unique(alts) # Extract unique alternatives from the data
214
+ self.varnames = list(varnames) # Easier to handle with lists
215
+ self.fit_intercept = fit_intercept
216
+ self.transformation = transformation
217
+ self.base_alt = self.alts[0] if base_alt is None else base_alt
218
+ self.correlated_vars = False if correlated_vars is None else correlated_vars
219
+ self.maxiter = maxiter
220
+
221
+ # Assign panels to self.panels if self.panels attribute does not exist
222
+ self.panels = getattr(self, 'panels', panels) # i.e., if not hasattr(self, 'panels'): self.panels = panels
223
+ # }
224
+
225
+
226
+ ''' ---------------------------------------------------------- '''
227
+ ''' Function '''
228
+ ''' convert hess inverse for L-BFGS-B optimisation method '''
229
+ ''' ---------------------------------------------------------- '''
230
def post_process(self, result, coeff_names, sample_size, hess_inv=None):
    """Store the optimiser output and derive the inference statistics.

    Parameters
    ----------
    result : mapping
        Optimisation result. Keys read here: ``success``, ``x``, ``fun``,
        ``nit``; optionally ``stderr``, ``is_latent_class``, ``hess_inv``,
        ``class_x`` and ``class_x_stderr``.
    coeff_names : array-like
        Coefficient names. For latent-class results the names of class
        ``i`` are taken from ``coeff_names[0][i]``.
    sample_size : int
        Sample size used for the p-values and the BIC.
    hess_inv : optional
        Accepted for interface compatibility; not used in this method.

    Sets ``converged``, ``coeff_est``, ``loglik``, ``total_iter``,
    ``estim_time_sec``, ``sample_size``, ``num_params``, ``stderr``,
    ``coeff_names``, ``zvalues``, ``pvalues``, ``aic`` and ``bic``.
    """
    self.converged = result['success']
    self.coeff_est = result['x']
    self.loglik = -result['fun']
    self.total_iter = result['nit']
    self.estim_time_sec = time() - self.fit_start_time
    self.sample_size = sample_size
    self.num_params = self.Kbw + self.Kchol + self.Kf + self.Kftrans + self.Kr + self.Krtrans

    # ----- Standard errors supplied directly by the optimiser ------------
    self.stderr = np.zeros_like(self.coeff_est)
    std_err_estimated = False
    if 'stderr' in result:
        std_err_estimated = True
        self.stderr = result['stderr']

    self.is_latent_class = result['is_latent_class'] if 'is_latent_class' in result else False

    # ----- Coefficient names ----------------------------------------------
    if self.is_latent_class:
        new_coeff_names = np.array([])
        for i in range(self.num_classes):
            class_coeff_names = coeff_names[0][i]
            # np.char.add is the public alias of the deprecated
            # np.core.defchararray.add path, and matches the rest of the file.
            class_coeff_names = np.char.add('class-' + str(i + 1) + ': ', class_coeff_names)
            new_coeff_names = np.concatenate((new_coeff_names, class_coeff_names))
        coeff_names = new_coeff_names

    self.coeff_names = coeff_names

    # ----- Standard errors from the stored inverse Hessian ---------------
    Hinv_exists = getattr(self, 'Hinv', None) is not None
    if Hinv_exists and not self.is_latent_class:
        if dev.using_gpu:
            self.stderr = np.sqrt(np.abs(np.diag(self.Hinv)))
        else:
            diag_raw = np.diag(np.array(self.Hinv))
            # Non-positive (and NaN) diagonal entries are treated as zero;
            # small negatives are assumed to be floating-point error.
            diag_arr = np.zeros(len(diag_raw))
            positive = diag_raw > 0
            diag_arr[positive] = diag_raw[positive]
            self.stderr = np.sqrt(np.abs(diag_arr))

        # `.any` must be called: the bare bound method is always truthy and
        # used to mark every Hinv-based estimate as failed.
        std_err_estimated = not np.isnan(self.stderr).any()

    # ----- Fallback: inverse Hessian reported by the optimiser -----------
    if not std_err_estimated:
        if self.method == "bfgs":
            self.stderr = np.sqrt(np.abs(np.diag(result['hess_inv'])))
        elif self.method == "l-bfgs-b":
            hess = result['hess_inv'].todense()
            self.stderr = np.sqrt(np.abs(np.diag(np.array(hess))))

    # ----- lambda mask: value subtracted from each estimate --------------
    lambda_mask = [1 if "lambda" in x else 0 for x in coeff_names]

    if len(lambda_mask) != len(self.coeff_est):
        lambda_mask = np.ones_like(self.coeff_est)

    if 'is_latent_class' in result:
        lambda_mask = np.zeros_like(self.coeff_est)

    # ----- z-values --------------------------------------------------------
    self.zvalues = np.nan_to_num((self.coeff_est - lambda_mask) / self.stderr)
    self.zvalues = truncate(self.zvalues, -1e+5, 1e+5)  # Set maximum (and minimum) limits

    # ----- p-values --------------------------------------------------------
    if sample_size < 100:  # arbitrary ... could do standard 30
        self.pvalues = 2 * (1 - ss.t.cdf(np.abs(self.zvalues), df=sample_size))
    else:
        self.pvalues = 2 * (1 - ss.norm.cdf(np.abs(self.zvalues)))

    # ----- Penalise p-values above the threshold --------------------------
    non_sigs = self.num_of_exceeding_pvalues(self.pvalues, 0.0)
    self.loglik -= non_sigs * self.pval_penalty

    # ----- AIC and BIC -----------------------------------------------------
    num_params = len(self.coeff_est)
    if self.is_latent_class:
        # PENALISE IF TOO FEW  # TODO FORCE a variable
        if num_params <= self.num_classes:
            num_exceeded = self.num_classes - num_params
            self.loglik -= sample_size * num_exceeded

        num_params += len(result['class_x'])

    self.aic = 2 * num_params - 2 * self.loglik
    self.bic = np.log(sample_size) * num_params - 2 * self.loglik

    if 'is_latent_class' in result:
        self.class_x = result['class_x']
        self.class_x_stderr = result['class_x_stderr']
372
+
373
def num_of_exceeding_pvalues(self, pvalues, threshold):
    """Count the p-values that are strictly greater than ``threshold``.

    :param pvalues: iterable of p-values
    :param threshold: cut-off the p-values are compared against
    :type threshold: float
    :return: number of entries with ``p > threshold``
    :rtype: int
    """
    return sum(1 for p in pvalues if p > threshold)
382
+
383
+
384
+ ''' ------------------------------------------------------------------------ '''
385
+ ''' Function. Setup and reshape input data after adding isvars and intercept '''
386
+ ''' ------------------------------------------------------------------------ '''
387
+ def setup_design_matrdix(self, X):
388
+ # {
389
+ """ Setup the design matrix by adding the intercept when necessary and
390
+ converting the isvars to a dummy representation that removes the base alternative """
391
+
392
+ self.J = getattr(self, 'J', len(self.alts)) # i.e., if not hasattr(self, 'J'): self.J = len(self.alts)
393
+ self.N = int(len(X) / self.J)
394
+ self.P = 0
395
+
396
+ P_N = self.N
397
+ J = self.J
398
+ N = self.N
399
+
400
+ if self.panels is not None:
401
+ # {
402
+ # Identify and count unique values. Return two arrays
403
+ unique_values, counts = np.unique(self.panels, return_counts=True)
404
+
405
+ self.N = len(unique_values) # Set N as the number of unique values
406
+ normalized_counts = counts / self.J # Normalize counts by dividing by self.J
407
+ self.P_i = normalized_counts.astype(int) # Convert scaled counts to integers
408
+
409
+ # Assumption of itegrality. Should check if any element is not an integer!
410
+
411
+ self.P = np.max(self.P_i) # Identify and store maximum element
412
+ # }
413
+ else:
414
+ # {
415
+ self.P = 1
416
+ self.P_i = np.ones([self.N]).astype(int)
417
+ # }
418
+
419
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
420
+ # MAKE COPIES
421
+ isvars = self.isvars.copy()
422
+ asvars = self.asvars.copy()
423
+ asvars_construct_matrix = self.asvars_construct_matrix.copy()
424
+ randvars = self.randvars.copy()
425
+ randtransvars = self.randtransvars.copy()
426
+ fixedtransvars = self.fixedtransvars.copy()
427
+ varnames = self.varnames.copy()
428
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
429
+
430
+ self.varnames = np.array(varnames, dtype="<U64")
431
+ lst = self.varnames.tolist()
432
+ ispos = [lst.index(str) for str in self.isvars if str in lst] # Position of IS vars
433
+
434
+
435
+ ispos_old = [self.varnames.tolist().index(i) for i in self.isvars] # Position of IS va
436
+ # adjust index array to include isvars
437
+ if len(self.isvars) > 0 and not hasattr(self, 'ispos'): # check not done before...
438
+ # {
439
+
440
+
441
+ nbFalse = len(self.isvars) * (J - 1) # Calculate the number of False values to insert
442
+
443
+ # Creates a masked version of the boolean array fxidx_bool based on indices not present in the array ispos
444
+ fxidx_bool = np.array(self.fxtransidx, dtype="bool") # Convert elements to booleans
445
+ indices = np.arange(len(fxidx_bool)) # Array of indices from 0 to len(fxidx_bool) - 1.
446
+ mask = np.isin(indices, ispos) # True indicates that the element from indices is in ispos, and False indicates it is not.
447
+ fxidx_bool_masked = fxidx_bool[~mask] # negated_mask = ~mask
448
+ self.fxidx = np.insert(fxidx_bool_masked, 0, np.repeat(True, nbFalse))# Insert True values
449
+
450
+ fxtransidx_bool = np.array(self.fxtransidx, dtype="bool") # Convert elements to booleans
451
+ indices= np.arange(len(fxtransidx_bool))
452
+ mask = np.isin(indices, ispos)
453
+ fxtransidx_bool_masked = fxtransidx_bool[~mask]
454
+ self.fxtransidx = np.insert(fxtransidx_bool_masked, 0, np.repeat(False, nbFalse)) # Insert False values
455
+
456
+ if hasattr(self, 'rvidx'):
457
+ # {
458
+ rvidx_bool = np.array(self.rvidx, dtype=bool) # Convert self.rvidx to boolean array if it's not already
459
+ indices = np.arange(len(rvidx_bool))
460
+ mask = np.isin(indices, ispos)
461
+ rvidx_bool_masked = rvidx_bool[~mask]
462
+ self.rvidx = np.insert(rvidx_bool_masked, 0, np.repeat(False, nbFalse)) # Insert False values
463
+ # }
464
+
465
+ if hasattr(self, 'rvtransidx'):
466
+ # {
467
+ rvtransidx_bool = np.array(self.rvtransidx, dtype=bool) # Convert self.rvidx to boolean array if it's not already
468
+ indices = np.arange(len(rvtransidx_bool))
469
+ mask = np.isin(indices, ispos)
470
+ rvtransidx_bool_masked = rvtransidx_bool[~mask]
471
+ self.rvtransidx = np.insert(rvtransidx_bool_masked, 0, np.repeat(False, nbFalse)) # Insert False values
472
+ # }
473
+ # }
474
+
475
+ if self.fit_intercept:
476
+ # {
477
+ if '_inter' not in self.isvars: # stop running in validation
478
+ # {
479
+ ones_array = np.ones(J * N) # Create an array of ones with length J * N.
480
+ column_vector = ones_array[:, None] # Reshapes array to have dimensions (J * N, 1)
481
+ X = np.hstack((column_vector, X)) # Stack arrays horizontally
482
+
483
+ # Adjust variables to allow intercept parameters
484
+ # These lines of code check if self has specific attributes. If it does, it will
485
+ # convert it to a NumPy array with boolean dtype using np.array().
486
+ # If the attribute doesn't exist, it will create an array of False values using np.repeat()
487
+ # with length J - 1. Then, it will insert False at the beginning of the array using np.insert().
488
+ self.isvars = np.insert(np.array(self.isvars, dtype="<U64"), 0, '_inter')
489
+ self.varnames = np.insert(np.array(self.varnames, dtype="<U64"), 0, '_inter')
490
+ self.fxidx = np.insert(np.array(self.fxidx, dtype="bool_"), 0, np.repeat(True, J - 1))
491
+ self.fxtransidx = np.insert(np.array(self.fxtransidx, dtype="bool_"), 0, np.repeat(False, J - 1))
492
+
493
+
494
+ # Get the attribute if it exits, otherwise add False, 'J-1' times
495
+ current_rvidx = getattr(self, 'rvidx', np.repeat(False, J - 1))
496
+ self.rvidx = np.insert(current_rvidx, 0, np.repeat(False, J - 1)) # Insert at beginning
497
+
498
+ current_rvtransidx = getattr(self, 'rvtransidx', np.repeat(False, J - 1))
499
+ self.rvtransidx = np.insert(current_rvtransidx, 0, np.repeat(False, J - 1)) # Insert at beginning
500
+
501
+ # }
502
+ # }
503
+
504
+ if self.transformation == "boxcox": # {
505
+ self.trans_func = boxcox_transformation
506
+ self.transform_deriv = boxcox_param_deriv
507
+ # }
508
+
509
+ S = np.zeros((self.N, self.P, self.J))
510
+ for i in range(self.N):
511
+ S[i, 0:self.P_i[i], :] = 1
512
+
513
+ self.S = S
514
+
515
+ lst = self.varnames.tolist()
516
+ self.ispos = [lst.index(str) for str in self.isvars if str in lst] # Position of isvars in varnames
517
+ self.aspos = [lst.index(str) for str in asvars_construct_matrix if str in lst] # Position of asvars in varnames
518
+ randpos = [lst.index(str) for str in randvars if str in lst] # Position of randvars
519
+ randtranspos = [lst.index(str) for str in randtransvars if str in lst] # bc transformed variables with random coeffs
520
+ fixedtranspos = [lst.index(str) for str in fixedtransvars if str in lst] # bc transformed variables with fixed coeffs
521
+
522
+ self.correlationpos = []
523
+ if randvars:
524
+ self.correlationpos = [lst.index(str) for str in self.varnames if str in self.randvars] # Position of correlated variables within randvars
525
+
526
+ if (isinstance(self.correlated_vars, list)):
527
+ #{
528
+ self.correlationpos = [lst.index(str) for str in self.varnames if str in self.correlated_vars]
529
+ self.uncorrelatedpos = [lst.index(str) for str in self.varnames if str not in self.correlated_vars]
530
+ #}
531
+
532
+ self.Kf = sum(self.fxidx) # Set number of fixed coeffs from idx
533
+ self.Kr = len(randpos) # Number of random coefficients
534
+ self.Kftrans = len(fixedtranspos) # Number of fixed coefficients of bc transformed vars
535
+ self.Krtrans = len(randtranspos) # Number of random coefficients of bc transformed vars
536
+ self.Kchol = 0 # Number of random beta cholesky factors
537
+ self.correlationLength = 0
538
+ self.Kbw = self.Kr
539
+
540
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
541
+ # set up length of betas required to estimate correlation and/or
542
+ # random variable standard deviations, useful for cholesky matrix
543
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
544
+ if (self.correlated_vars):
545
+ # {
546
+ if (isinstance(self.correlated_vars, list)):
547
+ # {
548
+ nb_corvars = len(self.correlated_vars)
549
+ self.correlationLength = nb_corvars
550
+ self.Kbw = self.Kr - nb_corvars
551
+ self.Kchol = int((nb_corvars * (nb_corvars + 1)) / 2)
552
+ # i.e., Kchol => # permutations of specified params in correlation list
553
+ # }
554
+ else: # {
555
+ self.correlationLength = self.Kr
556
+ self.Kbw = 0
557
+ nb_randvars = len(self.randvars)
558
+ self.Kchol = int((nb_randvars * (nb_randvars + 1)) / 2)
559
+ # i.e., correlated_vars = True, Kchol => permutations of rand vars
560
+ # }
561
+ # }
562
+
563
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
564
+ # Create design matrix for individual specific variables
565
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
566
+ Xis = None
567
+ if len(self.isvars):
568
+ # {
569
+ # Create a dummy individual specific variables for the alts
570
+ dummy = np.tile(np.eye(J), reps=(P_N, 1))
571
+
572
+ # Remove base alternative
573
+ dummy = np.delete(dummy, np.where(self.alts == self.base_alt)[0], axis=1)
574
+ Xis = X[:, self.ispos]
575
+
576
+
577
+ if Xis.dtype == np.object_:
578
+ Xis_numeric = pd.to_numeric(Xis.flatten(), errors='coerce').reshape(Xis.shape)
579
+ Xis = Xis_numeric
580
+
581
+
582
+ # Multiply dummy representation by the individual specific data
583
+ try:
584
+
585
+ Xis = np.einsum('nj,nk->njk', Xis, dummy, dtype="float64")
586
+ except:
587
+ Xis_numeric = pd.to_numeric(Xis.flatten(), errors='coerce').reshape(Xis.shape)
588
+ Xis = Xis_numeric
589
+ Xis = np.einsum('nj,nk->njk', Xis, dummy, dtype="float64")
590
+ # Example of filtering out non-numeric data
591
+ #Xis = np.array([x for x in Xis if isinstance(x, (int, float))], dtype='float64')
592
+ nbOf = (self.J - 1) * len(self.ispos)
593
+ Xis = Xis.reshape((P_N, self.J, nbOf)) # ERROR: UNEXPECTED ARGUMENT?
594
+
595
+ # }
596
+ else: # {
597
+ Xis = np.array([])
598
+ # }
599
+
600
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
601
+ # For alternative specific variables
602
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
603
+ Xas = None
604
+ if asvars_construct_matrix:
605
+ # {
606
+ Xas = X[:, self.aspos]
607
+ Xas = Xas.reshape(N, J, -1)
608
+ # }
609
+
610
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
611
+ # Set design matrix based on existence of asvars and isvars
612
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
613
+ """
614
+ NEW CODE:
615
+ if asvars_construct_matrix: # There are vars that are not isvars
616
+ X = np.dstack((Xis, Xas)) if self.isvars else Xas
617
+ elif self.isvars:
618
+ X = Xis
619
+ else: # {
620
+ length = len(self.varnames)
621
+ if self.fit_intercept: length += (J - 1) - 1
622
+ X = X.reshape(-1, len(self.alts), length)
623
+ # }"""
624
+
625
+ # OLD CODE:
626
+ if len(asvars_construct_matrix) and len(self.isvars):
627
+ X = np.dstack((Xis, Xas))
628
+ elif len(asvars_construct_matrix):
629
+ X = Xas
630
+ elif (len(self.isvars)):
631
+ X = Xis
632
+ else:
633
+ x_varname_length = len(self.varnames) if not self.fit_intercept \
634
+ else (len(self.varnames) - 1) + (J - 1)
635
+ X = X.reshape((-1, len(self.alts), x_varname_length))
636
+ # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
637
+
638
+
639
+ intercept_names = ["_intercept.{}".format(j) for j in self.alts
640
+ if j != self.base_alt] if self.fit_intercept else []
641
+
642
+ names = ["{}.{}".format(isvar, j) for isvar in isvars for j in self.alts if j != self.base_alt]
643
+
644
+ lambda_names_fixed = ["lambda.{}".format(transvar) for transvar in fixedtransvars]
645
+
646
+ lambda_names_rand = ["lambda.{}".format(transvar) for transvar in randtransvars]
647
+
648
+ randvars = [x for x in self.varnames if x in self.randvars]
649
+ randvars = np.array(randvars, dtype='<U64')
650
+
651
+ asvars_names = [x for x in asvars if (x not in self.randvars) and
652
+ (x not in fixedtransvars) and (x not in randtransvars)]
653
+
654
+ chol = ["chol." + self.varnames[self.correlationpos[i]] + "." +
655
+ self.varnames[self.correlationpos[j]] for i
656
+ in range(self.correlationLength) for j in range(i + 1)]
657
+
658
+ br_w_names = []
659
+
660
+ # three cases for corr. varnames: no corr, corr list, corr Bool (All)
661
+ if not (self.correlated_vars is True or isinstance(self.correlated_vars, list)):
662
+ if hasattr(self, "rvidx"): # avoid errors with multinomial logit
663
+ br_w_names = np.char.add("sd.", randvars)
664
+
665
+ if (isinstance(self.correlated_vars, list)): # if not all r.v.s correlated
666
+ # {
667
+ sd_uncorrelated_pos = [lst.index(str) for str in self.varnames
668
+ if str not in self.correlated_vars and str in randvars]
669
+ br_w_names = np.char.add("sd.", self.varnames[sd_uncorrelated_pos])
670
+ # }
671
+
672
+ sd_rand_trans = np.char.add("sd.", self.varnames[randtranspos])
673
+
674
+ names = np.concatenate((intercept_names, names, asvars_names, randvars,
675
+ chol, br_w_names, fixedtransvars,
676
+ lambda_names_fixed, randtransvars,
677
+ sd_rand_trans, lambda_names_rand))
678
+
679
+ names = np.array(names, dtype="<U64")
680
+ return X, names
681
+ # }
682
+
683
+
684
def check_instance(self, obj):
    """Tell whether ``obj``'s class name marks it as a (multinomial/ordered) logit.

    Returns ``True`` when the class name of ``obj`` contains
    ``"MultinomialLogit"`` or ``"OrderedLogitLong"``, ``False`` otherwise.
    The check is by name only, not by ``isinstance``.
    """
    class_name = obj.__class__.__name__
    return ("MultinomialLogit" in class_name) or ("OrderedLogitLong" in class_name)
694
+
695
+ def restate_idx(self, ispos, isvar, asvars):
696
+ #todo check if i isvar and aasvars can both be present, otherwise not needed
697
+ if self.check_instance(self):
698
+
699
+
700
+ return
701
+ else:
702
+
703
+ #I BELIEVE THIS IS REDUNDANT NOW
704
+
705
+ self.fxidx = np.delete(self.fxidx, ispos)
706
+
707
+ self.fxtransidx = np.delete(self.fxtransidx, ispos)
708
+ # self.fxtransidx = np.insert(np.array(self.fxtransidx, dtype="bool_"),
709
+ # 0, np.repeat(False, len(self.isvars)*(J - 1)))
710
+ if hasattr(self, 'rvidx'):
711
+ self.rvidx = np.delete(self.rvidx, ispos)
712
+ if hasattr(self, 'rvtransidx'):
713
+ self.rvtransidx = np.delete(self.rvtransidx, ispos)
714
+
715
+
716
+
717
+
718
+ def setup_design_matrix(self, X):
719
+ """Setups and reshapes input data after adding isvars and intercept.
720
+
721
+ Setup the design matrix by adding the intercept when necessary and
722
+ converting the isvars to a dummy representation that removes the base
723
+ alternative.
724
+ """
+ # Number of alternatives: reuse self.J when it is already set, otherwise count self.alts.
725
+ J = getattr(self, 'J', len(self.alts))
726
+
+ # Rows of X divided by J (assumes J rows per choice situation in long format - TODO confirm).
727
+ N = P_N = int(len(X)/J)
728
+ self.P = 0
729
+ self.N = N
730
+ self.J = J
731
+ if self.panels is not None:
732
+ # panels size
+ # Rows per distinct panel value divided by J; self.P is the largest such count
+ # and self.N becomes the number of distinct panels.
733
+ self.P_i = ((np.unique(self.panels, return_counts=True)[1])/J).astype(int)
734
+ self.P = np.max(self.P_i)
735
+ self.N = len(self.P_i)
736
+ else:
737
+ self.P = 1
738
+ self.P_i = np.ones([N]).astype(int)
+ # Local snapshots taken before the attributes are modified further down.
739
+ isvars = self.isvars.copy()
740
+ asvars = self.asvars.copy()
741
+ asvars_construct_matrix = self.asvars_construct_matrix.copy()
742
+ randvars = self.randvars.copy()
743
+ randtransvars = self.randtransvars.copy()
744
+ fixedtransvars = self.fixedtransvars.copy()
745
+ varnames = self.varnames.copy()
746
+ self.varnames = np.array(varnames, dtype="<U64")
747
+
+ # lst is the list of variable names (with '_inter' prepended when an intercept is fitted)
+ # that is used below for position lookups via lst.index().
748
+ lst = varnames
749
+ lst = np.array(lst, dtype='<U64')
750
+ if self.fit_intercept:
751
+ lst = np.insert(lst, 0, '_inter').tolist()
752
+ if '_inter' not in self.isvars:
753
+ if hasattr(self, 'ispos'):
754
+ self.isvars = np.insert(self.isvars, 0, '_inter')
755
+ else:
756
+ self.isvars = np.array(['_inter'])
757
+ else:
758
+ lst = lst.tolist()
759
+
+ # NOTE(review): the loop variable below is named `str` and shadows the builtin inside the comprehension.
760
+ ispos = [lst.index(str) for str in self.isvars if str in lst] # Position of IS vars
761
+ #ispos = [self.varnames.tolist().index(i) for i in self.isvars] # Position of IS vars
762
+
763
+ # adjust index array to include isvars
764
+
765
+ if len(self.isvars) > 0 and not hasattr(self, 'ispos'): # check not done before...
766
+ #self.fxidx = np.insert(np.array(self.fxidx, dtype="bool_"), 0,
767
+ # np.repeat(True, len(self.isvars)*(J - 1)))
+ # NOTE(review): where_h is overwritten with 0 on the next statement, so the ispos assignment is dead.
768
+ where_h = ispos
769
+ where_h =0
770
+
771
+
772
+ self.restate_idx(ispos, isvars, asvars)
+ # Each isvar contributes J - 1 entries (one per non-base alternative) to the index masks.
773
+ self.fxidx =np.insert(np.array(self.fxidx, dtype="bool_"), where_h, np.repeat(True, len(self.isvars)*(J-1)))
774
+ self.fxtransidx = np.insert(np.array(self.fxtransidx, dtype="bool_"), where_h, np.repeat(False, len(self.isvars)*(J - 1)))
775
+ #self.fxtransidx = np.insert(np.array(self.fxtransidx, dtype="bool_"),
776
+ # 0, np.repeat(False, len(self.isvars)*(J - 1)))
777
+ if hasattr(self, 'rvidx'):
778
+ self.rvidx = np.insert(np.array(self.rvidx, dtype="bool_"), where_h,
779
+ np.repeat(False, len(self.isvars)*(J -1)))
780
+ if hasattr(self, 'rvtransidx'):
+ # NOTE(review): inserts at literal 0 rather than where_h; equivalent only while where_h == 0.
781
+ self.rvtransidx = np.insert(np.array(self.rvtransidx, dtype="bool_"),
782
+ 0, np.repeat(False, len(self.isvars)*(J - 1)))
783
+ else:
784
+ self.restate_idx(ispos, isvars, asvars)
785
+ if self.fit_intercept:
+ # Prepend a column of ones to X for the intercept.
786
+ X = np.hstack((np.ones(J*N)[:, None], X))
787
+ #X=np.hstack(np.tile(np.eye(J), reps=(P_N, 1)),X)
788
+ #eye = np.tile(np.eye(J), reps=(P_N, 1))
789
+ #X = np.hstack((eye,X))
790
+ if '_inter' not in self.isvars: # stop running in validation
791
+ # adjust variables to allow intercept parameters
792
+ self.isvars = np.insert(np.array(self.isvars, dtype="<U64"), 0, '_inter')
793
+ self.varnames = np.insert(np.array(self.varnames, dtype="<U64"), 0, '_inter')
794
+ self.fxidx = np.insert(np.array(self.fxidx, dtype="bool_"), 0, np.repeat(True, J-1))
795
+ if hasattr(self, 'rvidx'):
796
+ self.rvidx = np.insert(np.array(self.rvidx, dtype="bool_"), 0, np.repeat(False, J-1))
797
+ self.fxtransidx = np.insert(np.array(self.fxtransidx, dtype="bool_"), 0, np.repeat(False, J-1))
798
+ if hasattr(self, 'rvtransidx'):
799
+ self.rvtransidx = np.insert(np.array(self.rvtransidx, dtype="bool_"), 0, np.repeat(False, J-1))
800
+
801
+
802
+ if self.transformation == "boxcox":
803
+ self.trans_func = boxcox_transformation
804
+ self.transform_deriv = boxcox_param_deriv
805
+
+ # S is an (N, P, J) mask: for unit i the first P_i[i] slices along axis 1 are set to 1, the rest stay 0.
806
+ S = np.zeros((self.N, self.P, self.J))
807
+ for i in range(self.N):
808
+ S[i, 0:self.P_i[i], :] = 1
809
+ self.S = S
810
+
811
+ #ispos = [self.varnames.tolist().index(i) for i in self.isvars[self.isvars != '_inter']] # Position of IS vars
812
+ aspos = [self.varnames.tolist().index(i) for i in asvars_construct_matrix] # Position of AS vars
813
+ self.aspos = np.array(aspos) # saved for later use
814
+ self.ispos = np.array(ispos)
815
+ randpos = [self.varnames.tolist().index(i) for i in randvars] # Position of random vars
816
+ randtranspos = [self.varnames.tolist().index(i) for i in randtransvars] # bc transformed variables with random coeffs
817
+ fixedtranspos = [self.varnames.tolist().index(i) for i in fixedtransvars] # bc transformed variables with fixed coeffs
818
+
819
+
820
+ self.correlationpos = []
821
+ self.uncorrelatedpos = []
822
+ if randvars:
823
+ self.correlationpos = [lst.index(str) for str in self.varnames if
824
+ str in self.randvars] # Position of correlated variables within randvars
825
+
826
+ if (isinstance(self.correlated_vars, list)):
827
+ # {
828
+ self.correlationpos = [lst.index(str) for str in self.varnames if str in self.correlated_vars]
829
+ self.uncorrelatedpos = [lst.index(str) for str in self.varnames if str not in self.correlated_vars and str in randvars]
830
+ # }
831
+
832
+ self.Kf = sum(self.fxidx) # set number of fixed coeffs from idx
833
+ self.Kr = len(randpos) # Number of random coefficients
834
+ self.Kftrans = len(fixedtranspos) # Number of fixed coefficients of bc transformed vars
835
+ self.Krtrans = len(randtranspos) # Number of random coefficients of bc transformed vars
836
+ self.Kchol = 0 # Number of random beta cholesky factors
837
+ self.correlationLength = 0
838
+ self.Kbw = self.Kr
839
+
840
+ # set up length of betas required to estimate correlation and/or
841
+ # random variable standard deviations, useful for cholesky matrix
+ # NOTE(review): the three `if (self.correlated_vars)` blocks below assign the same
+ # correlationLength / Kbw / Kchol values; the last block repeats the first two.
842
+ if (self.correlated_vars):
843
+ if (isinstance(self.correlated_vars, list)):
844
+ self.correlationLength = len(self.correlated_vars)
845
+ self.Kbw = self.Kr - len(self.correlated_vars)
846
+ else:
847
+ self.correlationLength = self.Kr
848
+ self.Kbw = 0
849
+ if (self.correlated_vars):
850
+ if (isinstance(self.correlated_vars, list)):
851
+ # Kchol, permutations of specified params in correlation list
852
+ self.Kchol = int((len(self.correlated_vars) *
853
+ (len(self.correlated_vars)+1))/2)
854
+ else:
855
+ # i.e. correlation = True, Kchol permutations of rand vars
856
+ self.Kchol = int((len(self.randvars) *
857
+ (len(self.randvars)+1))/2)
858
+
859
+
860
+ if (self.correlated_vars):
861
+ # {
862
+ if (isinstance(self.correlated_vars, list)):
863
+ # {
864
+ nb_corvars = len(self.correlated_vars)
865
+ self.correlationLength = nb_corvars
866
+ self.Kbw = self.Kr - nb_corvars
867
+ self.Kchol = int((nb_corvars * (nb_corvars + 1)) / 2)
868
+ # i.e., Kchol => # permutations of specified params in correlation list
869
+ # }
870
+ else: # {
871
+ self.correlationLength = self.Kr
872
+ self.Kbw = 0
873
+ nb_randvars = len(self.randvars)
874
+ self.Kchol = int((nb_randvars * (nb_randvars + 1)) / 2)
875
+ # i.e., correlated_vars = True, Kchol => permutations of rand vars
876
+ # }
877
+ # }
878
+
879
+
880
+ # Create design matrix
881
+ # For individual specific variables
882
+ Xis = None
883
+ if len(self.isvars) or len(self.ispos):
884
+ # {
885
+ # Create a dummy individual specific variables for the alts
886
+ dummy = np.tile(np.eye(J), reps=(P_N, 1))
887
+
888
+ # Remove base alternative
889
+ dummy = np.delete(dummy, np.where(self.alts == self.base_alt)[0], axis=1)
890
+ Xis = X[:, self.ispos]
891
+
+ # Object-typed columns are coerced to numbers; values that cannot be parsed become NaN (errors='coerce').
892
+ if Xis.dtype == np.object_:
893
+ Xis_numeric = pd.to_numeric(Xis.flatten(), errors='coerce').reshape(Xis.shape)
894
+ Xis = Xis_numeric
895
+
896
+ # Multiply dummy representation by the individual specific data
897
+ try:
898
+
899
+ Xis = np.einsum('nj,nk->njk', Xis, dummy, dtype="float64")
+ # NOTE(review): bare except - any failure of the einsum above (not only dtype problems) triggers the numeric-coercion retry.
900
+ except:
901
+ Xis_numeric = pd.to_numeric(Xis.flatten(), errors='coerce').reshape(Xis.shape)
902
+ Xis = Xis_numeric
903
+ Xis = np.einsum('nj,nk->njk', Xis, dummy, dtype="float64")
904
+ # Example of filtering out non-numeric data
905
+ # Xis = np.array([x for x in Xis if isinstance(x, (int, float))], dtype='float64')
+ # One column per (isvar, non-base alternative) pair.
906
+ nbOf = (self.J - 1) * len(self.ispos)
907
+ Xis = Xis.reshape((P_N, self.J, nbOf)) # ERROR: UNEXPECTED ARGUMENT?
908
+
909
+ # }
910
+ else: # {
911
+ Xis = np.array([])
912
+ # }
913
+ # For alternative specific variables
914
+ Xas = None
915
+ if asvars_construct_matrix:
916
+ Xas = X[:, aspos]
917
+ Xas = Xas.reshape(N, J, -1)
918
+
919
+ # Set design matrix based on existence of asvars and isvars
920
+ if len(asvars_construct_matrix) and len(self.isvars):
921
+ X = np.dstack((Xis, Xas))
922
+ elif len(asvars_construct_matrix):
923
+ X = Xas
924
+ elif (len(self.isvars)):
925
+ X = Xis
926
+ else:
927
+ x_varname_length = len(self.varnames) if not self.fit_intercept \
928
+ else (len(self.varnames) - 1)+(J-1)
929
+ X = X.reshape(-1, len(self.alts), x_varname_length)
930
+
+ # Coefficient names, built group by group and concatenated in a fixed order at the end.
931
+ intercept_names = ["_intercept.{}".format(j) for j in self.alts
932
+ if j != self.base_alt] if self.fit_intercept else []
933
+
934
+ names = ["{}.{}".format(isvar, j) for isvar in isvars for j in self.alts if j != self.base_alt]
935
+
936
+ lambda_names_fixed = ["lambda.{}".format(transvar) for transvar in fixedtransvars]
937
+
938
+ lambda_names_rand = ["lambda.{}".format(transvar) for transvar in randtransvars]
939
+
+ # Random variables re-listed in the order in which they appear in self.varnames.
940
+ randvars = [x for x in self.varnames if x in self.randvars]
941
+ randvars = np.array(randvars, dtype='<U64')
942
+
943
+ asvars_names = [x for x in asvars if (x not in self.randvars) and
944
+ (x not in fixedtransvars) and (x not in randtransvars)]
945
+
946
+
+ # One "chol.<a>.<b>" name per lower-triangular entry (j <= i) over the first correlationLength positions.
947
+ chol = ["chol." + self.varnames[self.correlationpos[i]] + "." +
948
+ self.varnames[self.correlationpos[j]] for i
949
+ in range(self.correlationLength) for j in range(i + 1)]
950
+
951
+ br_w_names = []
952
+
953
+ # three cases for corr. varnames: no corr, corr list, corr Bool (All)
954
+ if not (self.correlated_vars is True or isinstance(self.correlated_vars, list)):
955
+ if hasattr(self, "rvidx"): # avoid errors with multinomial logit
956
+ br_w_names = np.char.add("sd.", randvars)
957
+
958
+ if (isinstance(self.correlated_vars, list)): # if not all r.v.s correlated
959
+ # {
960
+ if self.fit_intercept:
961
+ if '_inter' not in self.varnames:
962
+ names_for_p = np.insert(self.varnames, 0, '_inter')
963
+ else:
964
+ names_for_p = self.varnames
965
+ else:
966
+ names_for_p = self.varnames
967
+ sd_uncorrelated_pos = [lst.index(str) for str in names_for_p
968
+ if str not in self.correlated_vars and str in randvars]
969
+ br_w_names = np.char.add("sd.", names_for_p[sd_uncorrelated_pos])
970
+ # }
971
+
972
+ sd_rand_trans = np.char.add("sd.", self.varnames[randtranspos])
973
+
974
+ # if there are isvars, they are positioned at the front
975
+ if len(self.isvars) >0:
976
+
977
+ inter_o = ["_inter" for j in self.alts
978
+ if j != self.base_alt] if self.fit_intercept else ['_inter']
979
+
980
+ names_o = [isvar for isvar in isvars for j in self.alts if j != self.base_alt]
981
+ restvars = [var for var in self.varnames if var not in names_o and var not in inter_o]
982
+ self.ordered_varnames = names_o + restvars
983
+
984
+ elif self.fit_intercept:
985
+ inter_o = ["_inter" for j in self.alts
986
+ if j != self.base_alt]
987
+ restvars = [var for var in self.varnames if var not in inter_o]
988
+ self.ordered_varnames = restvars
989
+
990
+
991
+
992
+ else:
993
+ self.ordered_varnames = self.varnames
+ # NOTE(review): the result of this np.insert is discarded, so the statement has no effect.
994
+ np.insert(np.array(self.varnames, dtype="<U64"), 0, '_inter')
995
+
996
+
997
+ names = np.concatenate((intercept_names, names, asvars_names, randvars,
998
+ chol, br_w_names, fixedtransvars,
999
+ lambda_names_fixed, randtransvars,
1000
+ sd_rand_trans, lambda_names_rand))
1001
+
1002
+ names = np.array(names, dtype="<U64")
+ # Returns the reshaped design matrix together with the coefficient names.
1003
+ return X, names
1004
+
1005
+ ''' ---------------------------------------------------------- '''
1006
+ ''' Function. Check data is in long format '''
1007
+ ''' ---------------------------------------------------------- '''
1008
def check_long_format_consistency(self, ids, alts, sorted_idx):
    """Check that every id owns a whole number of alternative sets.

    Parameters
    ----------
    ids : array-like
        Identifier of each row of the long-format data.
    alts : numpy.ndarray
        Alternative label of each row; it is reordered with ``sorted_idx``
        before the distinct alternatives are collected.
    sorted_idx : array-like of int
        Index array applied to ``alts``.

    Raises
    ------
    ValueError
        If the number of rows of any id is not a multiple of the number of
        distinct alternatives.
    """
    alts = alts[sorted_idx]
    n_alts = len(np.unique(alts))

    _, obs_by_id = np.unique(ids, return_counts=True)
    if obs_by_id.size == 0:
        # Nothing to check; the previous implementation also accepted empty input.
        return

    # The earlier test, np.all(obs_by_id / J), could never fail: a positive
    # count divided by J is always non-zero. "Multiple of J" needs a remainder
    # test instead.
    if n_alts == 0 or np.any(obs_by_id % n_alts != 0):
        raise ValueError('inconsistent alts and ids values in long format')
1026
+
1027
+ ''' ---------------------------------------------------------- '''
1028
+ ''' Function. Check for data consistency. Set to long format '''
1029
+ ''' ---------------------------------------------------------- '''
1030
def arrange_long_format(self, X, y, ids, alts, panels=None):
    """Assemble the (panels, ids, alts) record array and hand the data back.

    When ``ids`` is given, a structured array with the fields ``panels``,
    ``ids`` and ``alts`` is filled from the inputs. Sorting by that record is
    currently disabled, so ``X``, ``y`` and ``panels`` are returned exactly
    as they were received.

    Parameters
    ----------
    X, y : array-like
        Data returned untouched.
    ids : array-like or None
        Row identifiers; ``None`` skips the record construction.
    alts : numpy.ndarray
        Alternative labels; tiled up to ``len(ids)`` when shorter.
    panels : array-like, optional
        Panel identifiers; a column of ones is used in the record when absent.

    Returns
    -------
    tuple
        ``(X, y, panels)``.
    """
    if ids is None:
        return X, y, panels

    panel_column = np.ones(len(ids)) if panels is None else panels
    alts = alts.astype(str)
    if len(alts) != len(ids):
        # Repeat the alternative labels so that there is one per row.
        alts = np.tile(alts, int(len(ids) / len(alts)))

    record_dtype = {'names': ['panels', 'ids', 'alts'], 'formats': ['<f4', '<f4', '<U64']}
    cols = np.zeros(len(ids), dtype=record_dtype)
    cols['panels'] = panel_column
    cols['ids'] = ids
    cols['alts'] = alts

    # The record would drive np.argsort(cols, order=[...]); that step is disabled.
    return X, y, panels
1049
+
1050
+ ''' ---------------------------------------------------------- '''
1051
+ ''' Function. Validate potential mistakes in the input data '''
1052
+ ''' ---------------------------------------------------------- '''
1053
def validate_inputs(self, X, y, alts, varnames):
    """Reject inputs that are missing or have the wrong shape.

    The checks run in a fixed order and stop at the first failure.

    Raises
    ------
    ValueError
        If ``varnames`` or ``alts`` is ``None``, if ``X`` is not
        two-dimensional, if ``y`` is not one-dimensional, or if the number of
        names differs from the number of columns of ``X``.
    """
    problem = None
    if varnames is None:
        problem = 'The parameter varnames is required'
    elif alts is None:
        problem = 'The parameter alternatives is required'
    elif X.ndim != 2:
        problem = "X must be an array of two dimensions in long format"
    elif y.ndim != 1:
        problem = "y must be an array of one dimension in long format"
    elif len(varnames) != X.shape[1]:
        problem = "The length of varnames must match the number of columns in X"

    if problem is not None:
        raise ValueError(problem)
1066
+
1067
+ ''' ------------------------------------------------------------- '''
1068
+ ''' Function. Regularization of the loglike '''
1069
+ ''' Flag affects whether penalty is added or subtracted '''
1070
+ ''' ------------------------------------------------------------- '''
1071
def regularize_loglik(self, betas, negative=False):
    """Return the squared-norm penalty on ``betas``, optionally sign-flipped.

    The penalty is ``self.reg_penalty`` times the sum of squared coefficients,
    i.e. an L2 (ridge-type) term.

    Parameters
    ----------
    betas : array-like
        Coefficients to penalise.
    negative : bool, default False
        When true the penalty is returned with a minus sign.
    """
    squared_norm = np.sum(np.square(betas))
    penalty = self.reg_penalty * squared_norm
    if negative:
        return -penalty
    return penalty
1077
+
1078
+
1079
+
1080
+ ''' ---------------------------------------------------------- '''
1081
+ ''' Function. Compute the log-likelihood of the null model '''
1082
+ ''' |y| = #samples * #choices '''
1083
+ ''' ---------------------------------------------------------- '''
1084
def get_loglik_null(self):
    """Return -2 times the summed log of the row sums of ``self.y / self.J``.

    Each element of ``self.y`` is scaled by ``1 / J``, the scaled values are
    summed along axis 1, and the logs of those sums are added up and
    multiplied by -2.
    """
    scale = 1.0 / self.J
    row_likelihood = np.sum(self.y * scale, axis=1)
    return -2 * np.sum(np.log(row_likelihood))
1092
+
1093
+ ''' ---------------------------------------------------------- '''
1094
+ ''' Function. Print the coefficients and estimation outputs '''
1095
+ ''' ---------------------------------------------------------- '''
1096
def summarise(self, file=None):
    """Print the estimation summary: status, coefficient table and fit statistics.

    Parameters
    ----------
    file : file-like object, optional
        Destination handed to ``print`` for the report; ``None`` means
        standard output.

    Notes
    -----
    Emits a ``UserWarning`` and returns early when ``self.coeff_est`` is
    ``None``. Stores ``self.adjust_lik_ratio`` and, for latent class models,
    ``self.pvalues_member`` and ``self.coeff_names_member``.
    """
    print("", file=file)
    print(f"Choice Model: {self.descr}", file=file)

    if self.coeff_est is None:
        warnings.warn("The current model has not been yet estimated", UserWarning)
        return

    if not self.converged:
        print("-" * 50, file=file)
        print("WARNING: Convergence was not reached during estimation. "
              "The given estimates may not be reliable", file=file)
        if hasattr(self, "gtol_res"):
            print("gtol:", self.gtol, file=file)
            print("Final gradient norm:", self.gtol_res, file=file)
        print('*' * 50, file=file)

    print("Estimation time= {:.1f} seconds".format(self.estim_time_sec), file=file)

    if hasattr(self, 'pred_prob'):
        # NOTE(review): obs_prob is read whenever pred_prob exists; nothing in
        # this method guarantees that it has been set.
        print("", file=file)
        print("Proportion of alternatives: observed choice", file=file)
        print(self.obs_prob, file=file)
        print("", file=file)
        print("Proportion of alternatives: predicted choice", file=file)
        print(self.pred_prob, file=file)

    if hasattr(self, 'class_freq'):
        print("", file=file)
        print("Estimated proportion of classes", file=file)
        print(self.class_freq, file=file)

    print("", file=file)
    print("Table.", file=file)
    rule_width = 84 if self.is_latent_class else 75
    if self.is_latent_class:
        coeff_name_str_length = 28
        fmt = "{:28} {:13.10f} {:13.10f} {:13.10f} {:13.3g} {:3}"
        header = "{:28} {:>13} {:>13} {:>13} {:>13}"
    else:
        coeff_name_str_length = 19
        fmt = "{:19} {:13.10f} {:13.10f} {:13.10f} {:13.3g} {:3}"
        header = "{:19} {:>13} {:>13} {:>13} {:>13}"
    print("-" * rule_width, file=file)
    print(header.format("Coefficient", "Estimate", "Std.Err.", "z-val", "P>|z|"), file=file)
    print("-" * rule_width, file=file)

    # p-value thresholds in ascending order, each with its significance symbol.
    significance_symbols = ((0.001, "***"), (0.01, "**"), (0.05, "*"), (0.1, "."), (1.01, ""))

    def signif_symbol(pvalue):
        # A NaN p-value is below no threshold; fall back to an empty symbol
        # instead of raising or printing an exception.
        return next((symbol for threshold, symbol in significance_symbols
                     if pvalue < threshold), "")

    for i in range(len(self.coeff_est)):
        name = self.coeff_names[i][:coeff_name_str_length]
        print(fmt.format(name, self.coeff_est[i], self.stderr[i], self.zvalues[i],
                         self.pvalues[i], signif_symbol(self.pvalues[i])), file=file)

    # NOTE(review): this branch relies on attributes and helpers
    # (member_params_spec, get_member_X_idx, membership_as_probability) that are
    # not defined in this part of the file - presumably supplied by the latent
    # class models.
    if self.is_latent_class:
        zvalues = np.nan_to_num(self.class_x / self.class_x_stderr)
        zvalues = truncate_lower(zvalues, -1e+5)
        pvalues = 2 * (1 - ss.t.cdf(np.abs(zvalues), df=self.sample_size))
        self.pvalues_member = pvalues
        coeff_names_member = np.array([])

        for ii, member_class in enumerate(self.member_params_spec):
            # The index lookup is kept in case the helper has side effects; the
            # filtering that used to follow it produced a value nobody read.
            self.get_member_X_idx(ii, coeff_names=member_class)

            member_class_names = np.array(member_class, dtype='<U')
            if self.membership_as_probability:
                member_class_names = ["probability"]

            # Class numbering starts at 2 (see the ii + 2 offset).
            class_coeff_names = np.char.add('class-' + str(ii + 2) + ': ', member_class_names)

            if '_inter' in self.member_params_spec[ii]:
                # Naming the class constant is switched off for now.
                print('off for now')

            coeff_names_member = np.concatenate((coeff_names_member, class_coeff_names))

        self.coeff_names_member = coeff_names_member
        print("-" * 84, file=file)
        print("{:30} {:>13} {:>13} {:>13} {:>13}"
              .format("Class Member Coeff", "Estimate", "Std.Err.", "z-val", "P>|z|"), file=file)
        print("-" * 84, file=file)

        for ii, coeff_name in enumerate(coeff_names_member):
            print(fmt.format(coeff_name[:30], self.class_x[ii], self.class_x_stderr[ii],
                             zvalues[ii], pvalues[ii],
                             signif_symbol(self.pvalues_member[ii])), file=file)

    # The closing rule now honours `file` in both branches; the latent class
    # branch used to write it to standard output.
    print("-" * rule_width, file=file)
    print("Significance:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1", file=file)
    print("", file=file)

    text = f"LOGLIK = {self.loglik:0.3f}; AIC = {self.aic:0.3f}; BIC = {self.bic:0.3f};"

    if self.mae is not None:
        text += f"MAE= {self.mae:0.3f};"

    loglik_null = self.get_loglik_null()
    adjust_lik_ratio = 1 - (self.aic / loglik_null)
    self.adjust_lik_ratio = adjust_lik_ratio

    text += f" ADJLIK RATIO: {adjust_lik_ratio:.3f}"
    print(text, file=file)
1255
+
1256
+
1257
+ ''' ---------------------------------------------------------- '''
1258
+ ''' Function. '''
1259
+ ''' ---------------------------------------------------------- '''
1260
def print_matrix(self, str_mat, descr):
    """Print ``descr`` followed by ``str_mat``, one row per line.

    Every cell is formatted with a minimum width of 11 and followed by a
    single space; each row ends with a newline.
    """
    print(descr)
    for cells in str_mat:
        for cell in cells:
            print(format(cell, '11'), end=' ')
        print()
1267
+
1268
+ ''' ---------------------------------------------------------- '''
1269
+ ''' Function. '''
1270
+ ''' ---------------------------------------------------------- '''
1271
def setup_print(self, mat):
    """Build a labelled string grid from the leading K x K corner of ``mat``.

    K is the number of entries in ``self.correlationpos``. The first row holds
    an empty cell followed by the matching variable names; each later row
    holds a variable name followed by the corresponding row of ``mat``
    rounded to 8 decimals.

    Returns
    -------
    numpy.ndarray
        String array of shape ``(K + 1, K + 1)``.
    """
    corr_varnames = [self.varnames[pos] for pos in self.correlationpos]
    K = len(corr_varnames)
    mat = np.round(mat[0:K, 0:K], 8)

    if dev.using_gpu:
        mat = dev.convert_array_cpu(mat)

    # Gather every piece as a 1-d array and join them in one concatenation.
    pieces = [np.array([], dtype="<U64"), np.array([''] + corr_varnames)]
    for label, row in zip(corr_varnames, mat):
        pieces.append(np.ravel(label))
        pieces.append(np.ravel(np.array(row)))
    str_mat = np.concatenate(pieces)

    # + 1 for the row and the column of names
    return str_mat.reshape((K + 1, K + 1))
1290
+
1291
+ ''' ---------------------------------------------------------- '''
1292
+ ''' Function. Print matrix '''
1293
+ ''' ---------------------------------------------------------- '''
1294
def print_mat(self, mat, descr):
    """Format ``mat`` with ``setup_print`` and print it under ``descr``."""
    self.print_matrix(self.setup_print(mat), descr)
1298
+
1299
+ ''' ---------------------------------------------------------- '''
1300
+ ''' Function. Return fitted values '''
1301
+ ''' ---------------------------------------------------------- '''
1302
def fitted(self, type="parameters"):
    """Return ``self.pch2_res`` when parameters are requested and available.

    Returns ``None`` for any other ``type`` value, or when ``pch2_res`` has
    not been set on the instance.
    """
    wants_parameters = type == "parameters"
    if wants_parameters and hasattr(self, 'pch2_res'):
        return self.pch2_res
    return None
1306
+
1307
+ ''' ---------------------------------------------------------- '''
1308
+ ''' Function '''
1309
+ ''' ---------------------------------------------------------- '''
1310
def print_stdev(self, stdevs, names):
    """Print a 'Standard Deviations' heading, then the names, then the values.

    Names and values each go on their own line; every entry is formatted with
    a minimum width of 11 and followed by a single space.
    """
    print('Standard Deviations')
    for entries in (names, stdevs):
        for entry in entries:
            print(format(entry, '11'), end=' ')
        print()
1318
+
1319
+ ''' ---------------------------------------------------------- '''
1320
+ ''' Function. Print standard deviations for randvars '''
1321
+ ''' ---------------------------------------------------------- '''
1322
def compute_stddev(self):
    """Compute and print the standard deviation of every random coefficient.

    The square roots of the diagonal of ``self.covariance_matrix`` are
    converted according to the distribution registered for each variable in
    ``self.randvarsdict`` and handed to ``self.print_stdev``.

    Sets ``corr_varnames``, ``rv_names_noncorr``, ``rvtrans_names``,
    ``rv_names_all`` and the matching ``distributions*`` attributes.

    NOTE(review): ``covariance_matrix``, ``randvarsdict`` and ``betas`` are not
    defined in this part of the file; presumably the mixed logit models
    provide them.
    """
    diags = np.round(np.sqrt(np.diag(self.covariance_matrix)), 8)

    # Names are matched to the diagonal by position, so their order has to be
    # reproducible: keep the order of self.varnames instead of going through
    # list(set(...)), whose order is arbitrary.
    ordered_names = list(dict.fromkeys(self.varnames))
    randvars = set(self.randvars)
    randtransvars = set(self.randtransvars)

    self.corr_varnames = [self.varnames[pos] for pos in self.correlationpos]
    correlated = set(self.corr_varnames)
    self.rv_names_noncorr = [name for name in ordered_names
                             if name in randvars and name not in correlated]
    self.rvtrans_names = [name for name in ordered_names if name in randtransvars]
    self.rv_names_all = self.corr_varnames + self.rv_names_noncorr + self.rvtrans_names

    self.distributions_corr = [self.randvarsdict[name] for name in self.corr_varnames]
    self.distributions_rv = [self.randvarsdict[name] for name in self.rv_names_noncorr]
    self.distributions_rvtrans = [self.randvarsdict[name] for name in self.rvtrans_names]
    self.distributions = self.distributions_corr + self.distributions_rv + self.distributions_rvtrans

    stdevs = np.zeros(len(diags))

    # NOTE(review): means are taken positionally from betas[Kf:Kf + Kr]; this
    # lines up with rv_names_all only when the correlated variables do not
    # reorder the list - confirm against the layout of betas.
    means = self.betas[self.Kf: self.Kf + self.Kr]
    for ii, val in enumerate(diags):
        distr = self.distributions[ii]
        if distr in ('n', 't'):
            # NOTE(review): if 't' denotes a triangular distribution, a spread
            # of val would give val / sqrt(6) - confirm the intended meaning.
            stdev = val
        elif distr == 'ln':
            stdev = np.sqrt(np.exp(val ** 2) - 1) * np.exp(means[ii] + 0.5 * val ** 2)
        elif distr == 'u':
            # Uniform with half-width val: variance val**2 / 3, hence standard
            # deviation val / sqrt(3). The previous code returned the variance.
            stdev = val / np.sqrt(3)
        else:
            stdev = -1  # no known distribution registered for this variable
        stdevs[ii] = np.round(stdev, 8)

    self.print_stdev(stdevs, self.rv_names_all)
1363
+ # }