SearchLibrium 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- old_code/__init__.py +8 -0
- old_code/_choice_model.py +1363 -0
- old_code/_device.py +145 -0
- old_code/akshay_test.py +125 -0
- old_code/boxcox_functions.py +116 -0
- old_code/draws.py +128 -0
- old_code/harmony.py +1261 -0
- old_code/latent_class_constrained.py +434 -0
- old_code/latent_class_mixed_model.py +1566 -0
- old_code/latent_class_model.py +1281 -0
- old_code/latent_main.py +945 -0
- old_code/main.py +1880 -0
- old_code/main_ol.py +127 -0
- old_code/misc.py +303 -0
- old_code/mixed_logit.py +1553 -0
- old_code/multinomial_logit.py +559 -0
- old_code/ordered_logit.py +1641 -0
- old_code/ordered_logit_mixed.py +103 -0
- old_code/ordered_logit_multinomial.py +701 -0
- old_code/r_ordered.py +168 -0
- old_code/rrm.py +521 -0
- old_code/search.py +3485 -0
- old_code/siman.py +1023 -0
- old_code/threshold.py +777 -0
- searchlibrium-0.0.1.dist-info/METADATA +21 -0
- searchlibrium-0.0.1.dist-info/RECORD +28 -0
- searchlibrium-0.0.1.dist-info/WHEEL +5 -0
- searchlibrium-0.0.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1363 @@
|
|
|
1
|
+
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
|
2
|
+
IMPLEMENTATION: BASE CLASS FOR LOGIT MODELS
|
|
3
|
+
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
BACKGROUND - Choice Modelling
|
|
7
|
+
|
|
8
|
+
ISVARS: In choice modeling, an individual-specific variable refers to a characteristic or attribute
|
|
9
|
+
of an individual that is specific to that individual and influences their decision-making process.
|
|
10
|
+
These variables are typically included in choice models to capture heterogeneity or differences
|
|
11
|
+
among individuals in their preferences or behavior.
|
|
12
|
+
|
|
13
|
+
Individual-specific variables are used in choice modeling to account for variations in preferences
|
|
14
|
+
or behaviors that cannot be explained solely by the attributes of the alternatives being considered.
|
|
15
|
+
|
|
16
|
+
ASVARS: In choice modeling, an alternative-specific variable refers to a characteristic or attribute
|
|
17
|
+
of a specific alternative that may influence the decision-making process of individuals when choosing
|
|
18
|
+
among available options. These variables are included in choice models to capture the effects of
|
|
19
|
+
attributes that vary across alternatives and affect individuals' preferences for those alternatives.
|
|
20
|
+
|
|
21
|
+
Examples of alternative-specific variables in choice modeling may include attributes such as price,
|
|
22
|
+
brand, product features, location, or availability. These variables can represent both observable
|
|
23
|
+
characteristics of alternatives (e.g., price) and unobservable characteristics that may influence
|
|
24
|
+
preferences (e.g., brand reputation).
|
|
25
|
+
|
|
26
|
+
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
|
|
27
|
+
|
|
28
|
+
''' ----------------------------------------------------------- '''
|
|
29
|
+
''' MAIN PARAMETERS: '''
|
|
30
|
+
''' ----------------------------------------------------------- '''
|
|
31
|
+
# N : Number of choice situations
|
|
32
|
+
# P : Number of observations per panel
|
|
33
|
+
# J : Number of alternatives
|
|
34
|
+
# K : Number of variables (Kf: fixed, non-trans, Kr: random, non-trans,
|
|
35
|
+
# Kftrans: fixed, trans, Krtrans: random, trans)
|
|
36
|
+
|
|
37
|
+
''' ---------------------------------------------------------- '''
|
|
38
|
+
''' UNUSED LIBRARIES '''
|
|
39
|
+
''' ---------------------------------------------------------- '''
|
|
40
|
+
#import logging
|
|
41
|
+
|
|
42
|
+
''' ---------------------------------------------------------- '''
|
|
43
|
+
''' LIBRARIES '''
|
|
44
|
+
''' ---------------------------------------------------------- '''
|
|
45
|
+
import warnings
|
|
46
|
+
from abc import ABC #, abstractmethod
|
|
47
|
+
from time import time
|
|
48
|
+
import numpy as np
|
|
49
|
+
import pandas as pd
|
|
50
|
+
import scipy.stats as ss
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
try:
|
|
55
|
+
from .boxcox_functions import boxcox_param_deriv, boxcox_transformation, truncate, truncate_lower
|
|
56
|
+
from ._device import device as dev
|
|
57
|
+
|
|
58
|
+
except ImportError:
|
|
59
|
+
from boxcox_functions import boxcox_param_deriv, boxcox_transformation, truncate, truncate_lower
|
|
60
|
+
from _device import device as dev
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
#library for keeping track of variables
|
|
64
|
+
#from watchpoints import watch
|
|
65
|
+
|
|
66
|
+
''' ---------------------------------------------------------- '''
|
|
67
|
+
''' CLASS FOR ESTIMATION OF DISCRETE CHOICE MODEL '''
|
|
68
|
+
''' ---------------------------------------------------------- '''
|
|
69
|
+
class DiscreteChoiceModel(ABC):
|
|
70
|
+
# {
|
|
71
|
+
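""" Base class for the logit models: holds the shared estimation state and the pre-/post-processing helpers used to estimate a discrete choice model. """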
|
|
72
|
+
|
|
73
|
+
# ===================
|
|
74
|
+
# CLASS PARAMETERS
|
|
75
|
+
# ===================
|
|
76
|
+
|
|
77
|
+
"""
|
|
78
|
+
coeff_est: Coefficient estimates
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
# ===================
|
|
82
|
+
# CLASS FUNCTIONS
|
|
83
|
+
# ===================
|
|
84
|
+
|
|
85
|
+
"""
|
|
86
|
+
1. void __init__(self);
|
|
87
|
+
2. void fit(self);
|
|
88
|
+
3. void reset_attributes(self);
|
|
89
|
+
4. set_asarray(self, X, y, varnames, alts, isvars, transvars, ids, weights, panels, avail);
|
|
90
|
+
5. void pre_process(self, alts, varnames, isvars, transvars, base_alt, fit_intercept, transformation,
|
|
91
|
+
maxiter, panels=None, correlated_vars=None, randvars=None);
|
|
92
|
+
6. void post_process(self, result, coeff_names, sample_size, hess_inv=None);
|
|
93
|
+
7. X, names <-- setup_design_matrix(self, X);
|
|
94
|
+
8. check_long_format_consistency(self, ids, alts, sorted_idx);
|
|
95
|
+
9. X, y, panels <-- arrange_long_format(self, X, y, ids, alts, panels=None);
|
|
96
|
+
10. void validate_inputs(self, X, y, alts, varnames, isvars, ids, weights,
|
|
97
|
+
panels, base_alt, fit_intercept, maxiter);
|
|
98
|
+
11. loglik <-- get_loglik_null(self);
|
|
99
|
+
12. void summarise(self);
|
|
100
|
+
13. void print_matrix(self, str_mat, descr);
|
|
101
|
+
14. str_mat <-- setup_print(self, mat);
|
|
102
|
+
15. void print_mat(self, mat, descr);
|
|
103
|
+
16. pch_res <-- fitted(self, type="parameters");
|
|
104
|
+
17. void print_stdev(self, stdevs, names);
|
|
105
|
+
18. void compute_stddev(self);
|
|
106
|
+
"""
|
|
107
|
+
|
|
108
|
+
''' ---------------------------------------------------------- '''
|
|
109
|
+
''' Function '''
|
|
110
|
+
''' ---------------------------------------------------------- '''
|
|
111
|
+
def reset_attributes(self):  # {
    """Clear the results of any previous estimation.

    The coefficient names and estimates, the standard errors, the z-values,
    the p-values and the log-likelihood are all set to ``None``; the counter
    of function evaluations is set back to zero.
    """
    result_attributes = ("coeff_names", "coeff_est", "stderr",
                         "zvalues", "pvalues", "loglik")
    for attribute in result_attributes:
        setattr(self, attribute, None)

    self.total_fun_eval = 0
# }
|
|
117
|
+
|
|
118
|
+
''' ---------------------------------------------------------- '''
|
|
119
|
+
''' Function '''
|
|
120
|
+
''' ---------------------------------------------------------- '''
|
|
121
|
+
def __init__(self):
|
|
122
|
+
# {
|
|
123
|
+
self.is_latent_class = False
|
|
124
|
+
self.reset_attributes()
|
|
125
|
+
|
|
126
|
+
self.reg_penalty = 0.00 # Define a penalty for regularization.
|
|
127
|
+
self.pval_penalty = 0
|
|
128
|
+
# NOTE: The reg_penalty value is tricky to define. If too high, convergence is restricted.
|
|
129
|
+
# Set to zero to turn off. A value of 1 seems too high.
|
|
130
|
+
|
|
131
|
+
# Variables used in derived classes and were previously not defined in this class:
|
|
132
|
+
self.num_classes, self.obs_prob = 0, 0
|
|
133
|
+
self.Xnames, self.member_params_spec = None, None
|
|
134
|
+
self.ordered_varnames = None
|
|
135
|
+
self.covariance_matrix, self.betas = None, None
|
|
136
|
+
|
|
137
|
+
# Define constants:
|
|
138
|
+
self.ftol, self.gtol = 1e-7, 1e-5
|
|
139
|
+
self.maxiter = 2000
|
|
140
|
+
|
|
141
|
+
# Define boolean flags:
|
|
142
|
+
self.converged = False
|
|
143
|
+
self.return_grad, self.return_hess, fit_intercept = True, True, False
|
|
144
|
+
self.scipy_optimisation = True
|
|
145
|
+
self.method, self.transformation = "bfgs", "boxcox"
|
|
146
|
+
|
|
147
|
+
self.trans_func = None # NEW. CHECK VALIDITY!
|
|
148
|
+
self.varnames, self.isvars, self.transvars = None, None, None
|
|
149
|
+
self.base_alt, self.alts, self.panels = None, None, None
|
|
150
|
+
self.bic, self.aic, self.mae = None, None, None # Metrics
|
|
151
|
+
self.loglik = None
|
|
152
|
+
|
|
153
|
+
# Initialise empty arrays
|
|
154
|
+
self.fxidx, self.fxtransidx = [], []
|
|
155
|
+
self.X, self.y = [], []
|
|
156
|
+
self.X_original, self.y_original = [], []
|
|
157
|
+
self.weights, self.avail = [], []
|
|
158
|
+
self.init_coeff = []
|
|
159
|
+
|
|
160
|
+
self.descr = ""
|
|
161
|
+
# }
|
|
162
|
+
|
|
163
|
+
''' ---------------------------------------------------------- '''
|
|
164
|
+
''' Function. Virtual '''
|
|
165
|
+
''' ---------------------------------------------------------- '''
|
|
166
|
+
#@abstractmethod
|
|
167
|
+
def fit(self):  # {
    """Virtual estimation hook.

    The base implementation performs no work and returns ``None``.
    """
    return None
# }
|
|
170
|
+
|
|
171
|
+
''' ---------------------------------------------------------- '''
|
|
172
|
+
''' Function. Convert to numpy arrays '''
|
|
173
|
+
''' ---------------------------------------------------------- '''
|
|
174
|
+
def set_asarray(self, X, y, varnames, alts, isvars, transvars, ids, weights, panels, avail):  # {
    """Convert the estimation inputs to NumPy arrays.

    ``X`` and ``y`` are always converted. Every other argument is converted
    only when it is not ``None``; the name lists (``varnames``, ``isvars``,
    ``transvars``) are stored with dtype ``"<U64"``. A missing ``transvars``
    becomes an empty list, every other missing argument stays ``None``.

    Returns
    -------
    tuple
        ``(X, y, varnames, alts, isvars, transvars, ids, weights, panels,
        avail)`` in their converted form.
    """
    names_dtype = "<U64"

    def convert_optional(values, dtype=None, missing=None):
        # Absent inputs are passed through as ``missing`` without conversion.
        if values is None:
            return missing
        return np.asarray(values, dtype=dtype)

    converted = (
        np.asarray(X),
        np.asarray(y),
        convert_optional(varnames, dtype=names_dtype),
        convert_optional(alts),
        convert_optional(isvars, dtype=names_dtype),
        convert_optional(transvars, dtype=names_dtype, missing=[]),
        convert_optional(ids),
        convert_optional(weights),
        convert_optional(panels),
        convert_optional(avail),
    )
    return converted
# }
|
|
187
|
+
|
|
188
|
+
''' ---------------------------------------------------------- '''
|
|
189
|
+
''' Function to assign the penalty of the regularisation '''
|
|
190
|
+
''' ---------------------------------------------------------- '''
|
|
191
|
+
def reassign_penalty(self, penalty=0):
    """Overwrite the regularisation penalty ``reg_penalty``.

    :param penalty: new penalty value; the default ``0`` is the value the
        constructor notes as turning the penalty off.
    """
    self.reg_penalty = penalty
|
|
193
|
+
|
|
194
|
+
''' ---------------------------------------------------------- '''
|
|
195
|
+
''' Function. Initialise member variables '''
|
|
196
|
+
''' ---------------------------------------------------------- '''
|
|
197
|
+
def pre_process(self, alts, varnames, isvars, transvars, base_alt, fit_intercept, transformation,
                maxiter, panels=None, correlated_vars=None, randvars=None):  # {
    """Reset the model and store the specification of the next estimation.

    Parameters
    ----------
    alts : array-like
        Alternatives column; its unique values are stored in ``self.alts``.
    varnames : iterable
        Names of all explanatory variables (stored as a list).
    isvars, transvars, randvars : iterable or None
        Individual-specific, transformed and random variable names;
        ``None`` is replaced by an empty list.
    base_alt : object or None
        Base alternative; defaults to the first unique alternative.
    fit_intercept, transformation, maxiter
        Stored unchanged on the instance.
    panels : array-like or None
        Only stored when the instance has no ``panels`` attribute yet.
    correlated_vars : list, bool or None
        ``None`` is stored as ``False``.
    """
    self.reset_attributes()
    self.fit_start_time = time()  # Start of the runtime measurement

    # Optional name collections fall back to empty lists.
    self.isvars = isvars if isvars is not None else []
    self.transvars = transvars if transvars is not None else []
    self.randvars = randvars if randvars is not None else []

    # Everything that is not individual-specific: the old definition of
    # asvars, still used to build the datasets.
    not_individual_specific = [name for name in varnames if name not in self.isvars]
    self.asvars_construct_matrix = not_individual_specific

    # Alternative-specific variables additionally exclude the random ones
    # (transformed variables are deliberately NOT excluded).
    self.asvars = [name for name in not_individual_specific if name not in self.randvars]

    self.randtransvars, self.fixedtransvars = [], []

    self.alts = np.unique(alts)  # Unique alternatives found in the data
    self.varnames = list(varnames)  # Lists are easier to handle
    self.fit_intercept = fit_intercept
    self.transformation = transformation
    self.maxiter = maxiter

    if base_alt is None:
        self.base_alt = self.alts[0]
    else:
        self.base_alt = base_alt

    if correlated_vars is None:
        self.correlated_vars = False
    else:
        self.correlated_vars = correlated_vars

    # Keep an already existing self.panels; only fall back to the argument
    # when the attribute is missing.
    # NOTE(review): __init__ sets self.panels = None, so for instances built
    # through it the ``panels`` argument looks to be ignored - confirm intent.
    if not hasattr(self, 'panels'):
        self.panels = panels
# }
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
''' ---------------------------------------------------------- '''
|
|
227
|
+
''' Function '''
|
|
228
|
+
''' convert hess inverse for L-BFGS-B optimisation method '''
|
|
229
|
+
''' ---------------------------------------------------------- '''
|
|
230
|
+
def post_process(self, result, coeff_names, sample_size, hess_inv=None):  # {
    """Store the optimiser output and derive the inference statistics.

    Parameters
    ----------
    result : mapping
        Optimisation result. The keys ``'success'``, ``'x'``, ``'fun'`` and
        ``'nit'`` are always read. ``'stderr'``, ``'is_latent_class'``,
        ``'hess_inv'``, ``'class_x'`` and ``'class_x_stderr'`` are read only
        on the branches that need them.
    coeff_names : sequence
        Names of the coefficients. For latent class results the per-class
        names are taken from ``coeff_names[0][i]``.
    sample_size : int
        Sample size used for the p-values and the BIC.
    hess_inv : object, optional
        Not used; kept so existing callers keep working.

    Sets ``converged``, ``coeff_est``, ``loglik``, ``total_iter``,
    ``estim_time_sec``, ``sample_size``, ``num_params``, ``stderr``,
    ``is_latent_class``, ``coeff_names``, ``zvalues``, ``pvalues``, ``aic``
    and ``bic`` (plus ``class_x`` / ``class_x_stderr`` when the result
    carries the ``'is_latent_class'`` key).
    """
    self.converged = result['success']
    self.coeff_est = result['x']
    self.loglik = -result['fun']
    self.total_iter = result['nit']
    self.estim_time_sec = time() - self.fit_start_time
    self.sample_size = sample_size
    self.num_params = self.Kbw + self.Kchol + self.Kf + self.Kftrans + self.Kr + self.Krtrans

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Standard errors supplied with the result (if any)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    self.stderr = np.zeros_like(self.coeff_est)
    std_err_estimated = 'stderr' in result
    if std_err_estimated:
        self.stderr = result['stderr']

    self.is_latent_class = result['is_latent_class'] if 'is_latent_class' in result else False

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Define coeff_names
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if self.is_latent_class:
        # NOTE: num_classes is only given a meaningful value by the latent
        # class models; this class merely initialises it.
        per_class_names = []
        for i in range(self.num_classes):
            # np.char.add is the public spelling of the former
            # np.core.defchararray.add (np.core is a deprecated namespace).
            per_class_names.append(
                np.char.add('class-' + str(i + 1) + ': ', coeff_names[0][i]))
        coeff_names = np.concatenate(per_class_names) if per_class_names else np.array([])

    self.coeff_names = coeff_names

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Standard errors from the stored inverse Hessian
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    stored_hinv = getattr(self, 'Hinv', None)
    if stored_hinv is not None and not self.is_latent_class:
        if dev.using_gpu:
            self.stderr = np.sqrt(np.abs(np.diag(stored_hinv)))
        else:
            hinv_diag = np.diag(np.array(stored_hinv))
            # Non-positive (and NaN) diagonal entries are assumed to stem
            # from floating point error and are treated as 0.
            self.stderr = np.sqrt(np.where(hinv_diag > 0, hinv_diag, 0.0))

        # The original tested the bound method ``.any`` (always truthy), so
        # these standard errors were always discarded. Call it instead.
        std_err_estimated = not np.isnan(self.stderr).any()

    # Fall back to the inverse Hessian reported by the optimiser.
    if not std_err_estimated:
        if self.method == "bfgs":
            self.stderr = np.sqrt(np.abs(np.diag(result['hess_inv'])))
        elif self.method == "l-bfgs-b":
            # L-BFGS-B returns a linear operator; densify it first.
            dense_hess_inv = result['hess_inv'].todense()
            self.stderr = np.sqrt(np.abs(np.diag(np.array(dense_hess_inv))))

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Compute lambda_mask (value subtracted before computing z-values)
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    lambda_mask = [1 if "lambda" in x else 0 for x in coeff_names]

    if len(lambda_mask) != len(self.coeff_est):
        lambda_mask = np.ones_like(self.coeff_est)

    if 'is_latent_class' in result:
        lambda_mask = np.zeros_like(self.coeff_est)

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Compute z-values
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Zero standard errors give inf/nan here; nan_to_num and truncate below
    # take care of them, so only the runtime warnings are silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        raw_zvalues = (self.coeff_est - lambda_mask) / self.stderr
    self.zvalues = np.nan_to_num(raw_zvalues)
    self.zvalues = truncate(self.zvalues, -1e+5, 1e+5)  # Set maximum (and minimum) limits

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Compute pvalues
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    if sample_size < 100:  # arbitrary ... could do standard 30
        self.pvalues = 2 * (1 - ss.t.cdf(np.abs(self.zvalues), df=sample_size))
    else:
        self.pvalues = 2 * (1 - ss.norm.cdf(np.abs(self.zvalues)))

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Penalise the log-likelihood by the number of p-values above 0.0
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    non_sigs = self.num_of_exceeding_pvalues(self.pvalues, 0.0)
    self.loglik -= non_sigs * self.pval_penalty

    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Compute aic and bic
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Local count used for the information criteria; deliberately separate
    # from self.num_params set above.
    num_estimated = len(self.coeff_est)
    if self.is_latent_class:
        # PENALISE IF TOO FEW #TODO FORCE a variable
        if num_estimated <= self.num_classes:
            num_exceeded = self.num_classes - num_estimated
            self.loglik -= sample_size * num_exceeded

        num_estimated += len(result['class_x'])

    self.aic = 2 * num_estimated - 2 * self.loglik
    self.bic = np.log(sample_size) * num_estimated - 2 * self.loglik

    if 'is_latent_class' in result:
        self.class_x = result['class_x']
        self.class_x_stderr = result['class_x_stderr']
# }
|
|
372
|
+
|
|
373
|
+
def num_of_exceeding_pvalues(self, pvalues, threshold):
    """
    Count the p-values that are strictly greater than ``threshold``.

    :param pvalues: iterable of p-values
    :type pvalues: iterable of float
    :param threshold: significance level used for hypothesis testing
    :type threshold: float
    :return: number of entries of ``pvalues`` above ``threshold``
    :rtype: int
    """
    exceeding = 0
    for pvalue in pvalues:
        if pvalue > threshold:
            exceeding += 1
    return exceeding
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
''' ------------------------------------------------------------------------ '''
|
|
385
|
+
''' Function. Setup and reshape input data after adding isvars and intercept '''
|
|
386
|
+
''' ------------------------------------------------------------------------ '''
|
|
387
|
+
def setup_design_matrdix(self, X):
|
|
388
|
+
# {
|
|
389
|
+
""" Setup the design matrix by adding the intercept when necessary and
|
|
390
|
+
converting the isvars to a dummy representation that removes the base alternative """
|
|
391
|
+
|
|
392
|
+
self.J = getattr(self, 'J', len(self.alts)) # i.e., if not hasattr(self, 'J'): self.J = len(self.alts)
|
|
393
|
+
self.N = int(len(X) / self.J)
|
|
394
|
+
self.P = 0
|
|
395
|
+
|
|
396
|
+
P_N = self.N
|
|
397
|
+
J = self.J
|
|
398
|
+
N = self.N
|
|
399
|
+
|
|
400
|
+
if self.panels is not None:
|
|
401
|
+
# {
|
|
402
|
+
# Identify and count unique values. Return two arrays
|
|
403
|
+
unique_values, counts = np.unique(self.panels, return_counts=True)
|
|
404
|
+
|
|
405
|
+
self.N = len(unique_values) # Set N as the number of unique values
|
|
406
|
+
normalized_counts = counts / self.J # Normalize counts by dividing by self.J
|
|
407
|
+
self.P_i = normalized_counts.astype(int) # Convert scaled counts to integers
|
|
408
|
+
|
|
409
|
+
# Assumption of integrality. Should check if any element is not an integer!
|
|
410
|
+
|
|
411
|
+
self.P = np.max(self.P_i) # Identify and store maximum element
|
|
412
|
+
# }
|
|
413
|
+
else:
|
|
414
|
+
# {
|
|
415
|
+
self.P = 1
|
|
416
|
+
self.P_i = np.ones([self.N]).astype(int)
|
|
417
|
+
# }
|
|
418
|
+
|
|
419
|
+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
420
|
+
# MAKE COPIES
|
|
421
|
+
isvars = self.isvars.copy()
|
|
422
|
+
asvars = self.asvars.copy()
|
|
423
|
+
asvars_construct_matrix = self.asvars_construct_matrix.copy()
|
|
424
|
+
randvars = self.randvars.copy()
|
|
425
|
+
randtransvars = self.randtransvars.copy()
|
|
426
|
+
fixedtransvars = self.fixedtransvars.copy()
|
|
427
|
+
varnames = self.varnames.copy()
|
|
428
|
+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
429
|
+
|
|
430
|
+
self.varnames = np.array(varnames, dtype="<U64")
|
|
431
|
+
lst = self.varnames.tolist()
|
|
432
|
+
ispos = [lst.index(str) for str in self.isvars if str in lst] # Position of IS vars
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
ispos_old = [self.varnames.tolist().index(i) for i in self.isvars] # Position of IS va
|
|
436
|
+
# adjust index array to include isvars
|
|
437
|
+
if len(self.isvars) > 0 and not hasattr(self, 'ispos'): # check not done before...
|
|
438
|
+
# {
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
nbFalse = len(self.isvars) * (J - 1) # Calculate the number of False values to insert
|
|
442
|
+
|
|
443
|
+
# Creates a masked version of the boolean array fxidx_bool based on indices not present in the array ispos
|
|
444
|
+
fxidx_bool = np.array(self.fxtransidx, dtype="bool") # Convert elements to booleans
|
|
445
|
+
indices = np.arange(len(fxidx_bool)) # Array of indices from 0 to len(fxidx_bool) - 1.
|
|
446
|
+
mask = np.isin(indices, ispos) # True indicates that the element from indices is in ispos, and False indicates it is not.
|
|
447
|
+
fxidx_bool_masked = fxidx_bool[~mask] # negated_mask = ~mask
|
|
448
|
+
self.fxidx = np.insert(fxidx_bool_masked, 0, np.repeat(True, nbFalse))# Insert True values
|
|
449
|
+
|
|
450
|
+
fxtransidx_bool = np.array(self.fxtransidx, dtype="bool") # Convert elements to booleans
|
|
451
|
+
indices= np.arange(len(fxtransidx_bool))
|
|
452
|
+
mask = np.isin(indices, ispos)
|
|
453
|
+
fxtransidx_bool_masked = fxtransidx_bool[~mask]
|
|
454
|
+
self.fxtransidx = np.insert(fxtransidx_bool_masked, 0, np.repeat(False, nbFalse)) # Insert False values
|
|
455
|
+
|
|
456
|
+
if hasattr(self, 'rvidx'):
|
|
457
|
+
# {
|
|
458
|
+
rvidx_bool = np.array(self.rvidx, dtype=bool) # Convert self.rvidx to boolean array if it's not already
|
|
459
|
+
indices = np.arange(len(rvidx_bool))
|
|
460
|
+
mask = np.isin(indices, ispos)
|
|
461
|
+
rvidx_bool_masked = rvidx_bool[~mask]
|
|
462
|
+
self.rvidx = np.insert(rvidx_bool_masked, 0, np.repeat(False, nbFalse)) # Insert False values
|
|
463
|
+
# }
|
|
464
|
+
|
|
465
|
+
if hasattr(self, 'rvtransidx'):
|
|
466
|
+
# {
|
|
467
|
+
rvtransidx_bool = np.array(self.rvtransidx, dtype=bool) # Convert self.rvidx to boolean array if it's not already
|
|
468
|
+
indices = np.arange(len(rvtransidx_bool))
|
|
469
|
+
mask = np.isin(indices, ispos)
|
|
470
|
+
rvtransidx_bool_masked = rvtransidx_bool[~mask]
|
|
471
|
+
self.rvtransidx = np.insert(rvtransidx_bool_masked, 0, np.repeat(False, nbFalse)) # Insert False values
|
|
472
|
+
# }
|
|
473
|
+
# }
|
|
474
|
+
|
|
475
|
+
if self.fit_intercept:
|
|
476
|
+
# {
|
|
477
|
+
if '_inter' not in self.isvars: # stop running in validation
|
|
478
|
+
# {
|
|
479
|
+
ones_array = np.ones(J * N) # Create an array of ones with length J * N.
|
|
480
|
+
column_vector = ones_array[:, None] # Reshapes array to have dimensions (J * N, 1)
|
|
481
|
+
X = np.hstack((column_vector, X)) # Stack arrays horizontally
|
|
482
|
+
|
|
483
|
+
# Adjust variables to allow intercept parameters
|
|
484
|
+
# These lines of code check if self has specific attributes. If it does, it will
|
|
485
|
+
# convert it to a NumPy array with boolean dtype using np.array().
|
|
486
|
+
# If the attribute doesn't exist, it will create an array of False values using np.repeat()
|
|
487
|
+
# with length J - 1. Then, it will insert False at the beginning of the array using np.insert().
|
|
488
|
+
self.isvars = np.insert(np.array(self.isvars, dtype="<U64"), 0, '_inter')
|
|
489
|
+
self.varnames = np.insert(np.array(self.varnames, dtype="<U64"), 0, '_inter')
|
|
490
|
+
self.fxidx = np.insert(np.array(self.fxidx, dtype="bool_"), 0, np.repeat(True, J - 1))
|
|
491
|
+
self.fxtransidx = np.insert(np.array(self.fxtransidx, dtype="bool_"), 0, np.repeat(False, J - 1))
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
# Get the attribute if it exits, otherwise add False, 'J-1' times
|
|
495
|
+
current_rvidx = getattr(self, 'rvidx', np.repeat(False, J - 1))
|
|
496
|
+
self.rvidx = np.insert(current_rvidx, 0, np.repeat(False, J - 1)) # Insert at beginning
|
|
497
|
+
|
|
498
|
+
current_rvtransidx = getattr(self, 'rvtransidx', np.repeat(False, J - 1))
|
|
499
|
+
self.rvtransidx = np.insert(current_rvtransidx, 0, np.repeat(False, J - 1)) # Insert at beginning
|
|
500
|
+
|
|
501
|
+
# }
|
|
502
|
+
# }
|
|
503
|
+
|
|
504
|
+
if self.transformation == "boxcox": # {
|
|
505
|
+
self.trans_func = boxcox_transformation
|
|
506
|
+
self.transform_deriv = boxcox_param_deriv
|
|
507
|
+
# }
|
|
508
|
+
|
|
509
|
+
S = np.zeros((self.N, self.P, self.J))
|
|
510
|
+
for i in range(self.N):
|
|
511
|
+
S[i, 0:self.P_i[i], :] = 1
|
|
512
|
+
|
|
513
|
+
self.S = S
|
|
514
|
+
|
|
515
|
+
lst = self.varnames.tolist()
|
|
516
|
+
self.ispos = [lst.index(str) for str in self.isvars if str in lst] # Position of isvars in varnames
|
|
517
|
+
self.aspos = [lst.index(str) for str in asvars_construct_matrix if str in lst] # Position of asvars in varnames
|
|
518
|
+
randpos = [lst.index(str) for str in randvars if str in lst] # Position of randvars
|
|
519
|
+
randtranspos = [lst.index(str) for str in randtransvars if str in lst] # bc transformed variables with random coeffs
|
|
520
|
+
fixedtranspos = [lst.index(str) for str in fixedtransvars if str in lst] # bc transformed variables with fixed coeffs
|
|
521
|
+
|
|
522
|
+
self.correlationpos = []
|
|
523
|
+
if randvars:
|
|
524
|
+
self.correlationpos = [lst.index(str) for str in self.varnames if str in self.randvars] # Position of correlated variables within randvars
|
|
525
|
+
|
|
526
|
+
if (isinstance(self.correlated_vars, list)):
|
|
527
|
+
#{
|
|
528
|
+
self.correlationpos = [lst.index(str) for str in self.varnames if str in self.correlated_vars]
|
|
529
|
+
self.uncorrelatedpos = [lst.index(str) for str in self.varnames if str not in self.correlated_vars]
|
|
530
|
+
#}
|
|
531
|
+
|
|
532
|
+
self.Kf = sum(self.fxidx) # Set number of fixed coeffs from idx
|
|
533
|
+
self.Kr = len(randpos) # Number of random coefficients
|
|
534
|
+
self.Kftrans = len(fixedtranspos) # Number of fixed coefficients of bc transformed vars
|
|
535
|
+
self.Krtrans = len(randtranspos) # Number of random coefficients of bc transformed vars
|
|
536
|
+
self.Kchol = 0 # Number of random beta cholesky factors
|
|
537
|
+
self.correlationLength = 0
|
|
538
|
+
self.Kbw = self.Kr
|
|
539
|
+
|
|
540
|
+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
541
|
+
# set up length of betas required to estimate correlation and/or
|
|
542
|
+
# random variable standard deviations, useful for cholesky matrix
|
|
543
|
+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
544
|
+
if (self.correlated_vars):
|
|
545
|
+
# {
|
|
546
|
+
if (isinstance(self.correlated_vars, list)):
|
|
547
|
+
# {
|
|
548
|
+
nb_corvars = len(self.correlated_vars)
|
|
549
|
+
self.correlationLength = nb_corvars
|
|
550
|
+
self.Kbw = self.Kr - nb_corvars
|
|
551
|
+
self.Kchol = int((nb_corvars * (nb_corvars + 1)) / 2)
|
|
552
|
+
# i.e., Kchol => # permutations of specified params in correlation list
|
|
553
|
+
# }
|
|
554
|
+
else: # {
|
|
555
|
+
self.correlationLength = self.Kr
|
|
556
|
+
self.Kbw = 0
|
|
557
|
+
nb_randvars = len(self.randvars)
|
|
558
|
+
self.Kchol = int((nb_randvars * (nb_randvars + 1)) / 2)
|
|
559
|
+
# i.e., correlated_vars = True, Kchol => permutations of rand vars
|
|
560
|
+
# }
|
|
561
|
+
# }
|
|
562
|
+
|
|
563
|
+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
564
|
+
# Create design matrix for individual specific variables
|
|
565
|
+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
566
|
+
Xis = None
|
|
567
|
+
if len(self.isvars):
|
|
568
|
+
# {
|
|
569
|
+
# Create a dummy individual specific variables for the alts
|
|
570
|
+
dummy = np.tile(np.eye(J), reps=(P_N, 1))
|
|
571
|
+
|
|
572
|
+
# Remove base alternative
|
|
573
|
+
dummy = np.delete(dummy, np.where(self.alts == self.base_alt)[0], axis=1)
|
|
574
|
+
Xis = X[:, self.ispos]
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
if Xis.dtype == np.object_:
|
|
578
|
+
Xis_numeric = pd.to_numeric(Xis.flatten(), errors='coerce').reshape(Xis.shape)
|
|
579
|
+
Xis = Xis_numeric
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
# Multiply dummy representation by the individual specific data
|
|
583
|
+
try:
|
|
584
|
+
|
|
585
|
+
Xis = np.einsum('nj,nk->njk', Xis, dummy, dtype="float64")
|
|
586
|
+
except:
|
|
587
|
+
Xis_numeric = pd.to_numeric(Xis.flatten(), errors='coerce').reshape(Xis.shape)
|
|
588
|
+
Xis = Xis_numeric
|
|
589
|
+
Xis = np.einsum('nj,nk->njk', Xis, dummy, dtype="float64")
|
|
590
|
+
# Example of filtering out non-numeric data
|
|
591
|
+
#Xis = np.array([x for x in Xis if isinstance(x, (int, float))], dtype='float64')
|
|
592
|
+
nbOf = (self.J - 1) * len(self.ispos)
|
|
593
|
+
Xis = Xis.reshape((P_N, self.J, nbOf)) # ERROR: UNEXPECTED ARGUMENT?
|
|
594
|
+
|
|
595
|
+
# }
|
|
596
|
+
else: # {
|
|
597
|
+
Xis = np.array([])
|
|
598
|
+
# }
|
|
599
|
+
|
|
600
|
+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
601
|
+
# For alternative specific variables
|
|
602
|
+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
603
|
+
Xas = None
|
|
604
|
+
if asvars_construct_matrix:
|
|
605
|
+
# {
|
|
606
|
+
Xas = X[:, self.aspos]
|
|
607
|
+
Xas = Xas.reshape(N, J, -1)
|
|
608
|
+
# }
|
|
609
|
+
|
|
610
|
+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
611
|
+
# Set design matrix based on existence of asvars and isvars
|
|
612
|
+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
613
|
+
"""
|
|
614
|
+
NEW CODE:
|
|
615
|
+
if asvars_construct_matrix: # There are vars that are not isvars
|
|
616
|
+
X = np.dstack((Xis, Xas)) if self.isvars else Xas
|
|
617
|
+
elif self.isvars:
|
|
618
|
+
X = Xis
|
|
619
|
+
else: # {
|
|
620
|
+
length = len(self.varnames)
|
|
621
|
+
if self.fit_intercept: length += (J - 1) - 1
|
|
622
|
+
X = X.reshape(-1, len(self.alts), length)
|
|
623
|
+
# }"""
|
|
624
|
+
|
|
625
|
+
# OLD CODE:
|
|
626
|
+
if len(asvars_construct_matrix) and len(self.isvars):
|
|
627
|
+
X = np.dstack((Xis, Xas))
|
|
628
|
+
elif len(asvars_construct_matrix):
|
|
629
|
+
X = Xas
|
|
630
|
+
elif (len(self.isvars)):
|
|
631
|
+
X = Xis
|
|
632
|
+
else:
|
|
633
|
+
x_varname_length = len(self.varnames) if not self.fit_intercept \
|
|
634
|
+
else (len(self.varnames) - 1) + (J - 1)
|
|
635
|
+
X = X.reshape((-1, len(self.alts), x_varname_length))
|
|
636
|
+
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
intercept_names = ["_intercept.{}".format(j) for j in self.alts
|
|
640
|
+
if j != self.base_alt] if self.fit_intercept else []
|
|
641
|
+
|
|
642
|
+
names = ["{}.{}".format(isvar, j) for isvar in isvars for j in self.alts if j != self.base_alt]
|
|
643
|
+
|
|
644
|
+
lambda_names_fixed = ["lambda.{}".format(transvar) for transvar in fixedtransvars]
|
|
645
|
+
|
|
646
|
+
lambda_names_rand = ["lambda.{}".format(transvar) for transvar in randtransvars]
|
|
647
|
+
|
|
648
|
+
randvars = [x for x in self.varnames if x in self.randvars]
|
|
649
|
+
randvars = np.array(randvars, dtype='<U64')
|
|
650
|
+
|
|
651
|
+
asvars_names = [x for x in asvars if (x not in self.randvars) and
|
|
652
|
+
(x not in fixedtransvars) and (x not in randtransvars)]
|
|
653
|
+
|
|
654
|
+
chol = ["chol." + self.varnames[self.correlationpos[i]] + "." +
|
|
655
|
+
self.varnames[self.correlationpos[j]] for i
|
|
656
|
+
in range(self.correlationLength) for j in range(i + 1)]
|
|
657
|
+
|
|
658
|
+
br_w_names = []
|
|
659
|
+
|
|
660
|
+
# three cases for corr. varnames: no corr, corr list, corr Bool (All)
|
|
661
|
+
if not (self.correlated_vars is True or isinstance(self.correlated_vars, list)):
|
|
662
|
+
if hasattr(self, "rvidx"): # avoid errors with multinomial logit
|
|
663
|
+
br_w_names = np.char.add("sd.", randvars)
|
|
664
|
+
|
|
665
|
+
if (isinstance(self.correlated_vars, list)): # if not all r.v.s correlated
|
|
666
|
+
# {
|
|
667
|
+
sd_uncorrelated_pos = [lst.index(str) for str in self.varnames
|
|
668
|
+
if str not in self.correlated_vars and str in randvars]
|
|
669
|
+
br_w_names = np.char.add("sd.", self.varnames[sd_uncorrelated_pos])
|
|
670
|
+
# }
|
|
671
|
+
|
|
672
|
+
sd_rand_trans = np.char.add("sd.", self.varnames[randtranspos])
|
|
673
|
+
|
|
674
|
+
names = np.concatenate((intercept_names, names, asvars_names, randvars,
|
|
675
|
+
chol, br_w_names, fixedtransvars,
|
|
676
|
+
lambda_names_fixed, randtransvars,
|
|
677
|
+
sd_rand_trans, lambda_names_rand))
|
|
678
|
+
|
|
679
|
+
names = np.array(names, dtype="<U64")
|
|
680
|
+
return X, names
|
|
681
|
+
# }
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
def check_instance(self, obj):
    """Tell whether ``obj`` is one of the model kinds recognised by class name.

    The decision is a substring test on ``obj.__class__.__name__``; no
    ``isinstance`` check is involved, so any class whose name contains one of
    the tags below is accepted.

    Returns
    -------
    bool
        ``True`` when the class name contains ``"MultinomialLogit"`` or
        ``"OrderedLogitLong"``, ``False`` otherwise.
    """
    class_name = obj.__class__.__name__
    recognised_tags = ("MultinomialLogit", "OrderedLogitLong")
    return any(tag in class_name for tag in recognised_tags)
|
|
694
|
+
|
|
695
|
+
def restate_idx(self, ispos, isvar, asvars):
    """Drop the entries at ``ispos`` from the coefficient index masks.

    Nothing happens when ``self.check_instance(self)`` is true.  Otherwise
    ``self.fxidx`` and ``self.fxtransidx`` are always rebuilt with
    ``np.delete``, while ``self.rvidx`` and ``self.rvtransidx`` are rebuilt
    only if the instance has them.

    ``isvar`` and ``asvars`` are accepted but not read.
    """
    # TODO: check whether isvars and asvars can both be present; otherwise
    # this method is not needed (the original author believed it redundant).
    if self.check_instance(self):
        return

    # Masks that every model carries.
    for mask_name in ('fxidx', 'fxtransidx'):
        setattr(self, mask_name, np.delete(getattr(self, mask_name), ispos))

    # Masks that exist only on models with random coefficients.
    # NOTE(review): the two guards are treated as independent siblings.
    for mask_name in ('rvidx', 'rvtransidx'):
        if hasattr(self, mask_name):
            setattr(self, mask_name, np.delete(getattr(self, mask_name), ispos))
|
|
714
|
+
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
|
|
718
|
+
def setup_design_matrix(self, X):
    """Set up and reshape the input data after adding isvars and intercept.

    Builds the design matrix by adding the intercept column when
    ``self.fit_intercept`` is set and by converting the individual-specific
    variables (isvars) to a dummy representation that removes the base
    alternative.

    Parameters
    ----------
    X : array
        Two-dimensional long-format data; ``len(X) / J`` is taken as the
        number of choice situations.

    Returns
    -------
    X : array
        Three-dimensional design matrix (every branch below reshapes or
        stacks into three axes, the second of length ``J``).
    names : numpy array of ``<U64``
        Coefficient names: intercepts, isvar dummies, remaining asvars,
        random variables, Cholesky terms, standard deviations and Box-Cox
        lambdas, concatenated in that order.

    Side effects
    ------------
    Sets ``P``, ``N``, ``J``, ``P_i``, ``S``, ``varnames``, ``isvars``,
    ``aspos``, ``ispos``, ``correlationpos``, ``uncorrelatedpos``, ``Kf``,
    ``Kr``, ``Kftrans``, ``Krtrans``, ``Kchol``, ``correlationLength``,
    ``Kbw`` and ``ordered_varnames``; may rewrite ``fxidx``, ``fxtransidx``,
    ``rvidx``, ``rvtransidx``, ``trans_func`` and ``transform_deriv``.

    NOTE(review): block nesting was reconstructed from a listing without
    indentation -- confirm the if/else pairing against the packaged file.
    """
    # Reuse a previously stored J if present, otherwise count the alternatives.
    J = getattr(self, 'J', len(self.alts))

    N = P_N = int(len(X)/J)
    self.P = 0
    self.N = N
    self.J = J
    if self.panels is not None:
        # panels size
        self.P_i = ((np.unique(self.panels, return_counts=True)[1])/J).astype(int)
        self.P = np.max(self.P_i)
        # self.N now counts panels; the local N / P_N keep len(X) / J.
        self.N = len(self.P_i)
    else:
        self.P = 1
        self.P_i = np.ones([N]).astype(int)
    # Local snapshots taken before the attributes are modified below.
    isvars = self.isvars.copy()
    asvars = self.asvars.copy()
    asvars_construct_matrix = self.asvars_construct_matrix.copy()
    randvars = self.randvars.copy()
    randtransvars = self.randtransvars.copy()
    fixedtransvars = self.fixedtransvars.copy()
    varnames = self.varnames.copy()
    self.varnames = np.array(varnames, dtype="<U64")

    # `lst` is the column-name list used for position look-ups; it gains a
    # leading '_inter' entry when an intercept is fitted.
    lst = varnames
    lst = np.array(lst, dtype='<U64')
    if self.fit_intercept:
        lst = np.insert(lst, 0, '_inter').tolist()
        if '_inter' not in self.isvars:
            if hasattr(self, 'ispos'):
                self.isvars = np.insert(self.isvars, 0, '_inter')
            else:
                # NOTE(review): this replaces self.isvars rather than
                # prepending to it -- confirm this is intended.
                self.isvars = np.array(['_inter'])
    else:
        lst = lst.tolist()

    # NOTE(review): the loop variable `str` shadows the builtin here and below.
    ispos = [lst.index(str) for str in self.isvars if str in lst]  # Position of IS vars
    #ispos = [self.varnames.tolist().index(i) for i in self.isvars]  # Position of IS vars

    # adjust index array to include isvars

    if len(self.isvars) > 0 and not hasattr(self, 'ispos'):  # check not done before...
        #self.fxidx = np.insert(np.array(self.fxidx, dtype="bool_"), 0,
        #                       np.repeat(True, len(self.isvars)*(J - 1)))
        # NOTE(review): the first assignment is dead; where_h is always 0.
        where_h = ispos
        where_h =0

        self.restate_idx(ispos, isvars, asvars)
        # Prepend len(isvars) * (J - 1) entries to each mask: True for the
        # fixed mask, False for the others.
        self.fxidx =np.insert(np.array(self.fxidx, dtype="bool_"), where_h, np.repeat(True, len(self.isvars)*(J-1)))
        self.fxtransidx = np.insert(np.array(self.fxtransidx, dtype="bool_"), where_h, np.repeat(False, len(self.isvars)*(J - 1)))
        #self.fxtransidx = np.insert(np.array(self.fxtransidx, dtype="bool_"),
        #                            0, np.repeat(False, len(self.isvars)*(J - 1)))
        if hasattr(self, 'rvidx'):
            self.rvidx = np.insert(np.array(self.rvidx, dtype="bool_"), where_h,
                                   np.repeat(False, len(self.isvars)*(J -1)))
        if hasattr(self, 'rvtransidx'):
            # Uses the literal 0, which equals where_h.
            self.rvtransidx = np.insert(np.array(self.rvtransidx, dtype="bool_"),
                                        0, np.repeat(False, len(self.isvars)*(J - 1)))
    else:
        self.restate_idx(ispos, isvars, asvars)
    if self.fit_intercept:
        # Prepend a column of ones to the long-format data.
        X = np.hstack((np.ones(J*N)[:, None], X))
        #X=np.hstack(np.tile(np.eye(J), reps=(P_N, 1)),X)
        #eye = np.tile(np.eye(J), reps=(P_N, 1))
        #X = np.hstack((eye,X))
        if '_inter' not in self.isvars:  # stop running in validation
            # adjust variables to allow intercept parameters
            self.isvars = np.insert(np.array(self.isvars, dtype="<U64"), 0, '_inter')
            self.varnames = np.insert(np.array(self.varnames, dtype="<U64"), 0, '_inter')
            self.fxidx = np.insert(np.array(self.fxidx, dtype="bool_"), 0, np.repeat(True, J-1))
            if hasattr(self, 'rvidx'):
                self.rvidx = np.insert(np.array(self.rvidx, dtype="bool_"), 0, np.repeat(False, J-1))
            self.fxtransidx = np.insert(np.array(self.fxtransidx, dtype="bool_"), 0, np.repeat(False, J-1))
            if hasattr(self, 'rvtransidx'):
                self.rvtransidx = np.insert(np.array(self.rvtransidx, dtype="bool_"), 0, np.repeat(False, J-1))

    if self.transformation == "boxcox":
        self.trans_func = boxcox_transformation
        self.transform_deriv = boxcox_param_deriv

    # S[i, p, :] is 1 for the first P_i[i] slots of row i and 0 afterwards.
    S = np.zeros((self.N, self.P, self.J))
    for i in range(self.N):
        S[i, 0:self.P_i[i], :] = 1
    self.S = S

    #ispos = [self.varnames.tolist().index(i) for i in self.isvars[self.isvars != '_inter']]  # Position of IS vars
    aspos = [self.varnames.tolist().index(i) for i in asvars_construct_matrix]  # Position of AS vars
    self.aspos = np.array(aspos)  # saved for later use
    self.ispos = np.array(ispos)
    randpos = [self.varnames.tolist().index(i) for i in randvars]  # Position of random vars
    randtranspos = [self.varnames.tolist().index(i) for i in randtransvars]  # bc transformed variables with random coeffs
    fixedtranspos = [self.varnames.tolist().index(i) for i in fixedtransvars]  # bc transformed variables with fixed coeffs

    self.correlationpos = []
    self.uncorrelatedpos = []
    if randvars:
        self.correlationpos = [lst.index(str) for str in self.varnames if
                               str in self.randvars]  # Position of correlated variables within randvars

    if (isinstance(self.correlated_vars, list)):
        # {
        # An explicit list overrides the default computed just above.
        self.correlationpos = [lst.index(str) for str in self.varnames if str in self.correlated_vars]
        self.uncorrelatedpos = [lst.index(str) for str in self.varnames if str not in self.correlated_vars and str in randvars]
        # }

    self.Kf = sum(self.fxidx)  # set number of fixed coeffs from idx
    self.Kr = len(randpos)  # Number of random coefficients
    self.Kftrans = len(fixedtranspos)  # Number of fixed coefficients of bc transformed vars
    self.Krtrans = len(randtranspos)  # Number of random coefficients of bc transformed vars
    self.Kchol = 0  # Number of random beta cholesky factors
    self.correlationLength = 0
    self.Kbw = self.Kr

    # set up length of betas required to estimate correlation and/or
    # random variable standard deviations, useful for cholesky matrix
    # NOTE(review): the next three `if (self.correlated_vars)` blocks assign
    # the same values to correlationLength, Kbw and Kchol; the third repeats
    # the first two.
    if (self.correlated_vars):
        if (isinstance(self.correlated_vars, list)):
            self.correlationLength = len(self.correlated_vars)
            self.Kbw = self.Kr - len(self.correlated_vars)
        else:
            self.correlationLength = self.Kr
            self.Kbw = 0
    if (self.correlated_vars):
        if (isinstance(self.correlated_vars, list)):
            # Kchol, permutations of specified params in correlation list
            self.Kchol = int((len(self.correlated_vars) *
                              (len(self.correlated_vars)+1))/2)
        else:
            # i.e. correlation = True, Kchol permutations of rand vars
            self.Kchol = int((len(self.randvars) *
                              (len(self.randvars)+1))/2)

    if (self.correlated_vars):
        # {
        if (isinstance(self.correlated_vars, list)):
            # {
            nb_corvars = len(self.correlated_vars)
            self.correlationLength = nb_corvars
            self.Kbw = self.Kr - nb_corvars
            self.Kchol = int((nb_corvars * (nb_corvars + 1)) / 2)
            # i.e., Kchol => # permutations of specified params in correlation list
            # }
        else:  # {
            self.correlationLength = self.Kr
            self.Kbw = 0
            nb_randvars = len(self.randvars)
            self.Kchol = int((nb_randvars * (nb_randvars + 1)) / 2)
            # i.e., correlated_vars = True, Kchol => permutations of rand vars
            # }
        # }

    # Create design matrix
    # For individual specific variables
    Xis = None
    if len(self.isvars) or len(self.ispos):
        # {
        # Create a dummy individual specific variables for the alts
        dummy = np.tile(np.eye(J), reps=(P_N, 1))

        # Remove base alternative
        dummy = np.delete(dummy, np.where(self.alts == self.base_alt)[0], axis=1)
        Xis = X[:, self.ispos]

        # Object-dtype columns are coerced to numbers; values that cannot be
        # parsed become NaN (errors='coerce').
        if Xis.dtype == np.object_:
            Xis_numeric = pd.to_numeric(Xis.flatten(), errors='coerce').reshape(Xis.shape)
            Xis = Xis_numeric

        # Multiply dummy representation by the individual specific data
        # NOTE(review): the bare `except` retries after numeric coercion for
        # any exception type, including KeyboardInterrupt.
        try:

            Xis = np.einsum('nj,nk->njk', Xis, dummy, dtype="float64")
        except:
            Xis_numeric = pd.to_numeric(Xis.flatten(), errors='coerce').reshape(Xis.shape)
            Xis = Xis_numeric
            Xis = np.einsum('nj,nk->njk', Xis, dummy, dtype="float64")
            # Example of filtering out non-numeric data
            # Xis = np.array([x for x in Xis if isinstance(x, (int, float))], dtype='float64')
        nbOf = (self.J - 1) * len(self.ispos)
        Xis = Xis.reshape((P_N, self.J, nbOf))  # ERROR: UNEXPECTED ARGUMENT?

        # }
    else:  # {
        Xis = np.array([])
        # }
    # For alternative specific variables
    Xas = None
    if asvars_construct_matrix:
        Xas = X[:, aspos]
        Xas = Xas.reshape(N, J, -1)

    # Set design matrix based on existence of asvars and isvars
    if len(asvars_construct_matrix) and len(self.isvars):
        X = np.dstack((Xis, Xas))
    elif len(asvars_construct_matrix):
        X = Xas
    elif (len(self.isvars)):
        X = Xis
    else:
        x_varname_length = len(self.varnames) if not self.fit_intercept \
            else (len(self.varnames) - 1)+(J-1)
        X = X.reshape(-1, len(self.alts), x_varname_length)

    # ---- Coefficient names -------------------------------------------
    intercept_names = ["_intercept.{}".format(j) for j in self.alts
                       if j != self.base_alt] if self.fit_intercept else []

    # Uses the local `isvars` snapshot, i.e. without any '_inter' added above.
    names = ["{}.{}".format(isvar, j) for isvar in isvars for j in self.alts if j != self.base_alt]

    lambda_names_fixed = ["lambda.{}".format(transvar) for transvar in fixedtransvars]

    lambda_names_rand = ["lambda.{}".format(transvar) for transvar in randtransvars]

    # Random variables in the order they appear in self.varnames.
    randvars = [x for x in self.varnames if x in self.randvars]
    randvars = np.array(randvars, dtype='<U64')

    asvars_names = [x for x in asvars if (x not in self.randvars) and
                    (x not in fixedtransvars) and (x not in randtransvars)]

    # One name per lower-triangular entry (j <= i) of the correlated block.
    chol = ["chol." + self.varnames[self.correlationpos[i]] + "." +
            self.varnames[self.correlationpos[j]] for i
            in range(self.correlationLength) for j in range(i + 1)]

    br_w_names = []

    # three cases for corr. varnames: no corr, corr list, corr Bool (All)
    if not (self.correlated_vars is True or isinstance(self.correlated_vars, list)):
        if hasattr(self, "rvidx"):  # avoid errors with multinomial logit
            br_w_names = np.char.add("sd.", randvars)

    if (isinstance(self.correlated_vars, list)):  # if not all r.v.s correlated
        # {
        if self.fit_intercept:
            if '_inter' not in self.varnames:
                names_for_p = np.insert(self.varnames, 0, '_inter')
            else:
                names_for_p = self.varnames
        else:
            names_for_p = self.varnames
        sd_uncorrelated_pos = [lst.index(str) for str in names_for_p
                               if str not in self.correlated_vars and str in randvars]
        br_w_names = np.char.add("sd.", names_for_p[sd_uncorrelated_pos])
        # }

    sd_rand_trans = np.char.add("sd.", self.varnames[randtranspos])

    # if there are isvars, they are positioned at the front
    if len(self.isvars) >0:

        inter_o = ["_inter" for j in self.alts
                   if j != self.base_alt] if self.fit_intercept else ['_inter']

        names_o = [isvar for isvar in isvars for j in self.alts if j != self.base_alt]
        restvars = [var for var in self.varnames if var not in names_o and var not in inter_o]
        self.ordered_varnames = names_o + restvars

    elif self.fit_intercept:
        inter_o = ["_inter" for j in self.alts
                   if j != self.base_alt]
        restvars = [var for var in self.varnames if var not in inter_o]
        self.ordered_varnames = restvars

    else:
        self.ordered_varnames = self.varnames
        # NOTE(review): the result of this np.insert is discarded, so the
        # statement has no effect.
        np.insert(np.array(self.varnames, dtype="<U64"), 0, '_inter')

    names = np.concatenate((intercept_names, names, asvars_names, randvars,
                            chol, br_w_names, fixedtransvars,
                            lambda_names_fixed, randtransvars,
                            sd_rand_trans, lambda_names_rand))

    names = np.array(names, dtype="<U64")
    return X, names
|
|
1004
|
+
|
|
1005
|
+
''' ---------------------------------------------------------- '''
|
|
1006
|
+
''' Function. Check data is in long format '''
|
|
1007
|
+
''' ---------------------------------------------------------- '''
|
|
1008
|
+
def check_long_format_consistency(self, ids, alts, sorted_idx):
    """Check that every id owns a whole number of alternative sets.

    In long format each id must contribute a multiple of ``J`` rows, where
    ``J`` is the number of distinct alternatives.

    Parameters
    ----------
    ids : array
        Identifier of each long-format row.
    alts : array
        Alternative label of each long-format row.
    sorted_idx : array
        Index array applied to ``alts`` before the distinct labels are counted.

    Raises
    ------
    ValueError
        If the row count of any id is not a multiple of the number of
        distinct alternatives.
    """
    # {
    alts = alts[sorted_idx]
    nb_alts = len(np.unique(alts))
    if nb_alts == 0:
        return  # nothing to validate, and avoids a modulo by zero

    _, obs_by_id = np.unique(ids, return_counts=True)

    # The previous test, np.all(obs_by_id / J), only checked that the
    # quotients were non-zero, which is always true, so the error could never
    # be raised.  The remainder is what tells whether a count is a multiple
    # of J.
    if np.any(obs_by_id % nb_alts != 0):
        raise ValueError('inconsistent alts and ids values in long format')
    # }
|
|
1026
|
+
|
|
1027
|
+
''' ---------------------------------------------------------- '''
|
|
1028
|
+
''' Function. Check for data consistency. Set to long format   '''
|
|
1029
|
+
''' ---------------------------------------------------------- '''
|
|
1030
|
+
def arrange_long_format(self, X, y, ids, alts, panels=None):
    """Return the long-format inputs in the order they were supplied.

    Re-sorting by ``(panels, ids, alts)`` is disabled in this class, so the
    data is passed through untouched.

    The earlier body still built a structured ``(panels, ids, alts)`` record
    array whose only consumer was that disabled sort.  The array was
    discarded, yet building it forced ``ids`` and ``panels`` into 32-bit
    floats and therefore failed for string identifiers or ``alts=None``.
    That dead work has been removed.

    Parameters
    ----------
    X, y : array
        Long-format explanatory data and choices.
    ids, alts : array or None
        Accepted for interface compatibility; not read.
    panels : array or None, optional
        Panel identifier of each row.

    Returns
    -------
    tuple
        ``(X, y, panels)``, the very objects that were passed in.
    """
    # {
    return X, y, panels
    # }
|
|
1049
|
+
|
|
1050
|
+
''' ---------------------------------------------------------- '''
|
|
1051
|
+
''' Function. Validate potential mistakes in the input data '''
|
|
1052
|
+
''' ---------------------------------------------------------- '''
|
|
1053
|
+
def validate_inputs(self, X, y, alts, varnames):
    """Reject inputs that are missing or have the wrong shape.

    The checks run in the order listed and stop at the first failure.

    Raises
    ------
    ValueError
        If ``varnames`` or ``alts`` is ``None``, if ``X`` is not
        two-dimensional, if ``y`` is not one-dimensional, or if ``varnames``
        does not have one entry per column of ``X``.
    """
    # {
    # Each test is wrapped in a lambda so that it is evaluated only after
    # all earlier checks have passed.
    rules = (
        (lambda: varnames is None,
         'The parameter varnames is required'),
        (lambda: alts is None,
         'The parameter alternatives is required'),
        (lambda: X.ndim != 2,
         "X must be an array of two dimensions in long format"),
        (lambda: y.ndim != 1,
         "y must be an array of one dimension in long format"),
        (lambda: len(varnames) != X.shape[1],
         "The length of varnames must match the number of columns in X"),
    )
    for is_violated, message in rules:
        if is_violated():
            raise ValueError(message)
    # }
|
|
1066
|
+
|
|
1067
|
+
''' ------------------------------------------------------------- '''
|
|
1068
|
+
''' Function. Regularization of the loglike '''
|
|
1069
|
+
''' Flag affects whether penalty is added or subtracted '''
|
|
1070
|
+
''' ------------------------------------------------------------- '''
|
|
1071
|
+
def regularize_loglik(self, betas, negative=False):
    """Return the squared-norm (L2, ridge-type) penalty for ``betas``.

    The penalty is ``self.reg_penalty * sum(betas ** 2)``.

    Parameters
    ----------
    betas : array
        Coefficient values to penalise.
    negative : bool, optional
        When true the penalty is returned with its sign flipped, so the
        caller can add it to either a log-likelihood or its negative.
    """
    # {
    squared_norm = np.sum(np.square(betas))
    penalty = self.reg_penalty * squared_norm
    if negative:
        return -penalty
    return penalty
    # }
|
|
1077
|
+
|
|
1078
|
+
|
|
1079
|
+
|
|
1080
|
+
''' ---------------------------------------------------------- '''
|
|
1081
|
+
''' Function. Compute the log-likelihood of the null model '''
|
|
1082
|
+
''' |y| = #samples * #choices '''
|
|
1083
|
+
''' ---------------------------------------------------------- '''
|
|
1084
|
+
def get_loglik_null(self):  # {
    """Return ``-2 *`` the log-likelihood of the equal-shares null model.

    Every entry of ``self.y`` is weighted by ``1 / self.J``; each row is
    summed to give that row's likelihood, and the result is ``-2`` times the
    sum of the logs of those row likelihoods.
    """
    equal_share = 1.0 / self.J
    row_lik = np.sum(self.y * equal_share, axis=1)  # one value per row of y
    return -2 * np.sum(np.log(row_lik))
    # }
|
|
1092
|
+
|
|
1093
|
+
''' ---------------------------------------------------------- '''
|
|
1094
|
+
''' Function. Print the coefficients and estimation outputs '''
|
|
1095
|
+
''' ---------------------------------------------------------- '''
|
|
1096
|
+
def summarise(self, file=None):
    """Print the coefficient table and the fit statistics of the model.

    Parameters
    ----------
    file : file-like object, optional
        Destination handed to every ``print`` call; ``None`` means standard
        output.  All lines of the report go to this destination.

    Notes
    -----
    * If ``self.coeff_est`` is ``None`` a ``UserWarning`` is issued and the
      method returns right after the title lines.
    * ``self.adjust_lik_ratio`` is stored as
      ``1 - self.aic / self.get_loglik_null()``.
    * For latent-class models ``self.pvalues_member`` and
      ``self.coeff_names_member`` are stored as well.
    """
    # {
    # p-value thresholds in ascending order, mapped to significance symbols.
    significance_symbols = {0.001: "***", 0.01: "**", 0.05: "*", 0.1: ".", 1.01: ""}

    def significance(pvalue):
        # The first threshold strictly above the p-value wins.  A NaN p-value
        # matches none of them and gets the empty symbol.
        return next((symbol for threshold, symbol in significance_symbols.items()
                     if pvalue < threshold), "")

    print("", file=file)
    print(f"Choice Model: {self.descr}", file=file)

    if self.coeff_est is None:  # {
        warnings.warn("The current model has not been yet estimated", UserWarning)
        return
    # }

    if not self.converged:  # {
        print("-" * 50, file=file)
        print("WARNING: Convergence was not reached during estimation. "
              "The given estimates may not be reliable", file=file)
        if hasattr(self, "gtol_res"):
            print("gtol:", self.gtol, file=file)
            print("Final gradient norm:", self.gtol_res, file=file)
        print('*' * 50, file=file)
    # }

    print("Estimation time= {:.1f} seconds".format(self.estim_time_sec), file=file)

    if hasattr(self, 'pred_prob'):
        # {
        print("", file=file)
        print("Proportion of alternatives: observed choice", file=file)
        # NOTE(review): obs_prob is not set anywhere in this class; it is
        # expected to be provided together with pred_prob.
        print(self.obs_prob, file=file)

        print("", file=file)
        print("Proportion of alternatives: predicted choice", file=file)
        print(self.pred_prob, file=file)
        # }

    if hasattr(self, 'class_freq'):  # {
        print("", file=file)
        print("Estimated proportion of classes", file=file)
        print(self.class_freq, file=file)
    # }

    print("", file=file)
    print("Table.", file=file)

    # Latent-class reports use a wider name column and a longer rule.
    name_width = 28 if self.is_latent_class else 19
    rule = "-" * (84 if self.is_latent_class else 75)
    fmt = "{:" + str(name_width) + "} {:13.10f} {:13.10f} {:13.10f} {:13.3g} {:3}"
    header_fmt = "{:" + str(name_width) + "} {:>13} {:>13} {:>13} {:>13}"

    print(rule, file=file)
    print(header_fmt.format("Coefficient", "Estimate", "Std.Err.", "z-val", "P>|z|"), file=file)
    print(rule, file=file)

    for i in range(len(self.coeff_est)):
        # {
        shown_name = self.coeff_names[i][:name_width]
        print(fmt.format(shown_name, self.coeff_est[i], self.stderr[i],
                         self.zvalues[i], self.pvalues[i],
                         significance(self.pvalues[i])), file=file)
        # }

    # NOTE(review): conceptually this section belongs to the latent-class
    # modules; member_params_spec, get_member_X_idx and
    # membership_as_probability are not defined by this base class.
    if self.is_latent_class:
        # {
        zvalues = np.nan_to_num(self.class_x / self.class_x_stderr)
        zvalues = truncate_lower(zvalues, -1e+5)
        pvalues = 2 * (1 - ss.t.cdf(np.abs(zvalues), df=self.sample_size))
        self.pvalues_member = pvalues
        coeff_names_member = np.array([])

        for ii, member_class in enumerate(self.member_params_spec):
            # {
            # NOTE(review): the returned indices were filtered (sd / chol /
            # lambda entries removed) but never used afterwards.  The call is
            # kept in case the helper has side effects; only the unused pure
            # filtering was dropped.
            self.get_member_X_idx(ii, coeff_names=member_class)

            member_class_names = np.array(member_class, dtype='<U')
            if self.membership_as_probability:
                member_class_names = ["probability"]

            # Classes are numbered from 2: the first class has no
            # membership coefficients in this table.
            class_coeff_names = np.char.add('class-' + str(ii + 2) + ': ', member_class_names)

            # A separate 'constant' label for '_inter' is currently disabled:
            #   inter_name = 'class-' + str(ii + 2) + ': ' + 'constant'
            #   class_coeff_names = np.concatenate(([inter_name], class_coeff_names))

            coeff_names_member = np.concatenate((coeff_names_member, class_coeff_names))
            # }

        self.coeff_names_member = coeff_names_member
        print(rule, file=file)
        # Header and name truncation use the same width as the row format so
        # the columns line up.
        print(header_fmt.format("Class Member Coeff", "Estimate", "Std.Err.", "z-val", "P>|z|"), file=file)
        print(rule, file=file)

        for ii, coeff_name in enumerate(coeff_names_member):
            # {
            print(fmt.format(coeff_name[:name_width], self.class_x[ii],
                             self.class_x_stderr[ii], zvalues[ii], pvalues[ii],
                             significance(self.pvalues_member[ii])), file=file)
            # }
        # }

    # The closing rule goes to `file` like every other line (the latent-class
    # variant used to be printed to standard output).
    print(rule, file=file)
    print("Significance:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1", file=file)
    print("", file=file)

    text = f"LOGLIK = {self.loglik:0.3f}; AIC = {self.aic:0.3f}; BIC = {self.bic:0.3f};"

    if self.mae is not None:
        text += f"MAE= {self.mae:0.3f};"

    loglik_null = self.get_loglik_null()
    adjust_lik_ratio = 1 - (self.aic / loglik_null)
    self.adjust_lik_ratio = adjust_lik_ratio

    text += f" ADJLIK RATIO: {adjust_lik_ratio:.3f}"
    print(text, file=file)
    # }
|
|
1255
|
+
|
|
1256
|
+
|
|
1257
|
+
''' ---------------------------------------------------------- '''
|
|
1258
|
+
''' Function. '''
|
|
1259
|
+
''' ---------------------------------------------------------- '''
|
|
1260
|
+
def print_matrix(self, str_mat, descr):  # {
    """Print ``descr`` followed by ``str_mat``, one row per line.

    Every cell is formatted with a minimum width of 11 and followed by a
    single space.
    """
    print(descr)
    cell = "{:11}"
    for cells in str_mat:
        print("".join(cell.format(item) + ' ' for item in cells))
    # }
|
|
1267
|
+
|
|
1268
|
+
''' ---------------------------------------------------------- '''
|
|
1269
|
+
''' Function. '''
|
|
1270
|
+
''' ---------------------------------------------------------- '''
|
|
1271
|
+
def setup_print(self, mat):
    # {
    """Build a labelled string matrix from the leading K x K block of ``mat``.

    K is the number of entries in ``self.correlationpos``; the labels are
    the matching entries of ``self.varnames``.

    Args:
        mat: Two-dimensional array; only ``mat[0:K, 0:K]`` is used, rounded
            to 8 decimals.

    Returns:
        Array of shape ``(K + 1, K + 1)``: the first row is an empty cell
        followed by the labels, and each later row is a label followed by
        that row's values.
    """
    labels = [self.varnames[idx] for idx in self.correlationpos]
    size = len(labels)

    block = np.round(mat[0:size, 0:size], 8)

    # ____________________________________________________
    if dev.using_gpu:
        block = dev.convert_array_cpu(block)
    # ____________________________________________________

    # Gather the flat pieces in print order and join them once: an empty
    # "<U64" seed, the header row, then (label, values) for every row.
    pieces = [np.array([], dtype="<U64"), np.ravel(np.array([''] + labels))]
    for label, values in zip(labels, block):
        pieces.append(np.ravel(label))
        pieces.append(np.ravel(np.array(values)))

    flat = np.concatenate(pieces, axis=0)
    return flat.reshape((size + 1, size + 1))  # + 1 for coeff names row/col
# }
|
|
1290
|
+
|
|
1291
|
+
''' ---------------------------------------------------------- '''
|
|
1292
|
+
''' Function. Print matrix '''
|
|
1293
|
+
''' ---------------------------------------------------------- '''
|
|
1294
|
+
def print_mat(self, mat, descr):  # {
    """Convert ``mat`` with ``setup_print`` and print it under ``descr``.

    Args:
        mat: Matrix handed to ``self.setup_print``.
        descr: Heading handed to ``self.print_matrix``.
    """
    self.print_matrix(self.setup_print(mat), descr)
# }
|
|
1298
|
+
|
|
1299
|
+
''' ---------------------------------------------------------- '''
|
|
1300
|
+
''' Function. Return fitted values '''
|
|
1301
|
+
''' ---------------------------------------------------------- '''
|
|
1302
|
+
def fitted(self, type="parameters"):  # {
    """Return the stored ``pch2_res`` attribute when parameters are requested.

    Args:
        type: Kind of fitted value requested; only ``"parameters"`` is
            handled.

    Returns:
        ``self.pch2_res`` when ``type == "parameters"`` and the attribute
        exists; otherwise ``None``.
    """
    if type != "parameters":
        return None
    if not hasattr(self, 'pch2_res'):
        return None
    return self.pch2_res
# }
|
|
1306
|
+
|
|
1307
|
+
''' ---------------------------------------------------------- '''
|
|
1308
|
+
''' Function. Print names and their standard deviations '''
|
|
1309
|
+
''' ---------------------------------------------------------- '''
|
|
1310
|
+
def print_stdev(self, stdevs, names):  # {
    """Print a 'Standard Deviations' heading, then a names row and a values row.

    Args:
        stdevs: Iterable of values printed on the second row.
        names: Iterable of labels printed on the first row.

    Each cell is rendered with the ``"{:11}"`` format spec and followed
    by a single space.
    """
    cell = "{:11}"
    print('Standard Deviations')
    # Labels first, values second; both rows share the same cell layout.
    for series in (names, stdevs):
        print(''.join(cell.format(item) + ' ' for item in series))
# }
|
|
1318
|
+
|
|
1319
|
+
''' ---------------------------------------------------------- '''
|
|
1320
|
+
''' Function. Print standard deviations for randvars '''
|
|
1321
|
+
''' ---------------------------------------------------------- '''
|
|
1322
|
+
def compute_stddev(self):
    # {
    """Compute and print a standard deviation for each random coefficient.

    Reads ``covariance_matrix``, ``varnames``, ``correlationpos``,
    ``randvars``, ``randtransvars``, ``randvarsdict``, ``betas``, ``Kf`` and
    ``Kr`` from ``self``. Several of these (``covariance_matrix``,
    ``randvarsdict``, ``betas``) are not set by this base class and are
    expected to be provided by the mixed-logit style subclasses.

    Side effects:
        Stores ``corr_varnames``, ``rv_names_noncorr``, ``rvtrans_names``,
        ``rv_names_all``, ``distributions_corr``, ``distributions_rv``,
        ``distributions_rvtrans`` and ``distributions`` on ``self``, then
        prints the result through ``self.print_stdev``.

    Raises:
        KeyError: If a random-variable name is missing from
            ``self.randvarsdict``.
        IndexError: If the covariance diagonal is longer than the list of
            distributions (or than ``means`` for a lognormal entry).
    """
    # The square root of the covariance diagonal gives the raw spread
    # parameter of each random coefficient.
    diags = np.round(np.sqrt(np.diag(self.covariance_matrix)), 8)

    # CHECK: self.covariance_matrix = [0:n, 0:n] where n = len(corr_varnames)

    # QUERY: can these name lists be built once at initialisation?
    self.corr_varnames = [self.varnames[pos] for pos in self.correlationpos]

    # Names are matched to `diags` and `means` by position below, so their
    # order must be deterministic. Follow the order of `self.varnames`
    # instead of relying on set iteration order, which is arbitrary.
    # NOTE(review): assumes the covariance diagonal follows varnames order
    # within each group - confirm against the estimation code.
    corr_lookup = set(self.corr_varnames)
    rand_lookup = set(self.randvars)
    randtrans_lookup = set(self.randtransvars)
    ordered_names = list(dict.fromkeys(self.varnames))  # de-duplicated, ordered
    self.rv_names_noncorr = [
        name for name in ordered_names
        if name in rand_lookup and name not in corr_lookup
    ]
    self.rvtrans_names = [name for name in ordered_names if name in randtrans_lookup]
    self.rv_names_all = self.corr_varnames + self.rv_names_noncorr + self.rvtrans_names

    # Mixing-distribution code for every name, in the same order as the names.
    self.distributions_corr = [self.randvarsdict[name] for name in self.corr_varnames]
    self.distributions_rv = [self.randvarsdict[name] for name in self.rv_names_noncorr]
    self.distributions_rvtrans = [self.randvarsdict[name] for name in self.rvtrans_names]
    self.distributions = self.distributions_corr + self.distributions_rv + self.distributions_rvtrans

    stdevs = np.zeros(len(diags))

    # Means of the random coefficients sit after the Kf fixed coefficients.
    means = self.betas[self.Kf: self.Kf + self.Kr]
    for ii, val in enumerate(diags):
        # {
        distr = self.distributions[ii]
        if distr in ('n', 't'):
            # NOTE(review): 't' is reported unscaled, exactly as before;
            # confirm this is the intended standard deviation for it.
            stdev = val
        elif distr == 'ln':
            # Standard deviation of a lognormal with log-mean means[ii]
            # and log-sd val.
            stdev = np.sqrt(np.exp(val ** 2) - 1) * np.exp(means[ii] + 0.5 * val ** 2)
        elif distr == 'u':
            # A uniform with spread val has variance val**2 / 3, so its
            # standard deviation is val / sqrt(3).
            stdev = val / np.sqrt(3)
        else:
            stdev = -1  # ERROR NO DISTRIBUTION CHOSEN
        stdevs[ii] = np.round(stdev, 8)
        # }

    self.print_stdev(stdevs, self.rv_names_all)
# }
|
|
1363
|
+
# }
|