metacountregressor 0.1.78__py3-none-any.whl → 0.1.83__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacountregressor/app_main.py +258 -0
- metacountregressor/helperprocess.py +257 -5
- metacountregressor/main.py +269 -61
- metacountregressor/metaheuristics.py +22 -11
- metacountregressor/setup.py +3 -2
- metacountregressor/solution.py +555 -214
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.83.dist-info}/METADATA +256 -35
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.83.dist-info}/RECORD +11 -10
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.83.dist-info}/WHEEL +1 -1
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.83.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.78.dist-info → metacountregressor-0.1.83.dist-info}/top_level.txt +0 -0
metacountregressor/solution.py
CHANGED
@@ -30,24 +30,24 @@ from scipy.special import gammaln
 from sklearn.metrics import mean_absolute_error as MAE
 from sklearn.metrics import mean_squared_error as MSPE
 from statsmodels.tools.numdiff import approx_fprime, approx_hess
-from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from texttable import Texttable
-
+import time
 try:
     from ._device_cust import device as dev
     from .pareto_file import Pareto, Solution
     from .data_split_helper import DataProcessor
 except ImportError:
-    from
-    from
+    from _device_cust import device as dev
+    from pareto_file import Pareto, Solution
     from data_split_helper import DataProcessor
 
-
+from scipy import stats
 np.seterr(divide='ignore', invalid='ignore')
 warnings.simplefilter("ignore")
 
 # define the computation boundary limits
-min_comp_val = 1e-
+min_comp_val = 1e-160
 max_comp_val = 1e+200
 log_lik_min = -1e+200
 log_lik_max = 1e+200
@@ -122,22 +122,24 @@ class ObjectiveFunction(object):
 
     def __init__(self, x_data, y_data, **kwargs):
 
-        self.reg_penalty =
+        self.reg_penalty = 0
         self.power_up_ll = False
+        self.nb_parma = 1
         self.bic = None
         self.other_bic = False
         self.test_flag = 1
+        self.no_extra_param = 1  # if true, fix dispersion. w
         if self.other_bic:
             print('change this to false latter ')
 
         # initialize values
-        self.constant_value =
-        self.negative_binomial_value =
+        self.constant_value = 0
+        self.negative_binomial_value = 1
 
-        self.verbose_safe =
+        self.verbose_safe = kwargs.get('verbose', 0)
         self.please_print = kwargs.get('please_print', 0)
         self.group_halton = None
-        self.grad_yes = False
+        self.grad_yes = kwargs.get('grad_est', False)
         self.hess_yes = False
         self.group_halton_test = None
         self.panels = None
@@ -150,15 +152,15 @@ class ObjectiveFunction(object):
         self.dist_fit = None
 
         self.MAE = None
-        self.best_obj_1 =
-        self._obj_1 = 'bic'
-        self._obj_2 = 'MSE'
+        self.best_obj_1 = 1000000.0
+        self._obj_1 = kwargs.get('_obj_1', 'bic')
+        self._obj_2 = kwargs.get('_obj_2', 'MSE')
         self.numerical_hessian_calc = 0  # calculates hessian by statsmodels otherwise scipy
         self.full_model = None
         self.GP_parameter = 0
-        self.is_multi =
+        self.is_multi = kwargs.get('is_multi', False)
         self.complexity_level = 6
-        self._max_iterations_improvement =
+        self._max_iterations_improvement = 10000
         self.generated_sln = set()
         self.ave_mae = 0
         # defalt paramaters for hs #TODO unpack into harmony search class
@@ -166,23 +168,32 @@ class ObjectiveFunction(object):
         self._hms = 20
         self._max_time = 60 * 60 * 24
         self._hmcr = .5
-        self._par = 0.3
+        self._par = 0.3  # dont think this gets useted
         self._mpai = 1
         self._max_imp = 100000
-        self._WIC = 1000  # Number of Iterations without Multiobjective Improvement
+        self._WIC = 1000  # Number of Iterations without Multiobjective Improvement #tod chuck into solution
         self._panels = None
         self.is_multi = True
         self.method_ll = 'Nelder-Mead-BFGS'
+
         self.method_ll = 'L-BFGS-B'  # alternatives 'BFGS_2', 'BFGS
-        self.method_ll = 'BFGS_2'
+        self.method_ll = kwargs.get('method', 'BFGS_2')
+
+        #self.method_ll = 'Nelder-Mead-BFGS'
         self.Keep_Fit = 2
         self.MP = 0
         # Nelder-Mead-BFGS
 
-        self._max_characteristics = 26
+        self._max_characteristics = kwargs.get('_max_vars', 26)
 
         self.beta_dict = dict
+        if 'model_terms' in kwargs:
+            print('change')
+            if kwargs.get('model_terms').get('group') is not None:
+                kwargs['group'] = kwargs.get('model_terms').get('group')
 
+            if kwargs.get('model_terms').get('panels') is not None:
+                kwargs['panels'] = kwargs.get('model_terms').get('panels')
         acceptable_keys_list = ['_par', '_max_imp', '_hmcr', 'steps',
                                 'algorithm', '_random_seed', '_max_time',
                                 'forcedvariables', '_obj_1', '_obj_2', '_par',
@@ -200,12 +211,17 @@ class ObjectiveFunction(object):
         if 'instance_number' in kwargs:
             self.instance_number = str(kwargs['instance_number'])
         else:
+
+            print('no name set, setting name as 0')
             self.instance_number = str(0)  # set an arbitrary instance number
 
         if not os.path.exists(self.instance_number):
-
+            if kwargs.get('make_directory', True):
+                print('Making a Directory, if you want to stop from storing the files to this directory set argumet: make_directory:False')
+                os.makedirs(self.instance_number)
 
         if not hasattr(self, '_obj_1'):
+            print('_obj_1 required, define as bic, aic, ll')
             raise Exception
 
         self.pvalue_penalty = float(kwargs.get('pvalue_penalty', 0.5))
@@ -213,6 +229,11 @@ class ObjectiveFunction(object):
         self._maximize = False  # do we maximize or minimize?
 
         x_data = sm.add_constant(x_data)
+        standardize_the_data = 0
+        if standardize_the_data:
+            print('we are standardize the data')
+            x_data = self.self_standardize_positive(x_data)
+
         self._input_data(x_data, y_data)
 
 
@@ -229,9 +250,12 @@ class ObjectiveFunction(object):
         self.test_percentage = float(kwargs.get('test_percentage', 0))
         self.val_percentage = float(kwargs.get('val_percentage', 0))
         if self.test_percentage == 0:
+            print('test percentage is 0, please enter arg test_percentage as decimal, eg 0.8')
+            print('continuing single objective')
+            time.sleep(2)
             self.is_multi = False
 
-        if 'panels' in kwargs:
+        if 'panels' in kwargs and not (kwargs.get('panels') == None):
             self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)
 
             x_data[kwargs['group']] = x_data[kwargs['group']].astype(
@@ -274,11 +298,11 @@ class ObjectiveFunction(object):
 
         #self.n_obs = N
         self._characteristics_names = list(self._x_data.columns)
-        self._max_group_all_means =
+        self._max_group_all_means = 2
 
         exclude_this_test = [4]
 
-        if 'panels' in kwargs:
+        if 'panels' in kwargs and not (kwargs.get('panels') == None):
             self.panels = np.asarray(df_train[kwargs['panels']])
             self.panels_test = np.asarray(df_test[kwargs['panels']])
             self.ids = np.asarray(
@@ -294,6 +318,8 @@ class ObjectiveFunction(object):
             self.group_halton = group.copy()
             self.group_dummies = pd.get_dummies(group)
             Xnew, Ynew, panel_info = self._balance_panels(X, Y, panel)
+
+            Xnew = pd.DataFrame(Xnew, columns=X.columns)
             self.panel_info = panel_info
             self.N, self.P = panel_info.shape
             Xnew.drop(kwargs['panels'], axis=1, inplace=True)
@@ -301,9 +327,11 @@ class ObjectiveFunction(object):
             K = Xnew.shape[1]
             self._characteristics_names = list(Xnew.columns)
             XX = Xnew.values.reshape(self.N, self.P, K).copy()
+            XX = XX.astype('float')
             self.group_dummies = self.group_dummies.values.reshape(self.N, self.P, -1)
             self.group_halton = self.group_halton.reshape(self.N, self.P)[:, 0]
             YY = Ynew.values.reshape(self.N, self.P, 1).copy()
+            YY = YY.astype('float')
             self._x_data = XX.copy()
             self._y_data = YY.copy()
             X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.panels_test, group_test)
@@ -325,6 +353,7 @@ class ObjectiveFunction(object):
             K = X.shape[1]
             self.columns_names = X.columns
             X = X.values.reshape(self.N_test, self.P_test, K)
+            X = X.astype('float')
             self.group_halton_test = self.group_halton_test.reshape(self.N_test, self.P_test)[:, 0]
             Y = Y.values.reshape(self.N_test, self.P_test, 1)
             Y = Y.astype('float')
@@ -337,6 +366,7 @@ class ObjectiveFunction(object):
 
 
         else:
+            print('No Panels. Grouped Random Paramaters Will not be estimated')
             self.G = None
             self._Gnum = 1
             self._max_group_all_means = 0
@@ -353,7 +383,9 @@ class ObjectiveFunction(object):
             K = Xnew.shape[1]
             self._characteristics_names = list(Xnew.columns)
             XX = Xnew.values.reshape(self.N, self.P, K).copy()
+            XX = XX.astype('float')
             YY = Ynew.values.reshape(self.N, self.P, 1).copy()
+            YY = YY.astype('float')
             self._x_data = XX.copy()
             self._y_data = YY.copy()
 
@@ -369,7 +401,9 @@ class ObjectiveFunction(object):
             K = X.shape[1]
             self.columns_names = X.columns
             X = X.values.reshape(self.N_test, self.P_test, K)
+            X = X.astype('float')
             Y = Y.values.reshape(self.N_test, self.P_test, 1)
+            Y = Y.astype('float')
             self._x_data_test = X.copy()
             self.y_data_test = Y.copy()
 
@@ -384,11 +418,13 @@ class ObjectiveFunction(object):
 
 
 
-        self.Ndraws = 200
+        self.Ndraws = kwargs.get('Ndraws', 200)
         self.draws1 = None
         self.initial_sig = 1  # pass the test of a single model
         self.pvalue_sig_value = .1
         self.observations = self._x_data.shape[0]
+        self.minimize_scaler = 1/self.observations  # scale the minimization function to the observations
+
         self.batch_size = None
         # open the file in the write mode
         self.grab_transforms = 0
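Note: the new `minimize_scaler = 1/self.observations` rescales the objective handed to the optimizer to a per-observation negative log-likelihood. A minimal sketch of why this rescaling is safe — multiplying the objective by any positive constant leaves the argmin unchanged while shrinking gradient magnitudes — using a toy Poisson fit; the data and names here are illustrative, not from the package:

import numpy as np
from scipy.optimize import minimize

# Toy Poisson data; illustrative only, not the package's data pipeline.
rng = np.random.default_rng(0)
X = np.column_stack([np.ones(500), rng.normal(size=500)])
y = rng.poisson(np.exp(X @ np.array([0.5, 0.3])))

def nll(beta, scale):
    # Poisson negative log-likelihood (dropping the constant log(y!) term),
    # multiplied by an arbitrary positive scale factor.
    mu = np.exp(X @ beta)
    return scale * np.sum(mu - y * np.log(mu))

unscaled = minimize(nll, np.zeros(2), args=(1.0,))
scaled = minimize(nll, np.zeros(2), args=(1.0 / len(y),))  # per-observation scaling
print(np.allclose(unscaled.x, scaled.x, atol=1e-4))  # same optimum either way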
@@ -400,17 +436,19 @@ class ObjectiveFunction(object):
             print('Setup Complete...')
         else:
             print('No Panels Supplied')
+            print('Setup Complete...')
         self._characteristics_names = list(self._x_data.columns)
         # define the variables
         # self._transformations = ["no", "sqrt", "log", "exp", "fact", "arcsinh", 2, 3]
         self._transformations = ["no", "sqrt", "log", "arcsinh"]
         self._transformations = kwargs.get('_transformation', ["no", "sqrt", "log", 'arcsinh'])
-
+        self._transformations = kwargs.get('_transformation', ["no", "log", "sqrt", "arcsinh"])
         # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']
 
-        self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', '
+        self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])
 
         if self.G is not None:
+            #TODO need to handle this for groups
             self._distribution = ["trad| " + item for item in self._distribution
                                   ] + ["grpd| " + item for item in self._distribution]
 
@@ -422,18 +460,32 @@ class ObjectiveFunction(object):
 
         self.significant = 0
         # define the states of our explanatory variables
+
+
         self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test,
-                                                   kwargs.get('must_include', []))
+                                                   kwargs.get('must_include', []), extra = kwargs.get('decisions', None))
+
+
+
         self._discrete_values = self._discrete_values + \
-
+                                self.define_distributions_analyst(extra=kwargs.get('decisions', None))
 
         if 'model_types' in kwargs:
             model_types = kwargs['model_types']
         else:
-            model_types = [[0, 1]]  # add 2 for Generalized Poisson
 
+
+            model_types = [[0, 1]]  # add 2 for Generalized Poisson
+            #model_types = [[0]]
+            #TODO change back and fix NB
+            model_t_dict = {'Poisson':0,
+                            "NB":1}
+            # Retrieve the keys (model names) corresponding to the values in model_types
+            model_keys = [key for key, value in model_t_dict.items() if value in model_types[0]]
+            # Print the formatted result
+            print(f'The type of models possible will consider: {", ".join(model_keys)}')
         self._discrete_values = self._discrete_values + self.define_poissible_transforms(
-            self._transformations) + model_types
+            self._transformations, kwargs.get('decisions',None)) + model_types
 
         self._model_type_codes = ['p', 'nb',
                                   'gp', "pl", ["nb-theta", 'nb-dis']]
@@ -451,6 +503,7 @@ class ObjectiveFunction(object):
         #Manually fit from analyst specification
         manual_fit = kwargs.get('Manual_Fit')
         if manual_fit is not None:
+            print('fitting manual')
            self.process_manual_fit(manual_fit)
 
         self.solution_analyst = None
@@ -485,6 +538,7 @@ class ObjectiveFunction(object):
             if self.is_multi:
                 self._offsets_test = self._x_data_test[:, :, val_od]
                 self._x_data_test = self.remove_offset(self._x_data_test, val_od)
+            print(self._offsets)
         else:
             self.initialize_empty_offsets()
 
@@ -757,6 +811,8 @@ class ObjectiveFunction(object):
         if dispersion == 0:
             return None, None
         elif dispersion == 2 or dispersion == 1:
+            if self.no_extra_param:
+                return self.nb_parma, None
             return betas[-1], None
 
         elif dispersion == 3:
@@ -784,14 +840,65 @@ class ObjectiveFunction(object):
         par = np.nan_to_num(par)
         return par
 
-    def
+    def rename_distro(self, distro):
+        # Mapping dictionary
+        mapping = {
+            'normal': ['normal', 'n', 'Normal'],
+            'triangular': ['triangular', 't', 'Triangular'],
+            'uniform': ['uniform', 'u', 'Uniform'],
+            'ln_normal': ['ln_normal', 'ln_n', 'Ln_Normal'],
+            'tn_normal': ['tn_normal', 'tn_n', 'trunc_normal']
+        }
+
+        # Use list comprehension with the mapping
+        reversed_mapping = {value: key for key, values in mapping.items() for value in values}
+
+        # Use the reversed mapping to find the corresponding key
+        new_distro = [reversed_mapping.get(i, i) for i in distro]
+        return new_distro
+
+    def define_distributions_analyst(self, extra = None):
+
+        if extra is not None:
+            set_alpha = []
+            for col in self._characteristics_names:
+                if col in extra[('Column')].values:
+                    matched_index = extra[('Column')].index[extra[('Column')] == col].tolist()
+                    distro = ast.literal_eval(extra.iloc[matched_index, 7].values.tolist()[0])
+                    distro = self.rename_distro(distro)
+                    set_alpha = set_alpha+[distro]
+                elif col == 'const':
+                    set_alpha = set_alpha +[['normal']]
+            return set_alpha
+        return [[x for x in self._distribution]] * self._characteristics
+
+
+
+
+    def define_alphas(self, complexity_level=4, exclude=[], include=[], extra = None):
         'complexity level'
         '''
         2 is feature selection,
-        3 is random
-        4 is correlated random
+        3 is random parameters
+        4 is correlated random parameters
+
+        extra is the stuff defined by the Meta APP
         '''
         set_alpha = []
+        if extra is not None:
+            for col in self._characteristics_names:
+                if col == 'const' or col == 'Constant' or col == 'constant':  # no random paramaters for const
+                    set_alpha = set_alpha + [[1]]
+                elif col == 'Offset':
+                    set_alpha = set_alpha + [[1]]
+
+                elif col in extra[('Column')].values:
+                    matched_index = extra[('Column')].index[extra[('Column')] == col].tolist()
+                    check = list(itertools.chain(*extra.iloc[matched_index, 1:7].values))
+                    set_alpha = set_alpha + [[x for x in range(len(check)) if check[x] == True]]
+            return set_alpha
+
+
         for col in self._characteristics_names:
             if col == 'const' or col == 'Constant' or col == 'constant':  # no random paramaters for const
                 set_alpha = set_alpha + [[1]]
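Note: `rename_distro` canonicalises user-supplied distribution names by inverting an alias table. The same pattern in isolation (aliases copied from the hunk; the `'lindley'` test name below is just a pass-through example):

# Standalone version of the alias-canonicalisation pattern used by rename_distro.
mapping = {
    'normal': ['normal', 'n', 'Normal'],
    'triangular': ['triangular', 't', 'Triangular'],
    'uniform': ['uniform', 'u', 'Uniform'],
}

# Invert {canonical: [aliases]} into {alias: canonical} with one comprehension.
reversed_mapping = {alias: canonical
                    for canonical, aliases in mapping.items()
                    for alias in aliases}

# Unknown names pass through unchanged via dict.get's default.
print([reversed_mapping.get(name, name) for name in ['n', 'Triangular', 'lindley']])
# -> ['normal', 'triangular', 'lindley']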
@@ -841,8 +948,14 @@ class ObjectiveFunction(object):
 
         return ([self._model_type_codes[dispersion]])
 
-    def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None,
-
+    def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
+        '''
+        setup for naming of the model summary
+        '''
+        if self.no_extra_param and dispersion ==1:
+
+            betas = np.append(betas, self.nb_parma)
+
         self.name_deleter = []
         group_rpm = None
         group_dist = []
@@ -962,13 +1075,15 @@ class ObjectiveFunction(object):
             [''] * (len(names) - len(self.transform_id_names))
         self.coeff_names = names
 
+        '''
         if betas is not None:
             try:
                 if len(betas) != len(names):
-                    print('
-
+                    print('standard_model', no_draws)
+
             except Exception as e:
                 print(e)
+        '''
 
 
 
@@ -993,7 +1108,8 @@ class ObjectiveFunction(object):
         if not isinstance(self.pvalues, np.ndarray):
             raise Exception
 
-
+        if 'nb' in self.coeff_names and self.no_extra_param:
+            self.pvalues = np.append(self.pvalues,0)
 
         if self.please_print or save_state:
 
@@ -1009,17 +1125,22 @@ class ObjectiveFunction(object):
 
             if solution is not None:
                 print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")
-
+
             self.pvalues = [self.round_with_padding(
                 x, 2) for x in self.pvalues]
             signif_list = self.pvalue_asterix_add(self.pvalues)
             if model == 1:
 
-                self.coeff_[-1] = np.
-                if self.
+                #self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
+                if self.no_extra_param:
+                    self.coeff_ = np.append(self.coeff_, self.nb_parma)
+                    self.stderr = np.append(self.stderr, 0.00001)
+                    self.zvalues = np.append(self.zvalues, 50)
+
+                elif self.coeff_[-1] < 0.25:
                     print(self.coeff_[-1], 'Warning Check Dispersion')
                     print(np.exp(self.coeff_[-1]))
-                    self.coeff_[-1] = np.exp(self.coeff_[-1])  # min possible value for negbinom
+                    #self.coeff_[-1] = np.exp(self.coeff_[-1])  # min possible value for negbinom
 
             self.coeff_ = [self.round_with_padding(x, 2) for x in self.coeff_]
 
@@ -1231,7 +1352,7 @@ class ObjectiveFunction(object):
         with open(filename, 'w') as file:
             file.write(content)
 
-    def define_poissible_transforms(self, transforms) -> list:
+    def define_poissible_transforms(self, transforms, extra= None) -> list:
         transform_set = []
         if not isinstance(self._x_data, pd.DataFrame):
             x_data = self._x_data.reshape(self.N * self.P, -1).copy()
@@ -1242,6 +1363,7 @@ class ObjectiveFunction(object):
 
             if 'AADT' in self._characteristics_names[col]:
                 new_transform = [['log']]
+                #new_transform = [['no']]
                 transform_set = transform_set + new_transform
 
             elif all(x_data[col] <= 5):
@@ -1281,6 +1403,18 @@ class ObjectiveFunction(object):
 
         return transform_set
 
+    def poisson_mean_get_dispersion(self, betas, X, y):
+        eVy = self._loglik_gradient(betas, X, y, None, X, None, None, False, False, dispersion=0,
+                                    return_EV=True,
+                                    zi_list=None, draws_grouped=None, Xgroup=None)
+
+        ab = ((y - eVy)**2 - eVy)/eVy
+        bb = eVy -1
+        disp = sm.OLS(ab.ravel(), bb.ravel()).fit()
+        gamma = disp.params[0]
+        #print(f'dispersion is {gamma}')
+        return gamma
+
     def validation(self, betas, y, X, Xr=None, dispersion=0, rdm_cor_fit=None, zi_list=None, exog_infl=None,
                    model_nature=None, halton=1, testing=1, validation=0):
         'validation if mu needs to be calculated'
@@ -1314,7 +1448,7 @@ class ObjectiveFunction(object):
                     XG = model_nature.get('XGtest')[:total_percent, :, :]
                 else:
                     XG = model_nature.get('XGtest')[total_percent:, :, :]
-
+
             else:
                 if 'XG' in model_nature:
                     XG = model_nature.get('XG')
@@ -1436,7 +1570,7 @@ class ObjectiveFunction(object):
         5: herogeneity_in _means
 
 
-        a: how to
+        a: how to transform the original data
         b: grab dispersion '''
 
         # todo: better way
@@ -1784,7 +1918,10 @@ class ObjectiveFunction(object):
         elif dispersion == 4:
             return 2
         else:
-
+            if self.no_extra_param:
+                return 0
+            else:
+                return 1
 
     def get_pvalue_info_alt(self, pvalues, names, sig_value=0.05, dispersion=0, is_halton=1, delete=0,
                             return_violated_terms=0):
@@ -1799,6 +1936,7 @@ class ObjectiveFunction(object):
 
         else:
             slice_this_amount = self.num_dispersion_params(dispersion)
+            slice_this_amount = 1  #TODO handle this
             if pvalues[-1] > sig_value:
                 vio_counts += 1
             subpvalues = pvalues[:-slice_this_amount].copy()
@@ -2223,7 +2361,7 @@ class ObjectiveFunction(object):
         sorted(my_dict, key=lambda x: x[0]['pval_percentage'])
 
     def get_fitness(self, vector, multi=False, verbose=False, max_routine=3):
-        obj_1 = 10.0 **
+        obj_1 = 10.0 ** 5
         obj_best = None
         sub_slns = list()
 
@@ -2234,7 +2372,7 @@ class ObjectiveFunction(object):
         try:
             self.repair(vector)
         except Exception as e:
-            print('
+            print('prolem repairing here')
             print(vector)
             print(e)
         layout = vector.copy()
@@ -2481,7 +2619,7 @@ class ObjectiveFunction(object):
         random.seed(seed)
 
     def set_random_seed(self):
-        print('
+        print('Imbedding Seed', self._random_seed)
         np.random.seed(self._random_seed)
 
         random.seed(self._random_seed)
@@ -2515,7 +2653,7 @@ class ObjectiveFunction(object):
         self._hmcr = (
             self._hmcr_min + ((self._hmcr_max - self._hmcr_min) / self._max_imp) * iteration)
 
-
+
 
     def update_par(self, iteration, is_sin=False):
         """
@@ -2683,7 +2821,7 @@ class ObjectiveFunction(object):
         grad_n = np.concatenate((grad_n_sub, grad_n_sub1), axis=1)
         return gradient, grad_n
 
-    def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False):
+    def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False, alpha = None):
         """
         Negative Binomial model score (gradient) vector of the log-likelihood
         Parameters
@@ -2703,9 +2841,43 @@ class ObjectiveFunction(object):
 
         """
 
-
+        # Calculate common terms
+        '''
+        n = len(y)
+        n, p, d = X.shape  # n: observations, p: panels (1 in your case), d: explanatory variables
+
+        # Flatten the data since there's only one panel, simplifying the operations
+        X_flat = X.reshape(n * p, d)
+        y_flat = y.flatten()
+        mu_flat = mu.flatten()
 
-
+        # Prepare score array
+        score = np.zeros(d + 1)  # +1 for alpha
+
+        # Compute the gradient for regression coefficients
+        for j in range(d):  # Exclude the last parameter (alpha)
+            score[j] = np.dot(X_flat[:, j], (y_flat - mu_flat))
+
+        # Compute the gradient for the dispersion parameter
+        if obs_specific:
+            # Adjust the calculation if observation-specific effects are considered
+            sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+                y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+            score[-1] = np.sum(sum_terms)
+        else:
+            # Standard calculation
+            sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+                y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+            score[-1] = np.sum(sum_terms)
+        return score
+        '''
+        #return score
+
+        try:
+            if alpha is None:
+                alpha = np.exp(params[-1])
+            else:
+                alpha = np.exp(params[-1])
            a1 = 1 / alpha * mu ** Q
            prob = a1 / (a1 + mu)
            exog = X
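Note: in the rewritten `NB_Score` the dispersion enters as `alpha = np.exp(params[-1])` — both branches of the new `if alpha is None` currently compute the same thing — i.e. the optimizer searches over log(alpha) so positivity never has to be enforced with bounds. A minimal sketch of that reparameterisation; the helper name and parameter layout are illustrative, not the package's API:

import numpy as np

# Unconstrained parameter vector: regression betas followed by theta = log(alpha).
def unpack_nb_params(params):
    beta = params[:-1]          # regression coefficients, unconstrained
    alpha = np.exp(params[-1])  # dispersion, strictly positive by construction
    return beta, alpha

beta, alpha = unpack_nb_params(np.array([0.2, -0.1, np.log(0.5)]))
print(beta, alpha)  # alpha == 0.5, recovered exactly from its log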
@@ -2747,7 +2919,8 @@ class ObjectiveFunction(object):
             return np.concatenate((dparams, dalpha),
                                   axis=1)
         except Exception as e:
-            print(
+            print(e)
+            print('NB score exception problem..')
             exc_type, exc_obj, exc_tb = sys.exc_info()
             fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
             print(exc_type, fname, exc_tb.tb_lineno)
@@ -2840,7 +3013,7 @@ class ObjectiveFunction(object):
         argument = prob.mean(axis=1)
         # if less than 0 penalise
         if np.min(argument) < 0:
-            print('
+            print('Error with args..')
         if np.min(argument) < limit:
             # add a penalty for too small argument of log
             log_lik += -np.sum(np.minimum(0.0, argument - limit)) / limit
@@ -3331,6 +3504,7 @@ class ObjectiveFunction(object):
         else:
             corr_pairs = list(itertools.combinations(corr_indices, 2))
 
+
         for ii, corr_pair in enumerate(corr_pairs):
             # lower cholesky matrix
             chol_mat_temp[tuple(reversed(corr_pair))] = chol[chol_count]
@@ -3358,7 +3532,7 @@ class ObjectiveFunction(object):
         a = 0
         b = 0
         stuff = []
-        #
+        # TODO get order
         for j, i in enumerate(list_sizes):
             br_mean = betas_hetro[a:i + a]
             a += i
@@ -3385,7 +3559,30 @@ class ObjectiveFunction(object):
             br_mean = betas_m
             br_sd = betas_sd  # Last Kr positions
             # Compute: betas = mean + sd*draws
-
+            if len(br_sd) != draws.shape[1]:
+                #get the same size as the mean
+                betas_random = self.Br.copy()
+
+                '''
+                c = self.get_num_params()[3:5]
+
+                cor = []
+                for i in range(c[0]):
+                    cor.append(i)
+
+                vall =[]
+                for i, val in enumerate(reversed(br_sd)):
+                    vall.append()
+
+                remaining = draws.shape[1] - len(betas_sd)
+                '''
+
+            else:
+
+
+                betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
+
+
         betas_random = self._apply_distribution(betas_random)
 
         return betas_random
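Note: the restored line `betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]` is the standard simulated random-parameters construction — each coefficient k gets N×R simulated values mean_k + sd_k · draw. A shape-checked sketch with illustrative dimensions:

import numpy as np

# N observations, K random coefficients, R simulation draws.
N, K, R = 4, 2, 1000
rng = np.random.default_rng(2)
draws = rng.standard_normal((N, K, R))   # e.g. normal (or Halton-based) draws
br_mean = np.array([1.0, -0.5])          # estimated means, one per coefficient
br_sd = np.array([0.3, 0.1])             # estimated standard deviations

# Broadcast means and sds over the (N, K, R) draw array.
betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
print(betas_random.shape, round(betas_random[:, 0, :].std(), 2))  # (4, 2, 1000), sd near 0.3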
@@ -3404,28 +3601,71 @@ class ObjectiveFunction(object):
         # if gamma <= 0.01: #min defined value for stable nb
         #     gamma = 0.01
 
+        #g = stats.gamma.rvs(gamma, scale = lam/gamma, size = 1.0 / gamma * lam ** Q )
+
+        #gg = stats.poisson.rvs(g)
+
+
+
+
         endog = y
         mu = lam
+        ''''
+        mu = lam*np.exp(gamma) #TODO check that this does not need to be multiplied
+        alpha = np.exp(gamma)
+
+        '''
         alpha = gamma
         size = 1.0 / alpha * mu ** Q
-
-
-
-
+
+        prob = size/(size+mu)
+
+
+
+        '''test'''
+
+
+        '''
+        size = 1 / np.exp(gamma) * mu ** 0
+        prob = size / (size + mu)
+        coeff = (gammaln(size + y) - gammaln(y + 1) -
+                 gammaln(size))
+        llf = coeff + size * np.log(prob) + y * np.log(1 - prob)
+        '''
+
         try:
             # print(np.shape(y),np.shape(size), np.shape(prob))
-            #
+            #gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
+            #import time
+            #start_time = time.time()
 
+
+            # Measure time for negbinom_pmf
+            #start_time = time.time()
+            #for _ in range(10000):
+
+
+            #end_time = time.time()
+            #print("Custom functieon time:", end_time - start_time)
+            #start_time = time.time()
+            #for _ in range(10000):
+            '''
             gg = np.exp(
                 gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
                     y + alpha) * np.log(mu + alpha))
-
-
-
+            gg[np.isnan(gg)] = 1
+            '''
+            gg_alt = nbinom.pmf(y ,1/alpha, prob)
+            #gg_alt_2 = (gammaln(size + y) - gammaln(y + 1) -
+            #gammaln(size)) + size * np.log(prob) + y * np.log(1 - prob)
+            #print('check theses')
+            #gg = nbinom.pmf(y ,alpha, prob)
+            #end_time = time.time()
+            #print("Custom functieon time:", end_time - start_time)
 
         except Exception as e:
-            print(
-            return
+            print("Neg Binom error.")
+            return gg_alt
 
     def lindley_pmf(self, x, r, theta, k=50):
         """
@@ -3492,7 +3732,7 @@ class ObjectiveFunction(object):
 
         endog = y
         mu = lam
-        alpha = gamma
+        alpha = np.exp(gamma)
         alpha = alpha * mu ** Q
         size = 1 / alpha * mu ** Q  # also r
         # self.rate_param = size
@@ -3572,21 +3812,8 @@ class ObjectiveFunction(object):
 
         if dispersion == 1 or dispersion == 4:  # nb
             # if model_nature is not None and 'dispersion_penalty' in model_nature:
-            #
-            #
-
-            # if abs(b_gam) < 0.01:
-            #    penalty += 1/np.abs(b_gam)
-
-            if b_gam >= 4.5:
-                penalty += b_gam
-                b_gam = 4.61
-            # b_gam = 7.9
-            # penalty += model_nature['dispersion_penalty'] -b_gam
-            # penalty += 1/np.max((0.01,abs(b_gam)))
-            # b_gam = model_nature['dispersion_penalty']
-
-            """
+            #b_gam = 1/np.exp(b_gam)
+            #print(b_gam)
             if b_gam <= 0:
                 #penalty += 100
                 #penalty += abs(b_gam)
@@ -3594,21 +3821,21 @@ class ObjectiveFunction(object):
                 #b_gam = 1
 
                 # if b_gam < 0.03:
-                penalty +=
+                penalty += min(1, np.abs(b_gam), 0)
 
-                b_gam = 0.
+                #b_gam = 0.001
             #
 
-            if b_gam >= 10:
-
+            #if b_gam >= 10:
+            #    penalty+= b_gam
 
-
-            b_gam = min_comp_val
+            # if b_gam == 0:
+            #b_gam = min_comp_val
             #b_gam = 0.03
 
-
+            # b_gam = abs(b_gam)
 
-
+
 
         elif dispersion == 2:
             if b_gam >= 1:
@@ -3628,8 +3855,15 @@ class ObjectiveFunction(object):
     def eXB_calc(self, params_main, Xd, offset, dispersion, b_gam=None):
 
         # print('this was 0')
-
+        if dispersion:
+            eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
+
+            #eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])+dispersion
+            #print('check if this holds size')
+        else:
+            eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
         eta = np.array(eta)
+
         # eta = np.float64(eta)
         # eta = np.dot(Xd, params_main)+offset[:,:,0]
         # eta2 = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
@@ -3648,7 +3882,7 @@ class ObjectiveFunction(object):
 
         else:
             # eVd = self.my_lindley(np.exp(np.clip(eta, None, EXP_UPPER_LIMIT)), 1.29)
-
+            eta = eta.astype('float')
             eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
         return eVd
 
@@ -3761,9 +3995,10 @@ class ObjectiveFunction(object):
         elif dispersion == 1:
 
             proba_r = self._nonlog_nbin(y, eVd, b_gam)
-
+
+
             # proba_d = self.dnegbimonli(y, eVd, b_gam )
-
+
 
         elif dispersion == 2:
 
@@ -3784,7 +4019,7 @@ class ObjectiveFunction(object):
             # proba_r = self.poisson_lognormal_pmf(y, eVd, sig)
             proba_r = np.array(store)
             proba_r = np.atleast_2d(proba_r).T
-
+
 
         else:
             raise Exception('not implemented other modeling forms')
@@ -3793,7 +4028,7 @@ class ObjectiveFunction(object):
             proba_p = self._prob_product_across_panels(
                 proba_r, self.panel_info)
             proba_r = proba_p
-        proba_r = np.clip(proba_r, min_comp_val,
+        proba_r = np.clip(proba_r, min_comp_val, max_comp_val)
         loglik = np.log(proba_r)
         return loglik
 
@@ -3801,6 +4036,8 @@ class ObjectiveFunction(object):
         if dispersion == 0 or dispersion == 3:
             return 0
         else:
+
+
             return 1
 
     def _prob_product_across_panels(self, pch, panel_info):
@@ -3856,7 +4093,7 @@ class ObjectiveFunction(object):
             if y[i] == 0:
                 gr_e[i] = 0
 
-        if self.is_dispersion(dispersion):
+        if self.is_dispersion(dispersion) and not self.no_extra_param:
             gr_d = np.zeros((N, 1))
             if dispersion == 1:
                 # trying alt
@@ -3960,12 +4197,13 @@ class ObjectiveFunction(object):
                 br, draws_, brstd, dis_fit_long)  # (N,K,R)
             dprod_r = dev.np.einsum("njk,njr -> nkr", Xdr,
                                     einsum_model_form, dtype=np.float64)  # (N,K,R)
-            der_prod_r = dprod_r * der * proba_n[:, None, :]  # (N,K,R)
-            der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :]  # I think this is the case check
-
-
+            #der_prod_r = dprod_r * der * proba_n[:, None, :]  # (N,K,R)
+            #der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :]  # I think this is the case check
+
+            der_prod_r = dprod_r * der * proba_n[:, None, :]  # or this one
+
             der_t = self._compute_derivatives(
-                br, draws_[:, draws_tril_idx, :], brstd, self.dist_fit)  # (N,K,R)
+                br[draws_tril_idx], draws_[:, draws_tril_idx, :], brstd, np.array(self.dist_fit)[draws_tril_idx])  # (N,K,R)
             # er_t = self._compute_derivatives(br, draws_, brstd[:, draws_tril_idx,: ], self.dist_fit, draws_tril_idx)
             der_prod_r_t = dprod_r[:, draws_tril_idx, :] * \
                 der_t * proba_n[:, None, :]  # (N,K,R)
@@ -4026,14 +4264,18 @@ class ObjectiveFunction(object):
             grad_n = self._concat_gradients(
                 (gr_f, gr_u, gr_s, gr_e)) / Rlik  # (N,K)
         else:
-
-
-
-
+            if self.no_extra_param:
+                grad_n = self._concat_gradients(
+                    (gr_f, gr_u, gr_s, gr_h, gr_hs)) / Rlik  # (N,K)
+            else:
+                grad_n = self._concat_gradients(
+                    (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik  # (N,K)
+            grad_n = np.nan_to_num(grad_n, nan=0, posinf=1000, neginf=-1000)
+            grad_n = np.clip(grad_n, -100, 100)
         n = np.shape(grad_n)[0]
         # subtract out mean gradient value
-
-
+        grad_n_sub = grad_n-(np.sum(grad_n, axis=0)/n)
+        grad_n = grad_n_sub
         grad = grad_n.sum(axis=0)
         return grad, grad_n
 
@@ -4095,9 +4337,9 @@ class ObjectiveFunction(object):
 
         elif dispersion == 1:
 
-            der =
+            der = self.NB_Score(betas, y, eVd, Xd, 0, obs_specific)
             if both:
-                grad_n =
+                grad_n = self.NB_Score(betas, y, eVd, Xd, 0, True)
             return np.nan_to_num(der, nan=200, posinf=200, neginf=-200), np.nan_to_num(grad_n, nan=140, posinf=140,
                                                                                        neginf=-140)
 
@@ -4184,7 +4426,7 @@ class ObjectiveFunction(object):
         return proba_r.sum(axis=1), np.squeeze(proba_r)
 
     def _penalty_betas(self, betas, dispersion, penalty, penalty_ap=100.0):
-        penalty_val = 0.
+        penalty_val = 0.1
        penalty_val_max = 130
 
         # print('change_later')
@@ -4200,8 +4442,8 @@ class ObjectiveFunction(object):
             if abs(i) > penalty_val_max:
                 penalty += abs(i)
 
-            #
-            #
+            #if abs(i) < penalty_val:
+            #    penalty += 5
 
         # penalty = 0
         return penalty
@@ -4308,8 +4550,7 @@ class ObjectiveFunction(object):
                 index += 1
 
         brstd = br_std
-
-        print(brstd)
+
 
 
     def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
@@ -4341,7 +4582,7 @@ class ObjectiveFunction(object):
             penalty = self._penalty_betas(
                 betas, dispersion, penalty, float(len(y) / 10.0))
             self.n_obs = len(y)  # feeds into gradient
-            if draws is None and draws_grouped is None and (
+            if draws is None and draws_grouped is None and (model_nature is None or
                    'draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
                 #TODO do i shuffle the draws
                 if type(Xd) == dict:
@@ -4351,7 +4592,7 @@ class ObjectiveFunction(object):
                         P += Xd[key].shape[1]
                         Kf += Xd[key].shape[2]
                 else:
-                    self.naming_for_printing(betas, 1, dispersion,
+                    self.naming_for_printing(betas, 1, dispersion, model_nature=model_nature)
                     N, P, Kf = Xd.shape[0], Xd.shape[1], Xd.shape[2]
                 betas = np.array(betas)
                 Bf = betas[0:Kf]  # Fixed betas
@@ -4381,7 +4622,7 @@ class ObjectiveFunction(object):
                 llf_main = self.loglik_obs(
                     y, eVd, dispersion, main_disper, lindley_disp, betas)
 
-
+                llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
 
                 loglik = llf_main.sum()
 
@@ -4394,7 +4635,7 @@ class ObjectiveFunction(object):
                     penalty = self.regularise_l2(betas)
 
                 if not np.isreal(loglik):
-                    loglik = -
+                    loglik = - 10000000.0
 
                 output = (-loglik + penalty,)
                 if return_gradient:
@@ -4402,14 +4643,19 @@ class ObjectiveFunction(object):
                     if return_gradient_n:
                         der, grad_n = self.simple_score_grad(
                             betas, y, eVd, Xd, dispersion, both=True)
-                        return (-loglik + penalty, -der, grad_n)
+                        #return (-loglik + penalty, -der, grad_n)*self.minimize_scaler
+                        scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel(), grad_n))
+                        return scaled_tuple
                    else:
                         der = self.simple_score_grad(
                             betas, y, eVd, Xd, dispersion, both=False)
-
-
+                        scaled_tuple = tuple(
+                            x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel()))
+                        return scaled_tuple
+                        #return (-loglik + penalty, -der.ravel())*self.minimize_scaler
                else:
-
+
+                    return (-loglik + penalty)*self.minimize_scaler
            # Else, we have draws
            self.n_obs = len(y) * self.Ndraws  #todo is this problematic
            penalty += self._penalty_betas(
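Note: the `scaled_tuple` returns multiply the objective and its analytic gradient by the same `minimize_scaler`, which preserves the minimiser while taming step sizes. A toy demonstration of that invariant on a quadratic objective (all names here are illustrative):

import numpy as np
from scipy.optimize import minimize

target = np.array([2.0, -1.0])

def obj_and_grad(x, scale):
    # Objective and its exact gradient, both multiplied by the same constant,
    # mirroring the scaled_tuple pattern in _loglik_gradient.
    f = 0.5 * np.sum((x - target) ** 2)
    g = x - target
    return tuple(v * scale for v in (f, g))

res_raw = minimize(obj_and_grad, np.zeros(2), args=(1.0,), jac=True)
res_scaled = minimize(obj_and_grad, np.zeros(2), args=(1e-3,), jac=True)
print(np.allclose(res_raw.x, res_scaled.x, atol=1e-5))  # same minimiser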
@@ -4420,7 +4666,7 @@ class ObjectiveFunction(object):
|
|
|
4420
4666
|
# Kf =0
|
|
4421
4667
|
betas = np.array(betas)
|
|
4422
4668
|
betas = dev.to_gpu(betas) # TODO fix mepotnetially problem
|
|
4423
|
-
self.naming_for_printing(betas, 0, dispersion,
|
|
4669
|
+
self.naming_for_printing(betas, 0, dispersion, model_nature=model_nature)
|
|
4424
4670
|
y = dev.to_gpu(y)
|
|
4425
4671
|
if draws is not None and draws_grouped is not None:
|
|
4426
4672
|
draws = np.concatenate((draws_grouped, draws), axis=1)
|
|
@@ -4483,7 +4729,9 @@ class ObjectiveFunction(object):
|
|
|
4483
4729
|
Kf = 0
|
|
4484
4730
|
else:
|
|
4485
4731
|
if n_coeff != len(betas):
|
|
4486
|
-
raise Exception
|
|
4732
|
+
raise Exception(
|
|
4733
|
+
|
|
4734
|
+
)
|
|
4487
4735
|
Bf = betas[0:Kf] # Fixed betas
|
|
4488
4736
|
|
|
4489
4737
|
|
|
@@ -4509,11 +4757,11 @@ class ObjectiveFunction(object):
|
|
|
4509
4757
|
# brstd), draws_) # Get random coefficients, old method
|
|
4510
4758
|
Br = self._transform_rand_betas(br,
|
|
4511
4759
|
brstd, draws_) # Get random coefficients
|
|
4512
|
-
self.naming_for_printing(betas, dispersion=dispersion,
|
|
4760
|
+
self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
|
|
4513
4761
|
self.Br = Br.copy()
|
|
4514
4762
|
|
|
4515
4763
|
else:
|
|
4516
|
-
self.naming_for_printing(betas, dispersion=dispersion,
|
|
4764
|
+
self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
|
|
4517
4765
|
chol_mat = self._chol_mat(
|
|
4518
4766
|
len(self.rdm_cor_fit), br, brstd, self.rdm_cor_fit)
|
|
4519
4767
|
self.chol_mat = chol_mat.copy()
|
|
@@ -4585,7 +4833,8 @@ class ObjectiveFunction(object):
|
|
|
4585
4833
|
eVd = self.lam_transform(eVd, dispersion, betas[-1])
|
|
4586
4834
|
|
|
4587
4835
|
if self.is_dispersion(dispersion):
|
|
4588
|
-
|
|
4836
|
+
if not self.no_extra_param:
|
|
4837
|
+
penalty, betas[-1] = self._penalty_dispersion(
|
|
4589
4838
|
dispersion, betas[-1], eVd, y, penalty, model_nature)
|
|
4590
4839
|
|
|
4591
4840
|
'''
|
|
@@ -4629,38 +4878,22 @@ class ObjectiveFunction(object):
|
|
|
4629
4878
|
proba.append(dev.to_cpu(proba_))
|
|
4630
4879
|
|
|
4631
4880
|
lik = np.stack(proba).sum(axis=0) / R # (N, )
|
|
4632
|
-
lik = np.clip(lik, min_comp_val,
|
|
4881
|
+
lik = np.clip(lik, min_comp_val, max_comp_val)
|
|
4633
4882
|
# lik = np.nan_to_num(lik, )
|
|
4634
4883
|
loglik = np.log(lik)
|
|
4635
4884
|
llf_main = loglik
|
|
4636
|
-
if 'exog_infl' in model_nature:
|
|
4637
|
-
params_infl = betas[Kf:Kf + len(model_nature.get('exog_infl'))]
|
|
4638
|
-
params_main = Bf
|
|
4639
|
-
exog_infl = model_nature.get('exog_inflX')
|
|
4640
|
-
llf_main = llf_main.ravel() # TODO test this
|
|
4641
|
-
w = self.predict_logit_part(params_infl, exog_infl)
|
|
4642
|
-
|
|
4643
|
-
w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
|
|
4644
|
-
|
|
4645
|
-
zero_idx = np.nonzero(y == 0)[0]
|
|
4646
|
-
nonzero_idx = np.nonzero(y)[0] # FIXME should shape be unravelled
|
|
4647
|
-
|
|
4648
|
-
llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1) # TODO test this i added ravel to this code
|
|
4649
|
-
llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
|
|
4650
|
-
llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
|
|
4651
|
-
loglik = llf.sum()
|
|
4652
|
-
else:
|
|
4653
4885
|
|
|
4654
|
-
|
|
4886
|
+
|
|
4887
|
+
loglik = loglik.sum()
|
|
4655
4888
|
|
|
4656
4889
|
loglik = np.clip(loglik, log_lik_min, log_lik_max)
|
|
4657
4890
|
if self.power_up_ll:
|
|
4658
4891
|
penalty += self.regularise_l2(betas)
|
|
4659
|
-
|
|
4892
|
+
|
|
4660
4893
|
penalty += self.regularise_l2(betas)
|
|
4661
4894
|
if not return_gradient:
|
|
4662
4895
|
|
|
4663
|
-
output = (-loglik + penalty,)
|
|
4896
|
+
output = ((-loglik + penalty)*self.minimize_scaler,)
|
|
4664
4897
|
if verbose > 1:
|
|
4665
4898
|
print(
|
|
4666
4899
|
f"Evaluation {self.total_fun_eval} Log-Lik.={-loglik:.2f}")
|
|
@@ -4690,19 +4923,24 @@ class ObjectiveFunction(object):
|
|
|
4690
4923
|
# Hinv = np.linalg.inv(H)
|
|
4691
4924
|
# except Exception:
|
|
4692
4925
|
# Hinv = np.linalg.pinv(H)
|
|
4693
|
-
|
|
4926
|
+
scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad, grad_n))
|
|
4927
|
+
return scaled_tuple
|
|
4928
|
+
#output = (-loglik + penalty, -grad, grad_n)*self.minimize_scaler
|
|
4694
4929
|
|
|
4695
|
-
return output
|
|
4930
|
+
#return output
|
|
4696
4931
|
else:
|
|
4932
|
+
scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad))
|
|
4933
|
+
return scaled_tuple
|
|
4934
|
+
#output = (-loglik + penalty, -grad)*self.minimize_scaler
|
|
4697
4935
|
|
|
4698
|
-
output
|
|
4699
|
-
|
|
4700
|
-
return output
|
|
4936
|
+
#return output
|
|
4701
4937
|
except Exception as e:
|
|
4702
4938
|
traceback.print_exc()
|
|
4703
4939
|
print(e)
|
|
4704
4940
|
|
|
4705
|
-
|
|
4941
|
+
def minimize_function(self, loglike):
|
|
4942
|
+
r'Takes the logliklihood function and tranforms it to a more handed minimization function'
|
|
4943
|
+
return loglike/self.n_obs
|
|
4706
4944
|
def print_chol_mat(self, betas):
|
|
4707
4945
|
print(self.chol_mat)
|
|
4708
4946
|
self.get_br_and_bstd(betas)
|
|
@@ -4938,12 +5176,16 @@ class ObjectiveFunction(object):
|
|
|
4938
5176
|
return H
|
|
4939
5177
|
|
|
4940
5178
|
def _minimize(self, loglik_fn, x, args, method, tol, options, bounds=None, hess_calc=None):
|
|
4941
|
-
|
|
5179
|
+
#method = 'BFGS'
|
|
4942
5180
|
if method == "BFGS":
|
|
4943
5181
|
|
|
4944
5182
|
try:
|
|
5183
|
+
argbs = list(args)
|
|
4945
5184
|
|
|
4946
|
-
|
|
5185
|
+
argbs[7] = True
|
|
5186
|
+
argsb = tuple(argbs)
|
|
5187
|
+
a = self._bfgs(loglik_fn, x, args=argsb, tol=tol, **options)
|
|
5188
|
+
return self._bfgs(loglik_fn, x, args=args, tol=tol, **options)
|
|
4947
5189
|
|
|
4948
5190
|
except:
|
|
4949
5191
|
return minimize(loglik_fn, x, args=args, jac=args[6], method='BFGS', tol=tol, options=options)
|
|
@@ -4966,7 +5208,7 @@ class ObjectiveFunction(object):
|
|
|
4966
5208
|
H = self.numerical_hessian(lambda x: self._loglik_gradient(x, *argbs), result.x, eps=1e-7 * self.n_obs)
|
|
4967
5209
|
result['Hessian'] = H
|
|
4968
5210
|
result['hess_inv'] = np.linalg.pinv(H)
|
|
4969
|
-
|
|
5211
|
+
|
|
4970
5212
|
standard_errors = np.sqrt(np.diag(np.linalg.pinv(H)))
|
|
4971
5213
|
return result
|
|
4972
5214
|
# return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS', options= {'gtol':1e-7*self.N}*self.Ndraws)
|
|
@@ -5190,7 +5432,7 @@ class ObjectiveFunction(object):
|
|
|
5190
5432
|
if self.power_up_ll:
|
|
5191
5433
|
loglikelihood =-optim_res['fun']/2 - penalty
|
|
5192
5434
|
else:
|
|
5193
|
-
loglikelihood = -optim_res['fun'] - penalty
|
|
5435
|
+
loglikelihood = -optim_res['fun']/self.minimize_scaler - penalty
|
|
5194
5436
|
|
|
5195
5437
|
# self.coeff_names = coeff_names
|
|
5196
5438
|
# self.total_iter = optim_res['nit']
|
|
@@ -5237,7 +5479,7 @@ class ObjectiveFunction(object):
|
|
|
5237
5479
|
return a
|
|
5238
5480
|
|
|
5239
5481
|
def fitRegression(self, mod,
|
|
5240
|
-
dispersion=0, maxiter=2000, batch_size=None, num_hess=False):
|
|
5482
|
+
dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):
|
|
5241
5483
|
|
|
5242
5484
|
"""
|
|
5243
5485
|
Fits a poisson regression given data and outcomes if dispersion is not declared
|
|
@@ -5249,12 +5491,12 @@ class ObjectiveFunction(object):
|
|
|
5249
5491
|
betas_est - array. Coefficients which maximize the negative log-liklihood.
|
|
5250
5492
|
"""
|
|
5251
5493
|
# Set defualt method
|
|
5252
|
-
|
|
5253
|
-
|
|
5254
|
-
|
|
5494
|
+
#TODO, the inital fit worked but it throws
|
|
5495
|
+
|
|
5496
|
+
|
|
5255
5497
|
|
|
5256
5498
|
sol = Solution()
|
|
5257
|
-
|
|
5499
|
+
|
|
5258
5500
|
tol = {'ftol': 1e-8, 'gtol': 1e-6}
|
|
5259
5501
|
is_delete = 0
|
|
5260
5502
|
dispersion = mod.get('dispersion')
|
|
@@ -5266,10 +5508,7 @@ class ObjectiveFunction(object):
|
|
|
5266
5508
|
if self.hess_yes == False:
|
|
5267
5509
|
method2 = 'BFGS_2'
|
|
5268
5510
|
method2 = self.method_ll
|
|
5269
|
-
# method2 = 'BFGS_2'
|
|
5270
5511
|
|
|
5271
|
-
# method2 = 'BFGS_2'
|
|
5272
|
-
# method2 = 'dogleg'
|
|
5273
5512
|
bic = None
|
|
5274
5513
|
pvalue_alt = None
|
|
5275
5514
|
zvalues = None
|
|
@@ -5286,8 +5525,10 @@ class ObjectiveFunction(object):
|
|
|
5286
5525
|
_g, pg, kg = 0, 0, 0
|
|
5287
5526
|
|
|
5288
5527
|
dispersion_param_num = self.is_dispersion(dispersion)
|
|
5528
|
+
if self.no_extra_param:
|
|
5529
|
+
dispersion_param_num =0
|
|
5289
5530
|
|
|
5290
|
-
paramNum = self.get_param_num(dispersion)
|
|
5531
|
+
#paramNum = self.get_param_num(dispersion)
|
|
5291
5532
|
self.no_random_paramaters = 0
|
|
5292
5533
|
if 'XG' in mod:
|
|
5293
5534
|
XX = np.concatenate((mod.get('X'), mod.get('XG'), mod.get('Xr'), mod.get('XH')), axis=2)
|
|
@@ -5313,7 +5554,7 @@ class ObjectiveFunction(object):
|
|
|
5313
5554
|
XX_test = mod.get('Xr_test')
|
|
5314
5555
|
|
|
5315
5556
|
bb = np.random.uniform(
|
|
5316
|
-
-0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num
|
|
5557
|
+
-0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num)
|
|
5317
5558
|
|
|
5318
5559
|
if method == 'L-BFGS-B':
|
|
5319
5560
|
if dispersion == 0:
|
|
@@ -5340,17 +5581,28 @@ class ObjectiveFunction(object):
|
|
|
5340
5581
|
else:
|
|
5341
5582
|
bb[0] = self.constant_value
|
|
5342
5583
|
if dispersion == 1:
|
|
5343
|
-
|
|
5584
|
+
if not self.no_extra_param:
|
|
5585
|
+
bb[-1] = self.negative_binomial_value
|
|
5344
5586
|
bounds = None
|
|
5345
5587
|
|
|
5588
|
+
|
|
5589
|
+
|
|
5346
5590
|
# intial_beta = minimize(self._loglik_gradient, bb, args =(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method = 'nelder-mead', options={'gtol': 1e-7*len(XX)})
|
|
5347
5591
|
hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
|
|
5348
|
-
|
|
5592
|
+
|
|
5593
|
+
if self.no_extra_param:
|
|
5594
|
+
dispersion_poisson = 0
|
|
5595
|
+
initial_beta = self._minimize(self._loglik_gradient, bb,
|
|
5349
5596
|
args=(XX, y, None, None, None, None, calc_gradient, hess_est,
|
|
5350
|
-
|
|
5597
|
+
dispersion_poisson, 0, False, 0, None, None, None, None, None,
|
|
5351
5598
|
mod),
|
|
5352
5599
|
method=method2, tol=1e-5, options={'gtol': tol['gtol']},
|
|
5353
5600
|
bounds=bounds)
|
|
5601
|
+
if dispersion:
|
|
5602
|
+
nb_parma = self.poisson_mean_get_dispersion(initial_beta.x, XX, y)
|
|
5603
|
+
|
|
5604
|
+
|
|
5605
|
+
|
|
5354
5606
|
|
|
5355
5607
|
if method2 == 'L-BFGS-B':
|
|
5356
5608
|
if hasattr(initial_beta.hess_inv, 'todense'):
|
|
@@ -5363,7 +5615,7 @@ class ObjectiveFunction(object):
         if initial_beta is not None and np.isnan(initial_beta['fun']):
             initial_beta = self._minimize(self._loglik_gradient, bb,
                                           args=(XX, y, None, None, None, None, True, True, dispersion,
-                                                0, False, 0, None,
+                                                0, False, 0, None, None, None, None, None, mod),
                                           method=method2, tol=tol['ftol'], options={'gtol': tol['gtol']})
 
         if initial_beta is not None and not np.isnan(initial_beta['fun']):
@@ -5387,24 +5639,24 @@ class ObjectiveFunction(object):
                               loglik=log_ll_fixed, num_parm=paramNum, GOF=other_measures)
 
             self.naming_for_printing(
-                initial_beta['x'], 1, dispersion,
+                initial_beta['x'], 1, dispersion, model_nature=mod)
 
             if self.is_multi:
                 in_sample_mae = self.validation(
                     initial_beta['x'], mod.get('y'), mod.get('X'), dispersion=dispersion,
-                    rdm_cor_fit=self.rdm_cor_fit,
+                    rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
                     testing=0)
 
                 sol.add_objective(TRAIN=in_sample_mae)
                 MAE_out = self.validation(
                     initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
-                    rdm_cor_fit=self.rdm_cor_fit,
+                    rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0)
                 sol.add_objective(TEST=MAE_out)
 
                 if self.val_percentage >0:
                     MAE_VAL = self.validation(
                         initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
-                        rdm_cor_fit=self.rdm_cor_fit,
+                        rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
                         validation=1)
                     sol.add_objective(VAL=MAE_VAL)
             if sol[self._obj_1] <= self.best_obj_1:
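For the multi-objective search, each candidate solution records its in-sample (TRAIN), out-of-sample (TEST) and, when `val_percentage > 0`, hold-out (VAL) mean absolute errors alongside the likelihood-based criteria. A self-contained stand-in for that bookkeeping, with made-up arrays and a 60/20/20 split purely for illustration (the package's `validation` and `Solution` internals are not shown here):

```python
import numpy as np

def mae(beta, X, y):
    # mean absolute error of a log-link count prediction
    return float(np.mean(np.abs(y - np.exp(X @ beta))))

rng = np.random.default_rng(4)
X = np.column_stack([np.ones(400), rng.normal(size=400)])
y = rng.poisson(np.exp(X @ np.array([0.3, 0.25])))
train, test, val = np.split(rng.permutation(400), [240, 320])   # 60/20/20 split
beta_hat = np.array([0.3, 0.25])        # stand-in for the estimated coefficients
objectives = {'TRAIN': mae(beta_hat, X[train], y[train]),
              'TEST': mae(beta_hat, X[test], y[test]),
              'VAL': mae(beta_hat, X[val], y[val])}
print(objectives)
```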
@@ -5448,7 +5700,7 @@ class ObjectiveFunction(object):
 
             b = [b[i] if i > len(self.none_handler(self.fixed_fit)) + len(
                 self.none_handler(self.rdm_fit)) + len(
-                self.none_handler(self.rdm_cor_fit)) else b[i] / 1
+                self.none_handler(self.rdm_cor_fit)) else b[i] / 1 for i in range(len(b))]
         else:
             b = bb
 
@@ -5458,9 +5710,10 @@ class ObjectiveFunction(object):
         else:
             b = np.insert(b, -1, np.random.uniform(0.05, 0.1))
         if dispersion == 1:
-
-
-            b[-1]
+            if not self.no_extra_param:
+                b[-1] = np.abs(b[-1])
+                if b[-1] > 10:
+                    b[-1] = 5
         elif dispersion == 2:
             b[-1] = .5
         if method == 'L-BFGS-B' or method2 == 'L-BFGS-B':
@@ -5509,9 +5762,6 @@ class ObjectiveFunction(object):
 
                 bounds.append((np.random.uniform(0.05, .15), bob2[count] + 7))
                 count += 1
-
-
-
             elif ii < jj:
                 if bob2[count] > 0:
 
@@ -5584,18 +5834,35 @@ class ObjectiveFunction(object):
         mod['dispersion_penalty'] = np.abs(b[-1])
         grad_args = (
             X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0, self.rdm_cor_fit,
-            None,
+            None, None, draws_grouped, XG, mod)
         # self.gradients_est_yes = (1, 1)
 
         if draws is None and draws_hetro is not None:
             print('hold')
-
-
-            self.rdm_cor_fit, None, exog_infl, draws_grouped, XG, mod),
-            method=method2, tol=tol['ftol'],
-            options={'gtol': tol['gtol']}, bounds=bounds,
-            hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+        #self.grad_yes = True
+        #self.hess_yes = True
 
+        if self.no_extra_param:
+            dispersion_poisson = 0
+        betas_est = self._minimize(self._loglik_gradient, b, args=(
+            X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion_poisson, 0, False, 0,
+            self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
+            method=method2, tol=tol['ftol'],
+            options={'gtol': tol['gtol']}, bounds=bounds,
+            hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+        if dispersion:
+            initial_fit_beta = betas_est.x
+            parmas = np.append(initial_fit_beta, nb_parma)
+            self.nb_parma = nb_parma
+            #print(f'neg binomi,{self.nb_parma}')
+            betas_est = self._minimize(self._loglik_gradient, initial_fit_beta, args=(
+                X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
+                self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
+                method=method2, tol=tol['ftol'],
+                options={'gtol': tol['gtol']}, bounds=bounds,
+                hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+
+        #print('refit with estimation of NB')
         # self.numerical_hessian_calc = True
         if self.numerical_hessian_calc:
             try:
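The same two-stage idea is applied to the full random-parameters model above: the optimiser first runs with the dispersion switched off (`dispersion_poisson = 0`), and when a dispersion term is requested it is re-run from the stage-one solution with the moment-estimated `nb_parma` held fixed. A hedged, self-contained sketch of that warm-start pattern follows; the NB-2 likelihood and all names below are illustrative, not the package's `_minimize`/`_loglik_gradient`.

```python
import numpy as np
from scipy.optimize import minimize
from scipy.special import gammaln

def negll(beta, X, y, alpha):
    # NB-2 negative log-likelihood with dispersion alpha held fixed;
    # alpha == 0 collapses to the Poisson log-likelihood
    mu = np.exp(X @ beta)
    if alpha <= 0:
        return -(y * np.log(mu) - mu - gammaln(y + 1)).sum()
    r = 1.0 / alpha
    return -(gammaln(y + r) - gammaln(r) - gammaln(y + 1)
             + r * np.log(r / (r + mu)) + y * np.log(mu / (r + mu))).sum()

rng = np.random.default_rng(3)
X = np.column_stack([np.ones(300), rng.normal(size=300)])
y = rng.poisson(np.exp(X @ np.array([0.4, 0.2])))
stage1 = minimize(negll, np.zeros(2), args=(X, y, 0.0), method='BFGS')  # Poisson first
alpha_hat = 0.1                                 # e.g. from a moment estimator
stage2 = minimize(negll, stage1.x, args=(X, y, alpha_hat), method='BFGS')  # warm start
```

Starting the second stage at `stage1.x` keeps the refit cheap, since the Poisson coefficients are usually close to the NB ones when overdispersion is moderate.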
@@ -5610,7 +5877,7 @@ class ObjectiveFunction(object):
                 betas_est = self._minimize(self._loglik_gradient, b, args=(
                     X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0,
                     self.rdm_cor_fit,
-                    None,
+                    None, None, draws_grouped, XG, mod),
                     method=method2, tol=tol['ftol'],
                     options={'gtol': tol['gtol']})
 
@@ -5646,7 +5913,7 @@ class ObjectiveFunction(object):
 
         paramNum = len(betas_est['x'])
         self.naming_for_printing(
-            betas_est['x'], 0, dispersion,
+            betas_est['x'], 0, dispersion, model_nature=mod)
 
         sol.add_objective(bic=bic, aic=aic,
                           loglik=log_ll, num_parm=paramNum, GOF=other_measures)
@@ -5656,19 +5923,19 @@ class ObjectiveFunction(object):
         try:
 
             in_sample_mae = self.validation(betas_est['x'], y, X, Xr, dispersion=dispersion,
-                                             rdm_cor_fit=self.rdm_cor_fit,
+                                            rdm_cor_fit=self.rdm_cor_fit,
                                             model_nature=mod, testing=0)
             sol.add_objective(TRAIN=in_sample_mae)
             y_test, X_test, Xr_test = mod.get('y_test'), mod.get('X_test'), mod.get('Xr_test')
             Xr_grouped_test = mod.get('Xrtest')
             MAE_test = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
-                                        rdm_cor_fit=self.rdm_cor_fit,
+                                       rdm_cor_fit=self.rdm_cor_fit,
                                        model_nature=mod)
 
             sol.add_objective(TEST=MAE_test)
-            if self.val_percentage >0:
+            if self.val_percentage > 0:
                 MAE_val = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
-                                           rdm_cor_fit=self.rdm_cor_fit,
+                                          rdm_cor_fit=self.rdm_cor_fit,
                                           model_nature=mod, validation=1)
                 sol.add_objective(VAL=MAE_val)
 
@@ -5894,7 +6161,7 @@ class ObjectiveFunction(object):
         return delim + self._model_type_codes[dispersion]
 
     def self_standardize_positive(self, X):
-        scaler =
+        scaler = MinMaxScaler()
         if type(X) == list:
             return X
 
@@ -5904,12 +6171,26 @@ class ObjectiveFunction(object):
             # Reshaping to 2D - combining the last two dimensions
             df_tf_reshaped = X.reshape(original_shape[0], -1)
             df_tf_scaled = scaler.fit_transform(df_tf_reshaped)
-            df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
+            # df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
             # Reshape back to original 3D shape if necessary
             df_tf = df_tf_scaled.reshape(original_shape)
             return df_tf
         else:
-
+            # Initialize the MinMaxScaler
+            scaler = MinMaxScaler()
+            float_columns = X.select_dtypes(include=['float64', 'float32', 'int']).columns.difference(['const', 'offset, "EXPOSE', 'Constant', 'constant'])
+            non_numeric_columns = X.select_dtypes(exclude=['float64', 'float32', 'int']).columns
+
+            # Fit the scaler to the float columns and transform them
+            X[float_columns] = scaler.fit_transform(X[float_columns])
+            # Fit the scaler to the data and transform it
+            # scaled_data = scaler.fit_transform(X)
+
+            # Convert the result back to a DataFrame
+            # scaled_df = pd.DataFrame(scaled_data, columns=X.columns)
+
+
+            return X
 
     def make_regression_from_terms(self, fixed=None, rdm=None, rdm_cor_fit=None, distribution=None, dispersion=None,
                                    *args, **kwargs):
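`self_standardize_positive` now min-max scales instead of z-scoring: a 3D panel array is flattened to 2D, scaled to [0, 1], and reshaped back, while a DataFrame has only its numeric columns scaled, with constant/offset columns excluded so intercept and exposure terms keep their original values. A minimal sketch of both paths (not the package's code; the column names are made up):

```python
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# 3D array path: flatten, scale each flattened column to [0, 1], restore shape
X3 = np.random.default_rng(1).normal(size=(50, 4, 3))   # (obs, panel, vars)
shape = X3.shape
X3 = MinMaxScaler().fit_transform(X3.reshape(shape[0], -1)).reshape(shape)
assert X3.min() >= 0 and X3.max() <= 1

# DataFrame path: scale numeric columns only, skipping the constant column
df = pd.DataFrame({'const': 1.0, 'x1': np.random.rand(50), 'x2': np.random.rand(50)})
float_cols = df.select_dtypes(include=['float64', 'float32', 'int']).columns.difference(['const'])
df[float_cols] = MinMaxScaler().fit_transform(df[float_cols])
```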
@@ -5964,8 +6245,9 @@ class ObjectiveFunction(object):
                     df_test[:, :, idx], model_nature.get('transformations')[idx] = self.transformer(
                         t, idx, df_test[:, :, idx])
                     if np.max(df_tf[:, :, idx]) >= 77000:
+                        # TODO: need to normalise the data
 
-
+                        print('should not be possible')
 
         self.define_selfs_fixed_rdm_cor(model_nature)
         indices = self.get_named_indices(self.fixed_fit)
@@ -6022,7 +6304,7 @@ class ObjectiveFunction(object):
         model_nature['XH'] = XH
         X_test = None
         if np.isin(X, [np.inf, -np.inf, None, np.nan]).any():  # type: ignore
-            raise Exception('there is some kind of error')
+            raise Exception('there is some kind of error in X')
 
         # numpy data setup for estimation
         indices2 = self.get_named_indices(self.rdm_fit)
@@ -6105,24 +6387,24 @@ class ObjectiveFunction(object):
         if self.is_quanitifiable_num(obj_1[self._obj_1]) and pvalues is not None:
             self.bic = obj_1['bic']
             self.pvalues = pvalues
-            if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c",
+            if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c", 'grp', 'xh']):
                 # todo: probably delete
                 self.naming_for_printing(
-                    pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
+                    pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
                     obj_1, model_nature)
             else:
                 if is_delete == 0:
                     # todo: probably delete
                     self.naming_for_printing(
                         pvalues, 1, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
-
+                        obj_1, model_nature)
             self.coeff_ = betas
             self.stderr = stderr
             self.zvalues = zvalues
             self.log_lik = log_lik
             if self.significant == 0:
 
-
+
             if not self.test_flag:
                 alpha, alpha_rdm, alpha_cor_rdm = self.modify(
                     self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
@@ -6173,6 +6455,53 @@ class ObjectiveFunction(object):
 
         return obj_1, model_nature
 
+    def get_X_tril(self):
+        '''For correlations find the repeating terms'''
+        varnames = self.none_join([self.rdm_grouped_fit, self.rdm_fit, self.rdm_cor_fit])
+        rv_count_all = 0
+        chol_count = 0
+        rv_count = 0
+        corr_indices = []
+        rv_indices = []
+        for ii, var in enumerate(varnames):  # TODO: BUGFIX
+            if var in self.none_handler(self.rdm_cor_fit):
+                is_correlated = True
+            else:
+                is_correlated = False
+
+            rv_count_all += 1
+            if is_correlated:
+                chol_count += 1
+            else:
+                rv_count += 1
+
+            if var in self.none_handler(self.rdm_cor_fit):
+
+                corr_indices.append(rv_count_all - 1)  # TODO: what does this do
+
+            else:
+                rv_indices.append(rv_count_all - 1)
+
+        # for s.d.: gr_w = (Obs prob. minus predicted probability) * obs. var * random draw
+        draws_tril_idx = np.array([corr_indices[j]
+                                   for i in range(len(self.none_handler(self.rdm_cor_fit)))
+                                   for j in range(i + 1)])  # varnames pos.
+        X_tril_idx = np.array([corr_indices[i]
+                               for i in range(len(self.none_handler(self.rdm_cor_fit)))
+                               for j in range(i + 1)])
+        # Find the s.d. for random variables that are not correlated
+        var_uncor = self.none_join([self.rdm_grouped_fit, self.rdm_fit])
+        range_var = [x for x in
+                     range(len(self.none_handler(var_uncor)))]
+        range_var = sorted(range_var)
+        draws_tril_idx = np.array(np.concatenate((range_var, draws_tril_idx)))
+        X_tril_idx = np.array(np.concatenate((range_var, X_tril_idx)))
+        draws_tril_idx = draws_tril_idx.astype(int)
+        X_tril_idx = X_tril_idx.astype(int)
+        return X_tril_idx
+
+
+
     def modifyn(self, data):
         select_data = self._characteristics_names
         alpha = np.isin(select_data, [item.split(':')[0] for item in data['fixed_fit']]).astype(int).tolist()
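The new `get_X_tril` helper maps each free element of the Cholesky factor of the correlated random parameters back to the draw and design column it multiplies: for K correlated parameters there are K*(K+1)/2 lower-triangular terms. A compact, standalone illustration of just that indexing, with made-up inputs (three correlated parameters at positions 0-2):

```python
import numpy as np

corr_indices = [0, 1, 2]            # positions of 3 correlated parameters
K = len(corr_indices)
# draw index for each lower-triangular Cholesky element (row-major order)
draws_tril_idx = np.array([corr_indices[j] for i in range(K) for j in range(i + 1)])
# design-matrix column index for the same elements
X_tril_idx = np.array([corr_indices[i] for i in range(K) for j in range(i + 1)])
print(draws_tril_idx)  # [0 0 1 0 1 2]
print(X_tril_idx)      # [0 1 1 2 2 2]
```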
@@ -6380,23 +6709,35 @@ class ObjectiveFunction(object):
         # N, D = draws.shape[0], draws.shape[1]
         N, R, Kr = draws.shape[0], draws.shape[2], draws.shape[1]
         der = dev.np.ones((N, Kr, R), dtype=draws.dtype)
-        if len(self.none_handler(self.rdm_cor_fit)) == 0:
-            Br_come_one = self.Br.copy()
-            # Br_come_one =
-        else:
 
-            Br_come_one = self.Br.copy()
         # betas_random = self._transform_rand_betas(betas, betas_std, draws)
         # todo: make sure this works for ln and truncated normal
         if any(set(distribution).intersection(['ln_normal', 'tn_normal'])):
-
+
+            # print('check this, intersection should not happen for all')
+
+            if der.shape[1] != draws.shape[1]:
+                print('why')
             Br_come_one = self._transform_rand_betas(betas, betas_std, draws)
+            if der.shape[1] != draws.shape[1]:
+                print('why')
+            # TODO: need to get the structure of the rdms
             for k, dist_k in enumerate(distribution):
                 if dist_k == 'ln_normal':
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                     der[:, k, :] = Br_come_one[:, k, :]
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                 elif dist_k == 'tn_normal':
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                     der[:, k, :] = 1 * (Br_come_one[:, k, :] > 0)
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
 
+        if der.shape[1] != draws.shape[1]:
+            print('why')
         return der
 
     def _copy_size_display_as_ones(self, matrix):
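The repeated `der.shape` guards above are debug checks around the derivative of the transformed random betas; the underlying chain rule itself is simple. For a lognormal coefficient b = exp(m + s*d) the derivative with respect to the mean is exp(m + s*d), i.e. the transformed beta itself, and for a normal truncated at zero, b = max(m + s*d, 0), it is the indicator that the latent value is positive. A small sketch with assumed shapes (plain NumPy, not the package's `dev.np` device wrapper):

```python
import numpy as np

rng = np.random.default_rng(2)
N, K, R = 10, 2, 5                       # obs, random coefficients, draws
draws = rng.normal(size=(N, K, R))
m, s = np.array([0.2, -0.1]), np.array([0.5, 0.3])
lin = m[None, :, None] + s[None, :, None] * draws   # latent values m + s*d

der = np.ones((N, K, R))                 # 'normal' coefficients keep derivative 1
der[:, 0, :] = np.exp(lin[:, 0, :])      # 'ln_normal': derivative = exp(latent)
der[:, 1, :] = 1 * (lin[:, 1, :] > 0)    # 'tn_normal': indicator derivative
```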