metacountregressor 0.1.113__py3-none-any.whl → 0.1.116__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacountregressor/app_main.py +9 -4
- metacountregressor/helperprocess.py +267 -5
- metacountregressor/main.py +172 -61
- metacountregressor/metaheuristics.py +20 -9
- metacountregressor/setup.py +3 -2
- metacountregressor/solution.py +483 -131
- {metacountregressor-0.1.113.dist-info → metacountregressor-0.1.116.dist-info}/METADATA +21 -7
- {metacountregressor-0.1.113.dist-info → metacountregressor-0.1.116.dist-info}/RECORD +11 -11
- {metacountregressor-0.1.113.dist-info → metacountregressor-0.1.116.dist-info}/WHEEL +1 -1
- {metacountregressor-0.1.113.dist-info → metacountregressor-0.1.116.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.113.dist-info → metacountregressor-0.1.116.dist-info}/top_level.txt +0 -0
metacountregressor/solution.py
CHANGED
|
@@ -30,9 +30,9 @@ from scipy.special import gammaln
|
|
|
30
30
|
from sklearn.metrics import mean_absolute_error as MAE
|
|
31
31
|
from sklearn.metrics import mean_squared_error as MSPE
|
|
32
32
|
from statsmodels.tools.numdiff import approx_fprime, approx_hess
|
|
33
|
-
from sklearn.preprocessing import StandardScaler
|
|
33
|
+
from sklearn.preprocessing import StandardScaler, MinMaxScaler
|
|
34
34
|
from texttable import Texttable
|
|
35
|
-
|
|
35
|
+
import time
|
|
36
36
|
try:
|
|
37
37
|
from ._device_cust import device as dev
|
|
38
38
|
from .pareto_file import Pareto, Solution
|
|
@@ -42,7 +42,7 @@ except ImportError:
|
|
|
42
42
|
from pareto_file import Pareto, Solution
|
|
43
43
|
from data_split_helper import DataProcessor
|
|
44
44
|
|
|
45
|
-
|
|
45
|
+
from scipy import stats
|
|
46
46
|
np.seterr(divide='ignore', invalid='ignore')
|
|
47
47
|
warnings.simplefilter("ignore")
|
|
48
48
|
|
|
@@ -124,10 +124,11 @@ class ObjectiveFunction(object):
|
|
|
124
124
|
|
|
125
125
|
self.reg_penalty = 0
|
|
126
126
|
self.power_up_ll = False
|
|
127
|
-
|
|
127
|
+
self.nb_parma = 1
|
|
128
128
|
self.bic = None
|
|
129
129
|
self.other_bic = False
|
|
130
130
|
self.test_flag = 1
|
|
131
|
+
self.no_extra_param =1 #if true, fix dispersion. w
|
|
131
132
|
if self.other_bic:
|
|
132
133
|
print('change this to false latter ')
|
|
133
134
|
|
|
@@ -135,10 +136,10 @@ class ObjectiveFunction(object):
|
|
|
135
136
|
self.constant_value = 0
|
|
136
137
|
self.negative_binomial_value = 1
|
|
137
138
|
|
|
138
|
-
self.verbose_safe =
|
|
139
|
+
self.verbose_safe = kwargs.get('verbose', 0)
|
|
139
140
|
self.please_print = kwargs.get('please_print', 0)
|
|
140
141
|
self.group_halton = None
|
|
141
|
-
self.grad_yes = False
|
|
142
|
+
self.grad_yes = kwargs.get('grad_est', False)
|
|
142
143
|
self.hess_yes = False
|
|
143
144
|
self.group_halton_test = None
|
|
144
145
|
self.panels = None
|
|
@@ -151,15 +152,15 @@ class ObjectiveFunction(object):
|
|
|
151
152
|
self.dist_fit = None
|
|
152
153
|
|
|
153
154
|
self.MAE = None
|
|
154
|
-
self.best_obj_1 =
|
|
155
|
-
self._obj_1 = 'bic'
|
|
156
|
-
self._obj_2 = 'MSE'
|
|
155
|
+
self.best_obj_1 = 1000000.0
|
|
156
|
+
self._obj_1 = kwargs.get('_obj_1', 'bic')
|
|
157
|
+
self._obj_2 = kwargs.get('_obj_2', 'MSE')
|
|
157
158
|
self.numerical_hessian_calc = 0 # calculates hessian by statsmodels otherwise scipy
|
|
158
159
|
self.full_model = None
|
|
159
160
|
self.GP_parameter = 0
|
|
160
|
-
self.is_multi =
|
|
161
|
+
self.is_multi = kwargs.get('is_multi', False)
|
|
161
162
|
self.complexity_level = 6
|
|
162
|
-
self._max_iterations_improvement =
|
|
163
|
+
self._max_iterations_improvement = 10000
|
|
163
164
|
self.generated_sln = set()
|
|
164
165
|
self.ave_mae = 0
|
|
165
166
|
# defalt paramaters for hs #TODO unpack into harmony search class
|
|
@@ -167,23 +168,32 @@ class ObjectiveFunction(object):
|
|
|
167
168
|
self._hms = 20
|
|
168
169
|
self._max_time = 60 * 60 * 24
|
|
169
170
|
self._hmcr = .5
|
|
170
|
-
self._par = 0.3
|
|
171
|
+
self._par = 0.3 #dont think this gets useted
|
|
171
172
|
self._mpai = 1
|
|
172
173
|
self._max_imp = 100000
|
|
173
174
|
self._WIC = 1000 # Number of Iterations without Multiobjective Improvement #tod chuck into solution
|
|
174
175
|
self._panels = None
|
|
175
176
|
self.is_multi = True
|
|
176
177
|
self.method_ll = 'Nelder-Mead-BFGS'
|
|
178
|
+
|
|
177
179
|
self.method_ll = 'L-BFGS-B' # alternatives 'BFGS_2', 'BFGS
|
|
178
|
-
self.method_ll = 'BFGS_2'
|
|
180
|
+
self.method_ll = kwargs.get('method', 'BFGS_2')
|
|
181
|
+
|
|
182
|
+
#self.method_ll = 'Nelder-Mead-BFGS'
|
|
179
183
|
self.Keep_Fit = 2
|
|
180
184
|
self.MP = 0
|
|
181
185
|
# Nelder-Mead-BFGS
|
|
182
186
|
|
|
183
|
-
self._max_characteristics = 26
|
|
187
|
+
self._max_characteristics = kwargs.get('_max_vars', 26)
|
|
184
188
|
|
|
185
189
|
self.beta_dict = dict
|
|
190
|
+
if 'model_terms' in kwargs:
|
|
191
|
+
print('change')
|
|
192
|
+
if kwargs.get('model_terms').get('group') is not None:
|
|
193
|
+
kwargs['group'] = kwargs.get('model_terms').get('group')
|
|
186
194
|
|
|
195
|
+
if kwargs.get('model_terms').get('panels') is not None:
|
|
196
|
+
kwargs['panels'] = kwargs.get('model_terms').get('panels')
|
|
187
197
|
acceptable_keys_list = ['_par', '_max_imp', '_hmcr', 'steps',
|
|
188
198
|
'algorithm', '_random_seed', '_max_time',
|
|
189
199
|
'forcedvariables', '_obj_1', '_obj_2', '_par',
|
|
@@ -201,12 +211,17 @@ class ObjectiveFunction(object):
|
|
|
201
211
|
if 'instance_number' in kwargs:
|
|
202
212
|
self.instance_number = str(kwargs['instance_number'])
|
|
203
213
|
else:
|
|
214
|
+
|
|
215
|
+
print('no name set, setting name as 0')
|
|
204
216
|
self.instance_number = str(0) # set an arbitrary instance number
|
|
205
217
|
|
|
206
218
|
if not os.path.exists(self.instance_number):
|
|
207
|
-
|
|
219
|
+
if kwargs.get('make_directory', True):
|
|
220
|
+
print('Making a Directory, if you want to stop from storing the files to this directory set argumet: make_directory:False')
|
|
221
|
+
os.makedirs(self.instance_number)
|
|
208
222
|
|
|
209
223
|
if not hasattr(self, '_obj_1'):
|
|
224
|
+
print('_obj_1 required, define as bic, aic, ll')
|
|
210
225
|
raise Exception
|
|
211
226
|
|
|
212
227
|
self.pvalue_penalty = float(kwargs.get('pvalue_penalty', 0.5))
|
|
@@ -214,6 +229,11 @@ class ObjectiveFunction(object):
|
|
|
214
229
|
self._maximize = False # do we maximize or minimize?
|
|
215
230
|
|
|
216
231
|
x_data = sm.add_constant(x_data)
|
|
232
|
+
standardize_the_data = 0
|
|
233
|
+
if standardize_the_data:
|
|
234
|
+
print('we are standardize the data')
|
|
235
|
+
x_data = self.self_standardize_positive(x_data)
|
|
236
|
+
|
|
217
237
|
self._input_data(x_data, y_data)
|
|
218
238
|
|
|
219
239
|
|
|
@@ -230,13 +250,17 @@ class ObjectiveFunction(object):
|
|
|
230
250
|
self.test_percentage = float(kwargs.get('test_percentage', 0))
|
|
231
251
|
self.val_percentage = float(kwargs.get('val_percentage', 0))
|
|
232
252
|
if self.test_percentage == 0:
|
|
253
|
+
print('test percentage is 0, please enter arg test_percentage as decimal, eg 0.8')
|
|
254
|
+
print('continuing single objective')
|
|
255
|
+
time.sleep(2)
|
|
233
256
|
self.is_multi = False
|
|
234
257
|
|
|
235
|
-
if 'panels' in kwargs and not
|
|
236
|
-
|
|
258
|
+
if 'panels' in kwargs and not (kwargs.get('panels') == None):
|
|
259
|
+
if kwargs.get('group') is not None:
|
|
260
|
+
self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)
|
|
237
261
|
|
|
238
|
-
|
|
239
|
-
|
|
262
|
+
x_data[kwargs['group']] = x_data[kwargs['group']].astype(
|
|
263
|
+
'category').cat.codes
|
|
240
264
|
self.complexity_level = 6
|
|
241
265
|
# create test dataset
|
|
242
266
|
|
|
@@ -275,26 +299,31 @@ class ObjectiveFunction(object):
|
|
|
275
299
|
|
|
276
300
|
#self.n_obs = N
|
|
277
301
|
self._characteristics_names = list(self._x_data.columns)
|
|
278
|
-
self._max_group_all_means =
|
|
302
|
+
self._max_group_all_means = 2
|
|
279
303
|
|
|
280
304
|
exclude_this_test = [4]
|
|
281
305
|
|
|
282
|
-
if 'panels' in kwargs and not
|
|
306
|
+
if 'panels' in kwargs and not (kwargs.get('panels') == None):
|
|
283
307
|
self.panels = np.asarray(df_train[kwargs['panels']])
|
|
284
308
|
self.panels_test = np.asarray(df_test[kwargs['panels']])
|
|
285
309
|
self.ids = np.asarray(
|
|
286
310
|
df_train[kwargs['panels']]) if kwargs['panels'] is not None else None
|
|
287
311
|
self.ids_test = np.asarray(
|
|
288
312
|
df_test[kwargs['panels']]) if kwargs['panels'] is not None else None
|
|
289
|
-
|
|
290
|
-
'
|
|
291
|
-
|
|
292
|
-
'
|
|
313
|
+
if kwargs.get('group') is not None:
|
|
314
|
+
groupll = np.asarray(df_train[kwargs['group']].astype(
|
|
315
|
+
'category').cat.codes)
|
|
316
|
+
group_test = np.asarray(df_test[kwargs['group']].astype(
|
|
317
|
+
'category').cat.codes)
|
|
318
|
+
else:
|
|
319
|
+
groupll = None
|
|
293
320
|
X, Y, panel, group = self._arrange_long_format(
|
|
294
321
|
df_train, y_train, self.ids, self.ids, groupll)
|
|
295
322
|
self.group_halton = group.copy()
|
|
296
323
|
self.group_dummies = pd.get_dummies(group)
|
|
297
324
|
Xnew, Ynew, panel_info = self._balance_panels(X, Y, panel)
|
|
325
|
+
|
|
326
|
+
Xnew = pd.DataFrame(Xnew, columns=X.columns)
|
|
298
327
|
self.panel_info = panel_info
|
|
299
328
|
self.N, self.P = panel_info.shape
|
|
300
329
|
Xnew.drop(kwargs['panels'], axis=1, inplace=True)
|
|
@@ -302,9 +331,11 @@ class ObjectiveFunction(object):
|
|
|
302
331
|
K = Xnew.shape[1]
|
|
303
332
|
self._characteristics_names = list(Xnew.columns)
|
|
304
333
|
XX = Xnew.values.reshape(self.N, self.P, K).copy()
|
|
334
|
+
XX = XX.astype('float')
|
|
305
335
|
self.group_dummies = self.group_dummies.values.reshape(self.N, self.P, -1)
|
|
306
336
|
self.group_halton = self.group_halton.reshape(self.N, self.P)[:, 0]
|
|
307
337
|
YY = Ynew.values.reshape(self.N, self.P, 1).copy()
|
|
338
|
+
YY = YY.astype('float')
|
|
308
339
|
self._x_data = XX.copy()
|
|
309
340
|
self._y_data = YY.copy()
|
|
310
341
|
X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.panels_test, group_test)
|
|
@@ -326,6 +357,7 @@ class ObjectiveFunction(object):
|
|
|
326
357
|
K = X.shape[1]
|
|
327
358
|
self.columns_names = X.columns
|
|
328
359
|
X = X.values.reshape(self.N_test, self.P_test, K)
|
|
360
|
+
X = X.astype('float')
|
|
329
361
|
self.group_halton_test = self.group_halton_test.reshape(self.N_test, self.P_test)[:, 0]
|
|
330
362
|
Y = Y.values.reshape(self.N_test, self.P_test, 1)
|
|
331
363
|
Y = Y.astype('float')
|
|
@@ -338,6 +370,7 @@ class ObjectiveFunction(object):
|
|
|
338
370
|
|
|
339
371
|
|
|
340
372
|
else:
|
|
373
|
+
print('No Panels. Grouped Random Paramaters Will not be estimated')
|
|
341
374
|
self.G = None
|
|
342
375
|
self._Gnum = 1
|
|
343
376
|
self._max_group_all_means = 0
|
|
@@ -354,7 +387,9 @@ class ObjectiveFunction(object):
|
|
|
354
387
|
K = Xnew.shape[1]
|
|
355
388
|
self._characteristics_names = list(Xnew.columns)
|
|
356
389
|
XX = Xnew.values.reshape(self.N, self.P, K).copy()
|
|
390
|
+
XX = XX.astype('float')
|
|
357
391
|
YY = Ynew.values.reshape(self.N, self.P, 1).copy()
|
|
392
|
+
YY = YY.astype('float')
|
|
358
393
|
self._x_data = XX.copy()
|
|
359
394
|
self._y_data = YY.copy()
|
|
360
395
|
|
|
@@ -370,7 +405,9 @@ class ObjectiveFunction(object):
|
|
|
370
405
|
K = X.shape[1]
|
|
371
406
|
self.columns_names = X.columns
|
|
372
407
|
X = X.values.reshape(self.N_test, self.P_test, K)
|
|
408
|
+
X = X.astype('float')
|
|
373
409
|
Y = Y.values.reshape(self.N_test, self.P_test, 1)
|
|
410
|
+
Y = Y.astype('float')
|
|
374
411
|
self._x_data_test = X.copy()
|
|
375
412
|
self.y_data_test = Y.copy()
|
|
376
413
|
|
|
@@ -385,7 +422,7 @@ class ObjectiveFunction(object):
|
|
|
385
422
|
|
|
386
423
|
|
|
387
424
|
|
|
388
|
-
self.Ndraws = 200
|
|
425
|
+
self.Ndraws = kwargs.get('Ndraws', 200)
|
|
389
426
|
self.draws1 = None
|
|
390
427
|
self.initial_sig = 1 # pass the test of a single model
|
|
391
428
|
self.pvalue_sig_value = .1
|
|
@@ -403,12 +440,13 @@ class ObjectiveFunction(object):
|
|
|
403
440
|
print('Setup Complete...')
|
|
404
441
|
else:
|
|
405
442
|
print('No Panels Supplied')
|
|
443
|
+
print('Setup Complete...')
|
|
406
444
|
self._characteristics_names = list(self._x_data.columns)
|
|
407
445
|
# define the variables
|
|
408
446
|
# self._transformations = ["no", "sqrt", "log", "exp", "fact", "arcsinh", 2, 3]
|
|
409
447
|
self._transformations = ["no", "sqrt", "log", "arcsinh"]
|
|
410
448
|
self._transformations = kwargs.get('_transformation', ["no", "sqrt", "log", 'arcsinh'])
|
|
411
|
-
|
|
449
|
+
self._transformations = kwargs.get('_transformation', ["no", "log", "sqrt", "arcsinh"])
|
|
412
450
|
# self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']
|
|
413
451
|
|
|
414
452
|
self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])
|
|
@@ -439,8 +477,17 @@ class ObjectiveFunction(object):
|
|
|
439
477
|
if 'model_types' in kwargs:
|
|
440
478
|
model_types = kwargs['model_types']
|
|
441
479
|
else:
|
|
442
|
-
model_types = [[0, 1]] # add 2 for Generalized Poisson
|
|
443
480
|
|
|
481
|
+
|
|
482
|
+
model_types = [[0, 1]] # add 2 for Generalized Poisson
|
|
483
|
+
#model_types = [[0]]
|
|
484
|
+
#TODO change back and fix NB
|
|
485
|
+
model_t_dict = {'Poisson':0,
|
|
486
|
+
"NB":1}
|
|
487
|
+
# Retrieve the keys (model names) corresponding to the values in model_types
|
|
488
|
+
model_keys = [key for key, value in model_t_dict.items() if value in model_types[0]]
|
|
489
|
+
# Print the formatted result
|
|
490
|
+
print(f'The type of models possible will consider: {", ".join(model_keys)}')
|
|
444
491
|
self._discrete_values = self._discrete_values + self.define_poissible_transforms(
|
|
445
492
|
self._transformations, kwargs.get('decisions',None)) + model_types
|
|
446
493
|
|
|
@@ -458,8 +505,9 @@ class ObjectiveFunction(object):
|
|
|
458
505
|
self._max_hurdle = 4
|
|
459
506
|
|
|
460
507
|
#Manually fit from analyst specification
|
|
461
|
-
manual_fit = kwargs.get('Manual_Fit')
|
|
508
|
+
manual_fit = kwargs.get('Manual_Fit', None)
|
|
462
509
|
if manual_fit is not None:
|
|
510
|
+
print('fitting manual')
|
|
463
511
|
self.process_manual_fit(manual_fit)
|
|
464
512
|
|
|
465
513
|
self.solution_analyst = None
|
|
@@ -494,6 +542,7 @@ class ObjectiveFunction(object):
|
|
|
494
542
|
if self.is_multi:
|
|
495
543
|
self._offsets_test = self._x_data_test[:, :, val_od]
|
|
496
544
|
self._x_data_test = self.remove_offset(self._x_data_test, val_od)
|
|
545
|
+
#print(self._offsets)
|
|
497
546
|
else:
|
|
498
547
|
self.initialize_empty_offsets()
|
|
499
548
|
|
|
@@ -766,6 +815,8 @@ class ObjectiveFunction(object):
|
|
|
766
815
|
if dispersion == 0:
|
|
767
816
|
return None, None
|
|
768
817
|
elif dispersion == 2 or dispersion == 1:
|
|
818
|
+
if self.no_extra_param:
|
|
819
|
+
return self.nb_parma, None
|
|
769
820
|
return betas[-1], None
|
|
770
821
|
|
|
771
822
|
elif dispersion == 3:
|
|
@@ -796,15 +847,18 @@ class ObjectiveFunction(object):
|
|
|
796
847
|
def rename_distro(self, distro):
|
|
797
848
|
# Mapping dictionary
|
|
798
849
|
mapping = {
|
|
799
|
-
'
|
|
800
|
-
'
|
|
801
|
-
'
|
|
802
|
-
'
|
|
803
|
-
'
|
|
850
|
+
'normal': ['normal', 'n', 'Normal'],
|
|
851
|
+
'triangular': ['triangular', 't', 'Triangular'],
|
|
852
|
+
'uniform': ['uniform', 'u', 'Uniform'],
|
|
853
|
+
'ln_normal': ['ln_normal', 'ln_n', 'Ln_Normal'],
|
|
854
|
+
'tn_normal': ['tn_normal', 'tn_n', 'trunc_normal']
|
|
804
855
|
}
|
|
805
856
|
|
|
806
857
|
# Use list comprehension with the mapping
|
|
807
|
-
|
|
858
|
+
reversed_mapping = {value: key for key, values in mapping.items() for value in values}
|
|
859
|
+
|
|
860
|
+
# Use the reversed mapping to find the corresponding key
|
|
861
|
+
new_distro = [reversed_mapping.get(i, i) for i in distro]
|
|
808
862
|
return new_distro
|
|
809
863
|
|
|
810
864
|
def define_distributions_analyst(self, extra = None):
|
|
@@ -817,6 +871,8 @@ class ObjectiveFunction(object):
|
|
|
817
871
|
distro = ast.literal_eval(extra.iloc[matched_index, 7].values.tolist()[0])
|
|
818
872
|
distro = self.rename_distro(distro)
|
|
819
873
|
set_alpha = set_alpha+[distro]
|
|
874
|
+
elif col == 'const':
|
|
875
|
+
set_alpha = set_alpha +[['normal']]
|
|
820
876
|
return set_alpha
|
|
821
877
|
return [[x for x in self._distribution]] * self._characteristics
|
|
822
878
|
|
|
@@ -897,10 +953,12 @@ class ObjectiveFunction(object):
|
|
|
897
953
|
return ([self._model_type_codes[dispersion]])
|
|
898
954
|
|
|
899
955
|
def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
|
|
900
|
-
|
|
956
|
+
'''
|
|
901
957
|
setup for naming of the model summary
|
|
902
958
|
'''
|
|
959
|
+
if self.no_extra_param and dispersion ==1:
|
|
903
960
|
|
|
961
|
+
betas = np.append(betas, self.nb_parma)
|
|
904
962
|
|
|
905
963
|
self.name_deleter = []
|
|
906
964
|
group_rpm = None
|
|
@@ -1021,13 +1079,15 @@ class ObjectiveFunction(object):
|
|
|
1021
1079
|
[''] * (len(names) - len(self.transform_id_names))
|
|
1022
1080
|
self.coeff_names = names
|
|
1023
1081
|
|
|
1082
|
+
'''
|
|
1024
1083
|
if betas is not None:
|
|
1025
1084
|
try:
|
|
1026
1085
|
if len(betas) != len(names):
|
|
1027
|
-
print('
|
|
1028
|
-
|
|
1086
|
+
print('standard_model', no_draws)
|
|
1087
|
+
|
|
1029
1088
|
except Exception as e:
|
|
1030
1089
|
print(e)
|
|
1090
|
+
'''
|
|
1031
1091
|
|
|
1032
1092
|
|
|
1033
1093
|
|
|
@@ -1052,7 +1112,8 @@ class ObjectiveFunction(object):
|
|
|
1052
1112
|
if not isinstance(self.pvalues, np.ndarray):
|
|
1053
1113
|
raise Exception
|
|
1054
1114
|
|
|
1055
|
-
|
|
1115
|
+
if 'nb' in self.coeff_names and self.no_extra_param:
|
|
1116
|
+
self.pvalues = np.append(self.pvalues,0)
|
|
1056
1117
|
|
|
1057
1118
|
if self.please_print or save_state:
|
|
1058
1119
|
|
|
@@ -1068,17 +1129,22 @@ class ObjectiveFunction(object):
|
|
|
1068
1129
|
|
|
1069
1130
|
if solution is not None:
|
|
1070
1131
|
print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")
|
|
1071
|
-
|
|
1132
|
+
|
|
1072
1133
|
self.pvalues = [self.round_with_padding(
|
|
1073
1134
|
x, 2) for x in self.pvalues]
|
|
1074
1135
|
signif_list = self.pvalue_asterix_add(self.pvalues)
|
|
1075
1136
|
if model == 1:
|
|
1076
1137
|
|
|
1077
|
-
self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
|
|
1078
|
-
if self.
|
|
1138
|
+
#self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
|
|
1139
|
+
if self.no_extra_param:
|
|
1140
|
+
self.coeff_ = np.append(self.coeff_, self.nb_parma)
|
|
1141
|
+
self.stderr = np.append(self.stderr, 0.00001)
|
|
1142
|
+
self.zvalues = np.append(self.zvalues, 50)
|
|
1143
|
+
|
|
1144
|
+
elif self.coeff_[-1] < 0.25:
|
|
1079
1145
|
print(self.coeff_[-1], 'Warning Check Dispersion')
|
|
1080
1146
|
print(np.exp(self.coeff_[-1]))
|
|
1081
|
-
self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
|
|
1147
|
+
#self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
|
|
1082
1148
|
|
|
1083
1149
|
self.coeff_ = [self.round_with_padding(x, 2) for x in self.coeff_]
|
|
1084
1150
|
|
|
@@ -1301,6 +1367,7 @@ class ObjectiveFunction(object):
|
|
|
1301
1367
|
|
|
1302
1368
|
if 'AADT' in self._characteristics_names[col]:
|
|
1303
1369
|
new_transform = [['log']]
|
|
1370
|
+
#new_transform = [['no']]
|
|
1304
1371
|
transform_set = transform_set + new_transform
|
|
1305
1372
|
|
|
1306
1373
|
elif all(x_data[col] <= 5):
|
|
@@ -1340,6 +1407,18 @@ class ObjectiveFunction(object):
|
|
|
1340
1407
|
|
|
1341
1408
|
return transform_set
|
|
1342
1409
|
|
|
1410
|
+
def poisson_mean_get_dispersion(self, betas, X, y):
|
|
1411
|
+
eVy = self._loglik_gradient(betas, X, y, None, X, None, None, False, False, dispersion=0,
|
|
1412
|
+
return_EV=True,
|
|
1413
|
+
zi_list=None, draws_grouped=None, Xgroup=None)
|
|
1414
|
+
|
|
1415
|
+
ab = ((y - eVy)**2 - eVy)/eVy
|
|
1416
|
+
bb = eVy -1
|
|
1417
|
+
disp = sm.OLS(ab.ravel(), bb.ravel()).fit()
|
|
1418
|
+
gamma = disp.params[0]
|
|
1419
|
+
#print(f'dispersion is {gamma}')
|
|
1420
|
+
return gamma
|
|
1421
|
+
|
|
1343
1422
|
def validation(self, betas, y, X, Xr=None, dispersion=0, rdm_cor_fit=None, zi_list=None, exog_infl=None,
|
|
1344
1423
|
model_nature=None, halton=1, testing=1, validation=0):
|
|
1345
1424
|
'validation if mu needs to be calculated'
|
|
@@ -1373,7 +1452,7 @@ class ObjectiveFunction(object):
|
|
|
1373
1452
|
XG = model_nature.get('XGtest')[:total_percent, :, :]
|
|
1374
1453
|
else:
|
|
1375
1454
|
XG = model_nature.get('XGtest')[total_percent:, :, :]
|
|
1376
|
-
|
|
1455
|
+
|
|
1377
1456
|
else:
|
|
1378
1457
|
if 'XG' in model_nature:
|
|
1379
1458
|
XG = model_nature.get('XG')
|
|
@@ -1495,7 +1574,7 @@ class ObjectiveFunction(object):
|
|
|
1495
1574
|
5: herogeneity_in _means
|
|
1496
1575
|
|
|
1497
1576
|
|
|
1498
|
-
a: how to
|
|
1577
|
+
a: how to transform the original data
|
|
1499
1578
|
b: grab dispersion '''
|
|
1500
1579
|
|
|
1501
1580
|
# todo: better way
|
|
@@ -1843,7 +1922,10 @@ class ObjectiveFunction(object):
|
|
|
1843
1922
|
elif dispersion == 4:
|
|
1844
1923
|
return 2
|
|
1845
1924
|
else:
|
|
1846
|
-
|
|
1925
|
+
if self.no_extra_param:
|
|
1926
|
+
return 0
|
|
1927
|
+
else:
|
|
1928
|
+
return 1
|
|
1847
1929
|
|
|
1848
1930
|
def get_pvalue_info_alt(self, pvalues, names, sig_value=0.05, dispersion=0, is_halton=1, delete=0,
|
|
1849
1931
|
return_violated_terms=0):
|
|
@@ -1858,6 +1940,7 @@ class ObjectiveFunction(object):
|
|
|
1858
1940
|
|
|
1859
1941
|
else:
|
|
1860
1942
|
slice_this_amount = self.num_dispersion_params(dispersion)
|
|
1943
|
+
slice_this_amount = 1 #TODO handle this
|
|
1861
1944
|
if pvalues[-1] > sig_value:
|
|
1862
1945
|
vio_counts += 1
|
|
1863
1946
|
subpvalues = pvalues[:-slice_this_amount].copy()
|
|
@@ -2282,7 +2365,7 @@ class ObjectiveFunction(object):
|
|
|
2282
2365
|
sorted(my_dict, key=lambda x: x[0]['pval_percentage'])
|
|
2283
2366
|
|
|
2284
2367
|
def get_fitness(self, vector, multi=False, verbose=False, max_routine=3):
|
|
2285
|
-
obj_1 = 10.0 **
|
|
2368
|
+
obj_1 = 10.0 ** 4
|
|
2286
2369
|
obj_best = None
|
|
2287
2370
|
sub_slns = list()
|
|
2288
2371
|
|
|
@@ -2290,12 +2373,14 @@ class ObjectiveFunction(object):
|
|
|
2290
2373
|
vector) # just added to grab the fixed fit TODO: Clean up
|
|
2291
2374
|
dispersion = model_nature.get('dispersion')
|
|
2292
2375
|
self.define_selfs_fixed_rdm_cor(model_nature)
|
|
2376
|
+
print('before', vector)
|
|
2293
2377
|
try:
|
|
2294
2378
|
self.repair(vector)
|
|
2295
2379
|
except Exception as e:
|
|
2296
|
-
print('
|
|
2380
|
+
print('problem repairing here')
|
|
2297
2381
|
print(vector)
|
|
2298
2382
|
print(e)
|
|
2383
|
+
print('after', vector)
|
|
2299
2384
|
layout = vector.copy()
|
|
2300
2385
|
trial_run = 0
|
|
2301
2386
|
max_trial = 0
|
|
@@ -2374,10 +2459,10 @@ class ObjectiveFunction(object):
|
|
|
2374
2459
|
|
|
2375
2460
|
|
|
2376
2461
|
if not self.is_quanitifiable_num(obj_1[self._obj_1]):
|
|
2377
|
-
obj_1[self._obj_1] = 10 **
|
|
2462
|
+
obj_1[self._obj_1] = 10 ** 5
|
|
2378
2463
|
else:
|
|
2379
2464
|
if obj_1[self._obj_1] <= 0:
|
|
2380
|
-
obj_1[self._obj_1] = 10 **
|
|
2465
|
+
obj_1[self._obj_1] = 10 ** 5
|
|
2381
2466
|
|
|
2382
2467
|
if multi:
|
|
2383
2468
|
|
|
@@ -2408,10 +2493,10 @@ class ObjectiveFunction(object):
|
|
|
2408
2493
|
|
|
2409
2494
|
self.reset_sln()
|
|
2410
2495
|
if not self.is_quanitifiable_num(obj_1[self._obj_1]):
|
|
2411
|
-
obj_1[self._obj_1] = 10 **
|
|
2496
|
+
obj_1[self._obj_1] = 10 ** 5
|
|
2412
2497
|
else:
|
|
2413
2498
|
if obj_1[self._obj_1] == 0:
|
|
2414
|
-
obj_1[self._obj_1] = 10 **
|
|
2499
|
+
obj_1[self._obj_1] = 10 **5
|
|
2415
2500
|
if verbose:
|
|
2416
2501
|
print('The best solution iteratively is of objective value:', obj_1)
|
|
2417
2502
|
|
|
@@ -2574,7 +2659,7 @@ class ObjectiveFunction(object):
|
|
|
2574
2659
|
self._hmcr = (
|
|
2575
2660
|
self._hmcr_min + ((self._hmcr_max - self._hmcr_min) / self._max_imp) * iteration)
|
|
2576
2661
|
|
|
2577
|
-
|
|
2662
|
+
|
|
2578
2663
|
|
|
2579
2664
|
def update_par(self, iteration, is_sin=False):
|
|
2580
2665
|
"""
|
|
@@ -2794,10 +2879,6 @@ class ObjectiveFunction(object):
|
|
|
2794
2879
|
'''
|
|
2795
2880
|
#return score
|
|
2796
2881
|
|
|
2797
|
-
|
|
2798
|
-
|
|
2799
|
-
|
|
2800
|
-
|
|
2801
2882
|
try:
|
|
2802
2883
|
if alpha is None:
|
|
2803
2884
|
alpha = np.exp(params[-1])
|
|
@@ -2938,7 +3019,7 @@ class ObjectiveFunction(object):
|
|
|
2938
3019
|
argument = prob.mean(axis=1)
|
|
2939
3020
|
# if less than 0 penalise
|
|
2940
3021
|
if np.min(argument) < 0:
|
|
2941
|
-
print('
|
|
3022
|
+
print('Error with args..')
|
|
2942
3023
|
if np.min(argument) < limit:
|
|
2943
3024
|
# add a penalty for too small argument of log
|
|
2944
3025
|
log_lik += -np.sum(np.minimum(0.0, argument - limit)) / limit
|
|
@@ -2954,6 +3035,39 @@ class ObjectiveFunction(object):
|
|
|
2954
3035
|
# print('log_lik poisson', log_lik)
|
|
2955
3036
|
return -log_lik
|
|
2956
3037
|
|
|
3038
|
+
def extract_parameters(self, betas, Kf, Kr, Kchol_a, Krb_a):
|
|
3039
|
+
"""
|
|
3040
|
+
Extracts parameters from the `betas` array based on the given sizes.
|
|
3041
|
+
|
|
3042
|
+
Parameters:
|
|
3043
|
+
betas (numpy.ndarray): The array of betas.
|
|
3044
|
+
Kf (int): Size of Bf (first Kf elements of betas).
|
|
3045
|
+
Kr (int): Size of Br.
|
|
3046
|
+
Kchol_a (int): Part of the size for brstd.
|
|
3047
|
+
Krb_a (int): Part of the size for brstd.
|
|
3048
|
+
|
|
3049
|
+
Returns:
|
|
3050
|
+
tuple: A tuple containing:
|
|
3051
|
+
- Bf (numpy.ndarray): The first Kf elements of betas.
|
|
3052
|
+
- Br (numpy.ndarray): The next Kr elements of betas after Bf.
|
|
3053
|
+
- brstd (numpy.ndarray): The next Kchol_a + Krb_a elements of betas after Br.
|
|
3054
|
+
- remaining_betas (numpy.ndarray): Any remaining elements in betas after brstd.
|
|
3055
|
+
"""
|
|
3056
|
+
# Step 1: Extract Bf
|
|
3057
|
+
Bf = betas[:Kf] # First Kf elements
|
|
3058
|
+
|
|
3059
|
+
# Step 2: Extract Br
|
|
3060
|
+
Br = betas[Kf:Kf + Kr] # Next Kr elements after Bf
|
|
3061
|
+
|
|
3062
|
+
# Step 3: Extract brstd
|
|
3063
|
+
brstd_size = Kchol_a + Krb_a # Total size of brstd
|
|
3064
|
+
brstd = betas[Kf + Kr:Kf + Kr + brstd_size] # Next brstd_size elements after Br
|
|
3065
|
+
|
|
3066
|
+
# Step 4: Extract remaining betas
|
|
3067
|
+
remaining_betas = betas[Kf + Kr + brstd_size:] # Remaining elements in betas
|
|
3068
|
+
|
|
3069
|
+
return Bf, Br, brstd, remaining_betas
|
|
3070
|
+
|
|
2957
3071
|
def convert_nbinom_params(self, mu, theta):
|
|
2958
3072
|
"""
|
|
2959
3073
|
Convert mean/dispersion parameterization of a negative binomial to the ones scipy supports
|
|
@@ -3429,6 +3543,7 @@ class ObjectiveFunction(object):
|
|
|
3429
3543
|
else:
|
|
3430
3544
|
corr_pairs = list(itertools.combinations(corr_indices, 2))
|
|
3431
3545
|
|
|
3546
|
+
|
|
3432
3547
|
for ii, corr_pair in enumerate(corr_pairs):
|
|
3433
3548
|
# lower cholesky matrix
|
|
3434
3549
|
chol_mat_temp[tuple(reversed(corr_pair))] = chol[chol_count]
|
|
@@ -3456,7 +3571,7 @@ class ObjectiveFunction(object):
|
|
|
3456
3571
|
a = 0
|
|
3457
3572
|
b = 0
|
|
3458
3573
|
stuff = []
|
|
3459
|
-
#
|
|
3574
|
+
# TODO get order
|
|
3460
3575
|
for j, i in enumerate(list_sizes):
|
|
3461
3576
|
br_mean = betas_hetro[a:i + a]
|
|
3462
3577
|
a += i
|
|
@@ -3483,7 +3598,33 @@ class ObjectiveFunction(object):
|
|
|
3483
3598
|
br_mean = betas_m
|
|
3484
3599
|
br_sd = betas_sd # Last Kr positions
|
|
3485
3600
|
# Compute: betas = mean + sd*draws
|
|
3486
|
-
|
|
3601
|
+
if len(br_sd) != draws.shape[1]:
|
|
3602
|
+
#get the same size as the mean
|
|
3603
|
+
#if hasattr(self.Br):
|
|
3604
|
+
# betas_random = self.Br.copy()
|
|
3605
|
+
#else:
|
|
3606
|
+
idx = self.get_X_draw_tril()
|
|
3607
|
+
betas_random = br_mean[None, :, None] + draws[:,idx, :] * br_sd[None, :, None]
|
|
3608
|
+
'''
|
|
3609
|
+
c = self.get_num_params()[3:5]
|
|
3610
|
+
|
|
3611
|
+
cor = []
|
|
3612
|
+
for i in range(c[0]):
|
|
3613
|
+
cor.append(i)
|
|
3614
|
+
|
|
3615
|
+
vall =[]
|
|
3616
|
+
for i, val in enumerate(reversed(br_sd)):
|
|
3617
|
+
vall.append()
|
|
3618
|
+
|
|
3619
|
+
remaining = draws.shape[1] - len(betas_sd)
|
|
3620
|
+
'''
|
|
3621
|
+
|
|
3622
|
+
else:
|
|
3623
|
+
|
|
3624
|
+
|
|
3625
|
+
betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
|
|
3626
|
+
|
|
3627
|
+
|
|
3487
3628
|
betas_random = self._apply_distribution(betas_random)
|
|
3488
3629
|
|
|
3489
3630
|
return betas_random
|
|
@@ -3502,21 +3643,38 @@ class ObjectiveFunction(object):
|
|
|
3502
3643
|
# if gamma <= 0.01: #min defined value for stable nb
|
|
3503
3644
|
# gamma = 0.01
|
|
3504
3645
|
|
|
3646
|
+
#g = stats.gamma.rvs(gamma, scale = lam/gamma, size = 1.0 / gamma * lam ** Q )
|
|
3505
3647
|
|
|
3648
|
+
#gg = stats.poisson.rvs(g)
|
|
3506
3649
|
|
|
3650
|
+
|
|
3507
3651
|
|
|
3652
|
+
|
|
3508
3653
|
endog = y
|
|
3509
3654
|
mu = lam
|
|
3655
|
+
''''
|
|
3656
|
+
mu = lam*np.exp(gamma) #TODO check that this does not need to be multiplied
|
|
3510
3657
|
alpha = np.exp(gamma)
|
|
3511
|
-
|
|
3512
|
-
|
|
3513
|
-
|
|
3514
|
-
|
|
3515
|
-
|
|
3658
|
+
|
|
3659
|
+
'''
|
|
3660
|
+
alpha = gamma
|
|
3661
|
+
size = 1.0 / alpha * mu ** Q
|
|
3662
|
+
|
|
3663
|
+
prob = size/(size+mu)
|
|
3664
|
+
|
|
3665
|
+
|
|
3516
3666
|
|
|
3517
3667
|
'''test'''
|
|
3518
3668
|
|
|
3519
3669
|
|
|
3670
|
+
'''
|
|
3671
|
+
size = 1 / np.exp(gamma) * mu ** 0
|
|
3672
|
+
prob = size / (size + mu)
|
|
3673
|
+
coeff = (gammaln(size + y) - gammaln(y + 1) -
|
|
3674
|
+
gammaln(size))
|
|
3675
|
+
llf = coeff + size * np.log(prob) + y * np.log(1 - prob)
|
|
3676
|
+
'''
|
|
3677
|
+
|
|
3520
3678
|
try:
|
|
3521
3679
|
# print(np.shape(y),np.shape(size), np.shape(prob))
|
|
3522
3680
|
#gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
|
|
@@ -3528,22 +3686,28 @@ class ObjectiveFunction(object):
|
|
|
3528
3686
|
#start_time = time.time()
|
|
3529
3687
|
#for _ in range(10000):
|
|
3530
3688
|
|
|
3531
|
-
|
|
3689
|
+
|
|
3532
3690
|
#end_time = time.time()
|
|
3533
3691
|
#print("Custom functieon time:", end_time - start_time)
|
|
3534
3692
|
#start_time = time.time()
|
|
3535
3693
|
#for _ in range(10000):
|
|
3694
|
+
'''
|
|
3536
3695
|
gg = np.exp(
|
|
3537
3696
|
gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
|
|
3538
3697
|
y + alpha) * np.log(mu + alpha))
|
|
3539
3698
|
gg[np.isnan(gg)] = 1
|
|
3699
|
+
'''
|
|
3700
|
+
gg_alt = nbinom.pmf(y ,1/alpha, prob)
|
|
3701
|
+
#gg_alt_2 = (gammaln(size + y) - gammaln(y + 1) -
|
|
3702
|
+
#gammaln(size)) + size * np.log(prob) + y * np.log(1 - prob)
|
|
3703
|
+
#print('check theses')
|
|
3540
3704
|
#gg = nbinom.pmf(y ,alpha, prob)
|
|
3541
3705
|
#end_time = time.time()
|
|
3542
3706
|
#print("Custom functieon time:", end_time - start_time)
|
|
3543
3707
|
|
|
3544
3708
|
except Exception as e:
|
|
3545
|
-
print(
|
|
3546
|
-
return
|
|
3709
|
+
print("Neg Binom error.")
|
|
3710
|
+
return gg_alt
|
|
3547
3711
|
|
|
3548
3712
|
def lindley_pmf(self, x, r, theta, k=50):
|
|
3549
3713
|
"""
|
|
@@ -3690,8 +3854,8 @@ class ObjectiveFunction(object):
|
|
|
3690
3854
|
|
|
3691
3855
|
if dispersion == 1 or dispersion == 4: # nb
|
|
3692
3856
|
# if model_nature is not None and 'dispersion_penalty' in model_nature:
|
|
3693
|
-
|
|
3694
|
-
|
|
3857
|
+
#b_gam = 1/np.exp(b_gam)
|
|
3858
|
+
#print(b_gam)
|
|
3695
3859
|
if b_gam <= 0:
|
|
3696
3860
|
#penalty += 100
|
|
3697
3861
|
#penalty += abs(b_gam)
|
|
@@ -3699,9 +3863,9 @@ class ObjectiveFunction(object):
|
|
|
3699
3863
|
#b_gam = 1
|
|
3700
3864
|
|
|
3701
3865
|
# if b_gam < 0.03:
|
|
3702
|
-
penalty += min(1, np.abs(b_gam))
|
|
3866
|
+
penalty += min(1, np.abs(b_gam), 0)
|
|
3703
3867
|
|
|
3704
|
-
b_gam = 0.001
|
|
3868
|
+
#b_gam = 0.001
|
|
3705
3869
|
#
|
|
3706
3870
|
|
|
3707
3871
|
#if b_gam >= 10:
|
|
@@ -3733,8 +3897,15 @@ class ObjectiveFunction(object):
|
|
|
3733
3897
|
def eXB_calc(self, params_main, Xd, offset, dispersion, b_gam=None):
|
|
3734
3898
|
|
|
3735
3899
|
# print('this was 0')
|
|
3736
|
-
|
|
3900
|
+
if dispersion:
|
|
3901
|
+
eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
|
|
3902
|
+
|
|
3903
|
+
#eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])+dispersion
|
|
3904
|
+
#print('check if this holds size')
|
|
3905
|
+
else:
|
|
3906
|
+
eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
|
|
3737
3907
|
eta = np.array(eta)
|
|
3908
|
+
|
|
3738
3909
|
# eta = np.float64(eta)
|
|
3739
3910
|
# eta = np.dot(Xd, params_main)+offset[:,:,0]
|
|
3740
3911
|
# eta2 = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
|
|
@@ -3753,7 +3924,7 @@ class ObjectiveFunction(object):
|
|
|
3753
3924
|
|
|
3754
3925
|
else:
|
|
3755
3926
|
# eVd = self.my_lindley(np.exp(np.clip(eta, None, EXP_UPPER_LIMIT)), 1.29)
|
|
3756
|
-
|
|
3927
|
+
eta = eta.astype('float')
|
|
3757
3928
|
eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
|
|
3758
3929
|
return eVd
|
|
3759
3930
|
|
|
@@ -3869,7 +4040,7 @@ class ObjectiveFunction(object):
|
|
|
3869
4040
|
|
|
3870
4041
|
|
|
3871
4042
|
# proba_d = self.dnegbimonli(y, eVd, b_gam )
|
|
3872
|
-
|
|
4043
|
+
|
|
3873
4044
|
|
|
3874
4045
|
elif dispersion == 2:
|
|
3875
4046
|
|
|
@@ -3890,7 +4061,7 @@ class ObjectiveFunction(object):
|
|
|
3890
4061
|
# proba_r = self.poisson_lognormal_pmf(y, eVd, sig)
|
|
3891
4062
|
proba_r = np.array(store)
|
|
3892
4063
|
proba_r = np.atleast_2d(proba_r).T
|
|
3893
|
-
|
|
4064
|
+
|
|
3894
4065
|
|
|
3895
4066
|
else:
|
|
3896
4067
|
raise Exception('not implemented other modeling forms')
|
|
@@ -3907,6 +4078,8 @@ class ObjectiveFunction(object):
|
|
|
3907
4078
|
if dispersion == 0 or dispersion == 3:
|
|
3908
4079
|
return 0
|
|
3909
4080
|
else:
|
|
4081
|
+
|
|
4082
|
+
|
|
3910
4083
|
return 1
|
|
3911
4084
|
|
|
3912
4085
|
def _prob_product_across_panels(self, pch, panel_info):
|
|
@@ -3962,7 +4135,7 @@ class ObjectiveFunction(object):
|
|
|
3962
4135
|
if y[i] == 0:
|
|
3963
4136
|
gr_e[i] = 0
|
|
3964
4137
|
|
|
3965
|
-
if self.is_dispersion(dispersion):
|
|
4138
|
+
if self.is_dispersion(dispersion) and not self.no_extra_param:
|
|
3966
4139
|
gr_d = np.zeros((N, 1))
|
|
3967
4140
|
if dispersion == 1:
|
|
3968
4141
|
# trying alt
|
|
@@ -4066,12 +4239,13 @@ class ObjectiveFunction(object):
|
|
|
4066
4239
|
br, draws_, brstd, dis_fit_long) # (N,K,R)
|
|
4067
4240
|
dprod_r = dev.np.einsum("njk,njr -> nkr", Xdr,
|
|
4068
4241
|
einsum_model_form, dtype=np.float64) # (N,K,R)
|
|
4069
|
-
der_prod_r = dprod_r * der * proba_n[:, None, :] # (N,K,R)
|
|
4070
|
-
der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :] # I think this is the case check
|
|
4071
|
-
|
|
4072
|
-
|
|
4242
|
+
#der_prod_r = dprod_r * der * proba_n[:, None, :] # (N,K,R)
|
|
4243
|
+
#der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :] # I think this is the case check
|
|
4244
|
+
|
|
4245
|
+
der_prod_r = dprod_r * der * proba_n[:, None, :] # or this one
|
|
4246
|
+
|
|
4073
4247
|
der_t = self._compute_derivatives(
|
|
4074
|
-
br, draws_[:, draws_tril_idx, :], brstd, self.dist_fit) # (N,K,R)
|
|
4248
|
+
br[draws_tril_idx], draws_[:, draws_tril_idx, :], brstd, np.array(self.dist_fit)[draws_tril_idx]) # (N,K,R)
|
|
4075
4249
|
# er_t = self._compute_derivatives(br, draws_, brstd[:, draws_tril_idx,: ], self.dist_fit, draws_tril_idx)
|
|
4076
4250
|
der_prod_r_t = dprod_r[:, draws_tril_idx, :] * \
|
|
4077
4251
|
der_t * proba_n[:, None, :] # (N,K,R)
|
|
@@ -4132,14 +4306,18 @@ class ObjectiveFunction(object):
|
|
|
4132
4306
|
grad_n = self._concat_gradients(
|
|
4133
4307
|
(gr_f, gr_u, gr_s, gr_e)) / Rlik # (N,K)
|
|
4134
4308
|
else:
|
|
4135
|
-
|
|
4136
|
-
|
|
4137
|
-
|
|
4138
|
-
|
|
4309
|
+
if self.no_extra_param:
|
|
4310
|
+
grad_n = self._concat_gradients(
|
|
4311
|
+
(gr_f, gr_u, gr_s, gr_h, gr_hs)) / Rlik # (N,K)
|
|
4312
|
+
else:
|
|
4313
|
+
grad_n = self._concat_gradients(
|
|
4314
|
+
(gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik # (N,K)
|
|
4315
|
+
grad_n = np.nan_to_num(grad_n, nan=0, posinf=1000, neginf=-1000)
|
|
4316
|
+
grad_n = np.clip(grad_n, -100, 100)
|
|
4139
4317
|
n = np.shape(grad_n)[0]
|
|
4140
4318
|
# subtract out mean gradient value
|
|
4141
|
-
|
|
4142
|
-
|
|
4319
|
+
grad_n_sub = grad_n-(np.sum(grad_n, axis=0)/n)
|
|
4320
|
+
grad_n = grad_n_sub
|
|
4143
4321
|
grad = grad_n.sum(axis=0)
|
|
4144
4322
|
return grad, grad_n
|
|
4145
4323
|
|
|
@@ -4290,7 +4468,7 @@ class ObjectiveFunction(object):
|
|
|
4290
4468
|
return proba_r.sum(axis=1), np.squeeze(proba_r)
|
|
4291
4469
|
|
|
4292
4470
|
def _penalty_betas(self, betas, dispersion, penalty, penalty_ap=100.0):
|
|
4293
|
-
penalty_val = 0.
|
|
4471
|
+
penalty_val = 0.1
|
|
4294
4472
|
penalty_val_max = 130
|
|
4295
4473
|
|
|
4296
4474
|
# print('change_later')
|
|
@@ -4306,8 +4484,8 @@ class ObjectiveFunction(object):
|
|
|
4306
4484
|
if abs(i) > penalty_val_max:
|
|
4307
4485
|
penalty += abs(i)
|
|
4308
4486
|
|
|
4309
|
-
#
|
|
4310
|
-
#
|
|
4487
|
+
#if abs(i) < penalty_val:
|
|
4488
|
+
# penalty += 5
|
|
4311
4489
|
|
|
4312
4490
|
# penalty = 0
|
|
4313
4491
|
return penalty
|
|
@@ -4414,8 +4592,7 @@ class ObjectiveFunction(object):
|
|
|
4414
4592
|
index += 1
|
|
4415
4593
|
|
|
4416
4594
|
brstd = br_std
|
|
4417
|
-
|
|
4418
|
-
print(brstd)
|
|
4595
|
+
|
|
4419
4596
|
|
|
4420
4597
|
|
|
4421
4598
|
def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
|
|
@@ -4447,7 +4624,7 @@ class ObjectiveFunction(object):
|
|
|
4447
4624
|
penalty = self._penalty_betas(
|
|
4448
4625
|
betas, dispersion, penalty, float(len(y) / 10.0))
|
|
4449
4626
|
self.n_obs = len(y) # feeds into gradient
|
|
4450
|
-
if draws is None and draws_grouped is None and (
|
|
4627
|
+
if draws is None and draws_grouped is None and (model_nature is None or
|
|
4451
4628
|
'draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
|
|
4452
4629
|
#TODO do i shuffle the draws
|
|
4453
4630
|
if type(Xd) == dict:
|
|
@@ -4500,7 +4677,7 @@ class ObjectiveFunction(object):
|
|
|
4500
4677
|
penalty = self.regularise_l2(betas)
|
|
4501
4678
|
|
|
4502
4679
|
if not np.isreal(loglik):
|
|
4503
|
-
loglik = -
|
|
4680
|
+
loglik = - 10000000.0
|
|
4504
4681
|
|
|
4505
4682
|
output = (-loglik + penalty,)
|
|
4506
4683
|
if return_gradient:
|
|
@@ -4581,10 +4758,10 @@ class ObjectiveFunction(object):
|
|
|
4581
4758
|
n_coeff = self.get_param_num(dispersion)
|
|
4582
4759
|
Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
|
|
4583
4760
|
if Kchol_a != Kchol:
|
|
4584
|
-
print('hold')
|
|
4761
|
+
print('hold qhy')
|
|
4585
4762
|
|
|
4586
4763
|
if Kr_b != Kr_b_a:
|
|
4587
|
-
print('hold')
|
|
4764
|
+
print('hold qhy')
|
|
4588
4765
|
|
|
4589
4766
|
|
|
4590
4767
|
|
|
@@ -4594,17 +4771,41 @@ class ObjectiveFunction(object):
|
|
|
4594
4771
|
Kf = 0
|
|
4595
4772
|
else:
|
|
4596
4773
|
if n_coeff != len(betas):
|
|
4597
|
-
raise Exception
|
|
4598
|
-
Bf = betas[0:Kf] # Fixed betas
|
|
4599
|
-
|
|
4774
|
+
raise Exception(
|
|
4600
4775
|
|
|
4776
|
+
)
|
|
4777
|
+
Bf = betas[0:Kf] # Fixed betas
|
|
4601
4778
|
|
|
4602
4779
|
|
|
4780
|
+
Bf_new, Br_new, Br_std_new, Br_rema = self.extract_parameters(betas, Kf, Kr, Kchol_a, Kr_b_a)
|
|
4781
|
+
if Bf_new != Bf:
|
|
4782
|
+
print('check this')
|
|
4603
4783
|
|
|
4604
4784
|
Vdf = dev.np.einsum('njk,k -> nj', Xdf, Bf, dtype=np.float64) # (N, P)
|
|
4605
4785
|
br = betas[Kf:Kf + Kr]
|
|
4786
|
+
if br != Br_new:
|
|
4787
|
+
print('why')
|
|
4788
|
+
|
|
4789
|
+
|
|
4790
|
+
#i have an array of betas, Kf represents the first kf of the betas array
|
|
4791
|
+
# now return Bf where size of bf = kf
|
|
4792
|
+
|
|
4793
|
+
# size of br needs to be Kr
|
|
4794
|
+
#Kr
|
|
4795
|
+
#now extract from betas, after all the Bf
|
|
4796
|
+
# cakk
|
|
4797
|
+
|
|
4798
|
+
#the next array is brstd
|
|
4799
|
+
|
|
4800
|
+
# size of brstd needs to be
|
|
4801
|
+
# Kchol_a + Krb_a
|
|
4802
|
+
#its grabbing from the
|
|
4803
|
+
|
|
4804
|
+
|
|
4606
4805
|
|
|
4607
4806
|
brstd = betas[Kf + Kr:Kf + Kr + Kr_b + Kchol]
|
|
4807
|
+
if brstd != Br_std_new:
|
|
4808
|
+
print('okay')
|
|
4608
4809
|
# initialises size matrix
|
|
4609
4810
|
proba = [] # Temp batching storage
|
|
4610
4811
|
|
|
@@ -4618,6 +4819,8 @@ class ObjectiveFunction(object):
|
|
|
4618
4819
|
if len(self.none_handler(self.rdm_cor_fit)) == 0:
|
|
4619
4820
|
# Br = self._transform_rand_betas(br, np.abs(
|
|
4620
4821
|
# brstd), draws_) # Get random coefficients, old method
|
|
4822
|
+
#TODO
|
|
4823
|
+
print('tril the draws')
|
|
4621
4824
|
Br = self._transform_rand_betas(br,
|
|
4622
4825
|
brstd, draws_) # Get random coefficients
|
|
4623
4826
|
self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
|
|
@@ -4696,7 +4899,8 @@ class ObjectiveFunction(object):
|
|
|
4696
4899
|
eVd = self.lam_transform(eVd, dispersion, betas[-1])
|
|
4697
4900
|
|
|
4698
4901
|
if self.is_dispersion(dispersion):
|
|
4699
|
-
|
|
4902
|
+
if not self.no_extra_param:
|
|
4903
|
+
penalty, betas[-1] = self._penalty_dispersion(
|
|
4700
4904
|
dispersion, betas[-1], eVd, y, penalty, model_nature)
|
|
4701
4905
|
|
|
4702
4906
|
'''
|
|
@@ -4740,7 +4944,7 @@ class ObjectiveFunction(object):
|
|
|
4740
4944
|
proba.append(dev.to_cpu(proba_))
|
|
4741
4945
|
|
|
4742
4946
|
lik = np.stack(proba).sum(axis=0) / R # (N, )
|
|
4743
|
-
lik = np.clip(lik, min_comp_val,
|
|
4947
|
+
lik = np.clip(lik, min_comp_val, max_comp_val)
|
|
4744
4948
|
# lik = np.nan_to_num(lik, )
|
|
4745
4949
|
loglik = np.log(lik)
|
|
4746
4950
|
llf_main = loglik
|
|
@@ -5070,7 +5274,7 @@ class ObjectiveFunction(object):
|
|
|
5070
5274
|
H = self.numerical_hessian(lambda x: self._loglik_gradient(x, *argbs), result.x, eps=1e-7 * self.n_obs)
|
|
5071
5275
|
result['Hessian'] = H
|
|
5072
5276
|
result['hess_inv'] = np.linalg.pinv(H)
|
|
5073
|
-
|
|
5277
|
+
|
|
5074
5278
|
standard_errors = np.sqrt(np.diag(np.linalg.pinv(H)))
|
|
5075
5279
|
return result
|
|
5076
5280
|
# return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS', options= {'gtol':1e-7*self.N}*self.Ndraws)
|
|
@@ -5341,7 +5545,7 @@ class ObjectiveFunction(object):
|
|
|
5341
5545
|
return a
|
|
5342
5546
|
|
|
5343
5547
|
def fitRegression(self, mod,
|
|
5344
|
-
dispersion=0, maxiter=2000, batch_size=None, num_hess=False):
|
|
5548
|
+
dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):
|
|
5345
5549
|
|
|
5346
5550
|
"""
|
|
5347
5551
|
Fits a poisson regression given data and outcomes if dispersion is not declared
|
|
@@ -5358,7 +5562,7 @@ class ObjectiveFunction(object):
|
|
|
5358
5562
|
|
|
5359
5563
|
|
|
5360
5564
|
sol = Solution()
|
|
5361
|
-
|
|
5565
|
+
|
|
5362
5566
|
tol = {'ftol': 1e-8, 'gtol': 1e-6}
|
|
5363
5567
|
is_delete = 0
|
|
5364
5568
|
dispersion = mod.get('dispersion')
|
|
@@ -5387,6 +5591,8 @@ class ObjectiveFunction(object):
|
|
|
5387
5591
|
_g, pg, kg = 0, 0, 0
|
|
5388
5592
|
|
|
5389
5593
|
dispersion_param_num = self.is_dispersion(dispersion)
|
|
5594
|
+
if self.no_extra_param:
|
|
5595
|
+
dispersion_param_num =0
|
|
5390
5596
|
|
|
5391
5597
|
#paramNum = self.get_param_num(dispersion)
|
|
5392
5598
|
self.no_random_paramaters = 0
|
|
@@ -5441,17 +5647,26 @@ class ObjectiveFunction(object):
|
|
|
5441
5647
|
else:
|
|
5442
5648
|
bb[0] = self.constant_value
|
|
5443
5649
|
if dispersion == 1:
|
|
5444
|
-
|
|
5650
|
+
if not self.no_extra_param:
|
|
5651
|
+
bb[-1] = self.negative_binomial_value
|
|
5445
5652
|
bounds = None
|
|
5446
5653
|
|
|
5654
|
+
|
|
5655
|
+
|
|
5447
5656
|
# intial_beta = minimize(self._loglik_gradient, bb, args =(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method = 'nelder-mead', options={'gtol': 1e-7*len(XX)})
|
|
5448
5657
|
hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
|
|
5449
|
-
|
|
5658
|
+
|
|
5659
|
+
if self.no_extra_param:
|
|
5660
|
+
dispersion_poisson = 0
|
|
5661
|
+
initial_beta = self._minimize(self._loglik_gradient, bb,
|
|
5450
5662
|
args=(XX, y, None, None, None, None, calc_gradient, hess_est,
|
|
5451
|
-
|
|
5663
|
+
dispersion_poisson, 0, False, 0, None, None, None, None, None,
|
|
5452
5664
|
mod),
|
|
5453
5665
|
method=method2, tol=1e-5, options={'gtol': tol['gtol']},
|
|
5454
5666
|
bounds=bounds)
|
|
5667
|
+
if dispersion:
|
|
5668
|
+
nb_parma = self.poisson_mean_get_dispersion(initial_beta.x, XX, y)
|
|
5669
|
+
|
|
5455
5670
|
|
|
5456
5671
|
|
|
5457
5672
|
|
|
@@ -5551,7 +5766,7 @@ class ObjectiveFunction(object):
|
|
|
5551
5766
|
|
|
5552
5767
|
b = [b[i] if i > len(self.none_handler(self.fixed_fit)) + len(
|
|
5553
5768
|
self.none_handler(self.rdm_fit)) + len(
|
|
5554
|
-
self.none_handler(self.rdm_cor_fit)) else b[i] / 1
|
|
5769
|
+
self.none_handler(self.rdm_cor_fit)) else b[i] / 1 for i in range(len(b))]
|
|
5555
5770
|
else:
|
|
5556
5771
|
b = bb
|
|
5557
5772
|
|
|
@@ -5561,9 +5776,10 @@ class ObjectiveFunction(object):
|
|
|
5561
5776
|
else:
|
|
5562
5777
|
b = np.insert(b, -1, np.random.uniform(0.05, 0.1))
|
|
5563
5778
|
if dispersion == 1:
|
|
5564
|
-
|
|
5565
|
-
|
|
5566
|
-
b[-1]
|
|
5779
|
+
if not self.no_extra_param:
|
|
5780
|
+
b[-1] = np.abs(b[-1])
|
|
5781
|
+
if b[-1] > 10:
|
|
5782
|
+
b[-1] = 5
|
|
5567
5783
|
elif dispersion == 2:
|
|
5568
5784
|
b[-1] = .5
|
|
5569
5785
|
if method == 'L-BFGS-B' or method2 == 'L-BFGS-B':
|
|
@@ -5689,13 +5905,35 @@ class ObjectiveFunction(object):
|
|
|
5689
5905
|
|
|
5690
5906
|
if draws is None and draws_hetro is not None:
|
|
5691
5907
|
print('hold')
|
|
5692
|
-
|
|
5693
|
-
|
|
5694
|
-
|
|
5695
|
-
|
|
5696
|
-
|
|
5697
|
-
|
|
5698
|
-
|
|
5908
|
+
#self.grad_yes = True
|
|
5909
|
+
#self.hess_yes = True
|
|
5910
|
+
|
|
5911
|
+
if self.no_extra_param:
|
|
5912
|
+
dispersion_poisson = 0
|
|
5913
|
+
print('b :', len(b))
|
|
5914
|
+
print(self.get_param_num())
|
|
5915
|
+
baby = self.get_param_num()
|
|
5916
|
+
if len(b) != baby:
|
|
5917
|
+
print('modify')
|
|
5918
|
+
betas_est = self._minimize(self._loglik_gradient, b, args=(
|
|
5919
|
+
X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion_poisson, 0, False, 0,
|
|
5920
|
+
self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
|
|
5921
|
+
method=method2, tol=tol['ftol'],
|
|
5922
|
+
options={'gtol': tol['gtol']}, bounds=bounds,
|
|
5923
|
+
hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
|
|
5924
|
+
if dispersion:
|
|
5925
|
+
initial_fit_beta = betas_est.x
|
|
5926
|
+
parmas = np.append(initial_fit_beta, nb_parma)
|
|
5927
|
+
self.nb_parma = nb_parma
|
|
5928
|
+
#print(f'neg binomi,{self.nb_parma}')
|
|
5929
|
+
betas_est = self._minimize(self._loglik_gradient, initial_fit_beta, args=(
|
|
5930
|
+
X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
|
|
5931
|
+
self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
|
|
5932
|
+
method=method2, tol=tol['ftol'],
|
|
5933
|
+
options={'gtol': tol['gtol']}, bounds=bounds,
|
|
5934
|
+
hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
|
|
5935
|
+
|
|
5936
|
+
#print('refit with estimation of NB')
|
|
5699
5937
|
# self.numerical_hessian_calc = True
|
|
5700
5938
|
if self.numerical_hessian_calc:
|
|
5701
5939
|
try:
|
|
@@ -5994,7 +6232,7 @@ class ObjectiveFunction(object):
|
|
|
5994
6232
|
return delim + self._model_type_codes[dispersion]
|
|
5995
6233
|
|
|
5996
6234
|
def self_standardize_positive(self, X):
|
|
5997
|
-
scaler =
|
|
6235
|
+
scaler = MinMaxScaler()
|
|
5998
6236
|
if type(X) == list:
|
|
5999
6237
|
return X
|
|
6000
6238
|
|
|
@@ -6004,12 +6242,26 @@ class ObjectiveFunction(object):
|
|
|
6004
6242
|
# Reshaping to 2D - combining the last two dimensions
|
|
6005
6243
|
df_tf_reshaped = X.reshape(original_shape[0], -1)
|
|
6006
6244
|
df_tf_scaled = scaler.fit_transform(df_tf_reshaped)
|
|
6007
|
-
df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
|
|
6245
|
+
#df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
|
|
6008
6246
|
# Reshape back to original 3D shape if necessary
|
|
6009
6247
|
df_tf = df_tf_scaled.reshape(original_shape)
|
|
6010
6248
|
return df_tf
|
|
6011
6249
|
else:
|
|
6012
|
-
|
|
6250
|
+
# Initialize the MinMaxScaler
|
|
6251
|
+
scaler = MinMaxScaler()
|
|
6252
|
+
float_columns = X.select_dtypes(include=['float64', 'float32', 'int']).columns.difference(['const', 'offset, "EXPOSE', 'Constant', 'constant'])
|
|
6253
|
+
non_numeric_columns = X.select_dtypes(exclude=['float64', 'float32', 'int']).columns
|
|
6254
|
+
|
|
6255
|
+
# Fit the scaler to the float columns and transform them
|
|
6256
|
+
X[float_columns] = scaler.fit_transform(X[float_columns])
|
|
6257
|
+
# Fit the scaler to the data and transform it
|
|
6258
|
+
#scaled_data = scaler.fit_transform(X)
|
|
6259
|
+
|
|
6260
|
+
# Convert the result back to a DataFrame
|
|
6261
|
+
#scaled_df = pd.DataFrame(scaled_data, columns=X.columns)
|
|
6262
|
+
|
|
6263
|
+
|
|
6264
|
+
return X
|
|
6013
6265
|
|
|
6014
6266
|
def make_regression_from_terms(self, fixed=None, rdm=None, rdm_cor_fit=None, distribution=None, dispersion=None,
|
|
6015
6267
|
*args, **kwargs):
|
|
@@ -6064,8 +6316,9 @@ class ObjectiveFunction(object):
|
|
|
6064
6316
|
df_test[:, :, idx], model_nature.get('transformations')[idx] = self.transformer(
|
|
6065
6317
|
t, idx, df_test[:, :, idx])
|
|
6066
6318
|
if np.max(df_tf[:, :, idx]) >= 77000:
|
|
6319
|
+
#TODO need to normalise the data
|
|
6067
6320
|
|
|
6068
|
-
|
|
6321
|
+
print('should not be possible')
|
|
6069
6322
|
|
|
6070
6323
|
self.define_selfs_fixed_rdm_cor(model_nature)
|
|
6071
6324
|
indices = self.get_named_indices(self.fixed_fit)
|
|
@@ -6122,7 +6375,7 @@ class ObjectiveFunction(object):
|
|
|
6122
6375
|
model_nature['XH'] = XH
|
|
6123
6376
|
X_test = None
|
|
6124
6377
|
if np.isin(X, [np.inf, -np.inf, None, np.nan]).any(): # type ignore
|
|
6125
|
-
raise Exception('there is some kind of error')
|
|
6378
|
+
raise Exception('there is some kind of error in X')
|
|
6126
6379
|
|
|
6127
6380
|
# numpy data setup fpr estimation
|
|
6128
6381
|
indices2 = self.get_named_indices(self.rdm_fit)
|
|
@@ -6222,7 +6475,7 @@ class ObjectiveFunction(object):
|
|
|
6222
6475
|
self.log_lik = log_lik
|
|
6223
6476
|
if self.significant == 0:
|
|
6224
6477
|
|
|
6225
|
-
|
|
6478
|
+
|
|
6226
6479
|
if not self.test_flag:
|
|
6227
6480
|
alpha, alpha_rdm, alpha_cor_rdm = self.modify(
|
|
6228
6481
|
self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
|
|
@@ -6273,6 +6526,93 @@ class ObjectiveFunction(object):
|
|
|
6273
6526
|
|
|
6274
6527
|
return obj_1, model_nature
|
|
6275
6528
|
|
|
6529
|
+
def get_X_tril(self):
    """For correlations, find the repeating terms (explanatory-variable side).

    Builds an integer index vector over the combined random-parameter name
    list ``rdm_grouped_fit + rdm_fit + rdm_cor_fit`` (joined by
    ``self.none_join``):

    * first ``0 .. n_uncor - 1``, one entry per grouped / independent random
      parameter;
    * then, for the i-th correlated parameter (in order of appearance), its
      position in the combined list repeated ``i + 1`` times.

    NOTE(review): the ``i + 1`` repetition looks like the row-wise layout of a
    lower-triangular (Cholesky) factor - confirm against the caller.

    Returns:
        numpy.ndarray: 1-D array of dtype ``int``.
    """
    # Resolved once; the membership test below does not change per iteration.
    correlated = self.none_handler(self.rdm_cor_fit)
    varnames = self.none_join(
        [self.rdm_grouped_fit, self.rdm_fit, self.rdm_cor_fit])

    # Position of every correlated variable inside the combined name list.
    corr_indices = [pos for pos, var in enumerate(varnames)
                    if var in correlated]

    # Row i contributes its *own* position, once per column 0..i.
    tril_idx = np.array([corr_indices[i]
                         for i in range(len(correlated))
                         for _ in range(i + 1)])

    # Uncorrelated random parameters occupy the leading positions.
    var_uncor = self.none_join([self.rdm_grouped_fit, self.rdm_fit])
    leading = list(range(len(self.none_handler(var_uncor))))

    # An empty part makes concatenate promote to float, hence the cast back.
    return np.concatenate((leading, tril_idx)).astype(int)
|
|
6570
|
+
|
|
6571
|
+
def get_X_draw_tril(self):
    """For correlations, find the repeating terms (random-draw side).

    Builds an integer index vector over the combined random-parameter name
    list ``rdm_grouped_fit + rdm_fit + rdm_cor_fit`` (joined by
    ``self.none_join``):

    * first ``0 .. n_uncor - 1``, one entry per grouped / independent random
      parameter;
    * then, for the i-th correlated parameter, the positions of correlated
      parameters ``0 .. i`` in order.

    This differs from ``get_X_tril`` only in the correlated part: there each
    row repeats its own position, here each row lists the positions of the
    columns it spans.

    NOTE(review): presumably used to pick the draws paired with each
    lower-triangular spread term - confirm against the gradient code.

    Returns:
        numpy.ndarray: 1-D array of dtype ``int``.
    """
    # Resolved once; the membership test below does not change per iteration.
    correlated = self.none_handler(self.rdm_cor_fit)
    varnames = self.none_join(
        [self.rdm_grouped_fit, self.rdm_fit, self.rdm_cor_fit])

    # Position of every correlated variable inside the combined name list.
    corr_indices = [pos for pos, var in enumerate(varnames)
                    if var in correlated]

    # Row i contributes the positions of columns 0..i (varnames positions).
    draws_idx = np.array([corr_indices[j]
                          for i in range(len(correlated))
                          for j in range(i + 1)])

    # Uncorrelated random parameters occupy the leading positions.
    var_uncor = self.none_join([self.rdm_grouped_fit, self.rdm_fit])
    leading = list(range(len(self.none_handler(var_uncor))))

    # An empty part makes concatenate promote to float, hence the cast back.
    return np.concatenate((leading, draws_idx)).astype(int)
|
|
6613
|
+
|
|
6614
|
+
|
|
6615
|
+
|
|
6276
6616
|
def modifyn(self, data):
|
|
6277
6617
|
select_data = self._characteristics_names
|
|
6278
6618
|
alpha = np.isin(select_data, [item.split(':')[0] for item in data['fixed_fit']]).astype(int).tolist()
|
|
@@ -6480,23 +6820,35 @@ class ObjectiveFunction(object):
|
|
|
6480
6820
|
# N, D = draws.shape[0], draws.shape[1]
|
|
6481
6821
|
N, R, Kr = draws.shape[0], draws.shape[2], draws.shape[1]
|
|
6482
6822
|
der = dev.np.ones((N, Kr, R), dtype=draws.dtype)
|
|
6483
|
-
if len(self.none_handler(self.rdm_cor_fit)) == 0:
|
|
6484
|
-
Br_come_one = self.Br.copy()
|
|
6485
|
-
# Br_come_one =
|
|
6486
|
-
else:
|
|
6487
6823
|
|
|
6488
|
-
Br_come_one = self.Br.copy()
|
|
6489
6824
|
# betas_random = self._transform_rand_betas(betas, betas_std, draws)
|
|
6490
6825
|
#todo make sure this works for ln and truncated normal
|
|
6491
6826
|
if any(set(distribution).intersection(['ln_normal', 'tn_normal'])):
|
|
6492
|
-
|
|
6827
|
+
|
|
6828
|
+
#print('check this, intesection shouldn not happen for all')
|
|
6829
|
+
|
|
6830
|
+
if der.shape[1] != draws.shape[1]:
|
|
6831
|
+
print('why')
|
|
6493
6832
|
Br_come_one = self._transform_rand_betas(betas, betas_std, draws)
|
|
6833
|
+
if der.shape[1] != draws.shape[1]:
|
|
6834
|
+
print('why')
|
|
6835
|
+
#TODO need to get the stuction of the rdms
|
|
6494
6836
|
for k, dist_k in enumerate(distribution):
|
|
6495
6837
|
if dist_k == 'ln_normal':
|
|
6838
|
+
if der.shape[1] != draws.shape[1]:
|
|
6839
|
+
print('why')
|
|
6496
6840
|
der[:, k, :] = Br_come_one[:, k, :]
|
|
6841
|
+
if der.shape[1] != draws.shape[1]:
|
|
6842
|
+
print('why')
|
|
6497
6843
|
elif dist_k == 'tn_normal':
|
|
6844
|
+
if der.shape[1] != draws.shape[1]:
|
|
6845
|
+
print('why')
|
|
6498
6846
|
der[:, k, :] = 1 * (Br_come_one[:, k, :] > 0)
|
|
6847
|
+
if der.shape[1] != draws.shape[1]:
|
|
6848
|
+
print('why')
|
|
6499
6849
|
|
|
6850
|
+
if der.shape[1] != draws.shape[1]:
|
|
6851
|
+
print('why')
|
|
6500
6852
|
return der
|
|
6501
6853
|
|
|
6502
6854
|
def _copy_size_display_as_ones(self, matrix):
|