metacountregressor 0.1.98__py3-none-any.whl → 0.1.107__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacountregressor/app_main.py +258 -0
- metacountregressor/helperprocess.py +267 -5
- metacountregressor/main.py +241 -98
- metacountregressor/metaheuristics.py +22 -11
- metacountregressor/setup.py +3 -2
- metacountregressor/solution.py +438 -132
- {metacountregressor-0.1.98.dist-info → metacountregressor-0.1.107.dist-info}/METADATA +37 -9
- metacountregressor-0.1.107.dist-info/RECORD +20 -0
- {metacountregressor-0.1.98.dist-info → metacountregressor-0.1.107.dist-info}/WHEEL +1 -1
- metacountregressor-0.1.98.dist-info/RECORD +0 -19
- {metacountregressor-0.1.98.dist-info → metacountregressor-0.1.107.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.98.dist-info → metacountregressor-0.1.107.dist-info}/top_level.txt +0 -0
metacountregressor/solution.py
CHANGED
@@ -30,19 +30,19 @@ from scipy.special import gammaln
 from sklearn.metrics import mean_absolute_error as MAE
 from sklearn.metrics import mean_squared_error as MSPE
 from statsmodels.tools.numdiff import approx_fprime, approx_hess
-from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from texttable import Texttable
-
+import time
 try:
     from ._device_cust import device as dev
     from .pareto_file import Pareto, Solution
     from .data_split_helper import DataProcessor
 except ImportError:
-    from
-    from
+    from _device_cust import device as dev
+    from pareto_file import Pareto, Solution
     from data_split_helper import DataProcessor

-
+from scipy import stats
 np.seterr(divide='ignore', invalid='ignore')
 warnings.simplefilter("ignore")

@@ -124,10 +124,11 @@ class ObjectiveFunction(object):

         self.reg_penalty = 0
         self.power_up_ll = False
-
+        self.nb_parma = 1
         self.bic = None
         self.other_bic = False
         self.test_flag = 1
+        self.no_extra_param =1 #if true, fix dispersion. w
         if self.other_bic:
             print('change this to false latter ')

@@ -135,10 +136,10 @@ class ObjectiveFunction(object):
         self.constant_value = 0
         self.negative_binomial_value = 1

-        self.verbose_safe =
+        self.verbose_safe = kwargs.get('verbose', 0)
         self.please_print = kwargs.get('please_print', 0)
         self.group_halton = None
-        self.grad_yes = False
+        self.grad_yes = kwargs.get('grad_est', False)
         self.hess_yes = False
         self.group_halton_test = None
         self.panels = None
@@ -151,15 +152,15 @@ class ObjectiveFunction(object):
         self.dist_fit = None

         self.MAE = None
-        self.best_obj_1 =
-        self._obj_1 = 'bic'
-        self._obj_2 = 'MSE'
+        self.best_obj_1 = 1000000.0
+        self._obj_1 = kwargs.get('_obj_1', 'bic')
+        self._obj_2 = kwargs.get('_obj_2', 'MSE')
         self.numerical_hessian_calc = 0  # calculates hessian by statsmodels otherwise scipy
         self.full_model = None
         self.GP_parameter = 0
-        self.is_multi =
+        self.is_multi = kwargs.get('is_multi', False)
         self.complexity_level = 6
-        self._max_iterations_improvement =
+        self._max_iterations_improvement = 10000
         self.generated_sln = set()
         self.ave_mae = 0
         # defalt paramaters for hs #TODO unpack into harmony search class
@@ -167,23 +168,32 @@ class ObjectiveFunction(object):
         self._hms = 20
         self._max_time = 60 * 60 * 24
         self._hmcr = .5
-        self._par = 0.3
+        self._par = 0.3 #dont think this gets useted
         self._mpai = 1
         self._max_imp = 100000
         self._WIC = 1000  # Number of Iterations without Multiobjective Improvement #tod chuck into solution
         self._panels = None
         self.is_multi = True
         self.method_ll = 'Nelder-Mead-BFGS'
+
         self.method_ll = 'L-BFGS-B'  # alternatives 'BFGS_2', 'BFGS
-        self.method_ll = 'BFGS_2'
+        self.method_ll = kwargs.get('method', 'BFGS_2')
+
+        #self.method_ll = 'Nelder-Mead-BFGS'
         self.Keep_Fit = 2
         self.MP = 0
         # Nelder-Mead-BFGS

-        self._max_characteristics = 26
+        self._max_characteristics = kwargs.get('_max_vars', 26)

         self.beta_dict = dict
+        if 'model_terms' in kwargs:
+            print('change')
+            if kwargs.get('model_terms').get('group') is not None:
+                kwargs['group'] = kwargs.get('model_terms').get('group')

+            if kwargs.get('model_terms').get('panels') is not None:
+                kwargs['panels'] = kwargs.get('model_terms').get('panels')
         acceptable_keys_list = ['_par', '_max_imp', '_hmcr', 'steps',
                                 'algorithm', '_random_seed', '_max_time',
                                 'forcedvariables', '_obj_1', '_obj_2', '_par',
@@ -201,12 +211,17 @@ class ObjectiveFunction(object):
         if 'instance_number' in kwargs:
            self.instance_number = str(kwargs['instance_number'])
         else:
+
+            print('no name set, setting name as 0')
             self.instance_number = str(0)  # set an arbitrary instance number

         if not os.path.exists(self.instance_number):
-
+            if kwargs.get('make_directory', True):
+                print('Making a Directory, if you want to stop from storing the files to this directory set argumet: make_directory:False')
+                os.makedirs(self.instance_number)

         if not hasattr(self, '_obj_1'):
+            print('_obj_1 required, define as bic, aic, ll')
             raise Exception

         self.pvalue_penalty = float(kwargs.get('pvalue_penalty', 0.5))
@@ -214,6 +229,11 @@ class ObjectiveFunction(object):
         self._maximize = False  # do we maximize or minimize?

         x_data = sm.add_constant(x_data)
+        standardize_the_data = 0
+        if standardize_the_data:
+            print('we are standardize the data')
+            x_data = self.self_standardize_positive(x_data)
+
         self._input_data(x_data, y_data)

@@ -230,13 +250,17 @@ class ObjectiveFunction(object):
         self.test_percentage = float(kwargs.get('test_percentage', 0))
         self.val_percentage = float(kwargs.get('val_percentage', 0))
         if self.test_percentage == 0:
+            print('test percentage is 0, please enter arg test_percentage as decimal, eg 0.8')
+            print('continuing single objective')
+            time.sleep(2)
             self.is_multi = False

-        if 'panels' in kwargs:
-
+        if 'panels' in kwargs and not (kwargs.get('panels') == None):
+            if kwargs.get('group') is not None:
+                self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)

-
-
+                x_data[kwargs['group']] = x_data[kwargs['group']].astype(
+                    'category').cat.codes
             self.complexity_level = 6
             # create test dataset

@@ -275,26 +299,31 @@ class ObjectiveFunction(object):

             #self.n_obs = N
             self._characteristics_names = list(self._x_data.columns)
-            self._max_group_all_means =
+            self._max_group_all_means = 2

             exclude_this_test = [4]

-            if 'panels' in kwargs:
+            if 'panels' in kwargs and not (kwargs.get('panels') == None):
                 self.panels = np.asarray(df_train[kwargs['panels']])
                 self.panels_test = np.asarray(df_test[kwargs['panels']])
                 self.ids = np.asarray(
                     df_train[kwargs['panels']]) if kwargs['panels'] is not None else None
                 self.ids_test = np.asarray(
                     df_test[kwargs['panels']]) if kwargs['panels'] is not None else None
-
-                '
-
-                '
+                if kwargs.get('group') is not None:
+                    groupll = np.asarray(df_train[kwargs['group']].astype(
+                        'category').cat.codes)
+                    group_test = np.asarray(df_test[kwargs['group']].astype(
+                        'category').cat.codes)
+                else:
+                    groupll = None
                 X, Y, panel, group = self._arrange_long_format(
                     df_train, y_train, self.ids, self.ids, groupll)
                 self.group_halton = group.copy()
                 self.group_dummies = pd.get_dummies(group)
                 Xnew, Ynew, panel_info = self._balance_panels(X, Y, panel)
+
+                Xnew = pd.DataFrame(Xnew, columns=X.columns)
                 self.panel_info = panel_info
                 self.N, self.P = panel_info.shape
                 Xnew.drop(kwargs['panels'], axis=1, inplace=True)
@@ -302,9 +331,11 @@ class ObjectiveFunction(object):
                 K = Xnew.shape[1]
                 self._characteristics_names = list(Xnew.columns)
                 XX = Xnew.values.reshape(self.N, self.P, K).copy()
+                XX = XX.astype('float')
                 self.group_dummies = self.group_dummies.values.reshape(self.N, self.P, -1)
                 self.group_halton = self.group_halton.reshape(self.N, self.P)[:, 0]
                 YY = Ynew.values.reshape(self.N, self.P, 1).copy()
+                YY = YY.astype('float')
                 self._x_data = XX.copy()
                 self._y_data = YY.copy()
                 X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.panels_test, group_test)
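Aside (not part of the diff): the panel-setup hunks above encode the grouping column as integer category codes and reshape the balanced long-format frame into an (N, P, K) float array. A minimal standalone sketch of that idiom, with hypothetical column names:

    import numpy as np
    import pandas as pd

    # Hypothetical panel data: 2 panels, 2 periods each, K=2 features.
    df = pd.DataFrame({
        'panel': [1, 1, 2, 2],
        'group': ['a', 'b', 'a', 'b'],
        'x1': [0.5, 1.0, 1.5, 2.0],
    })
    # Encode the grouping column as integer codes, as the hunk does.
    df['group'] = df['group'].astype('category').cat.codes
    N, P = 2, 2
    K = df.drop(columns='panel').shape[1]
    # Reshape long format to (N, P, K) and force a float dtype, mirroring
    # XX = Xnew.values.reshape(self.N, self.P, K); XX = XX.astype('float').
    XX = df.drop(columns='panel').values.reshape(N, P, K).astype('float')
    print(XX.shape)  # (2, 2, 2)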
@@ -326,6 +357,7 @@ class ObjectiveFunction(object):
                 K = X.shape[1]
                 self.columns_names = X.columns
                 X = X.values.reshape(self.N_test, self.P_test, K)
+                X = X.astype('float')
                 self.group_halton_test = self.group_halton_test.reshape(self.N_test, self.P_test)[:, 0]
                 Y = Y.values.reshape(self.N_test, self.P_test, 1)
                 Y = Y.astype('float')
@@ -338,6 +370,7 @@ class ObjectiveFunction(object):


             else:
+                print('No Panels. Grouped Random Paramaters Will not be estimated')
                 self.G = None
                 self._Gnum = 1
                 self._max_group_all_means = 0
@@ -354,7 +387,9 @@ class ObjectiveFunction(object):
                 K = Xnew.shape[1]
                 self._characteristics_names = list(Xnew.columns)
                 XX = Xnew.values.reshape(self.N, self.P, K).copy()
+                XX = XX.astype('float')
                 YY = Ynew.values.reshape(self.N, self.P, 1).copy()
+                YY = YY.astype('float')
                 self._x_data = XX.copy()
                 self._y_data = YY.copy()

@@ -370,7 +405,9 @@ class ObjectiveFunction(object):
                 K = X.shape[1]
                 self.columns_names = X.columns
                 X = X.values.reshape(self.N_test, self.P_test, K)
+                X = X.astype('float')
                 Y = Y.values.reshape(self.N_test, self.P_test, 1)
+                Y = Y.astype('float')
                 self._x_data_test = X.copy()
                 self.y_data_test = Y.copy()

@@ -385,7 +422,7 @@ class ObjectiveFunction(object):



-        self.Ndraws = 200
+        self.Ndraws = kwargs.get('Ndraws', 200)
         self.draws1 = None
         self.initial_sig = 1  # pass the test of a single model
         self.pvalue_sig_value = .1
@@ -403,17 +440,19 @@ class ObjectiveFunction(object):
             print('Setup Complete...')
         else:
             print('No Panels Supplied')
+            print('Setup Complete...')
         self._characteristics_names = list(self._x_data.columns)
         # define the variables
         # self._transformations = ["no", "sqrt", "log", "exp", "fact", "arcsinh", 2, 3]
         self._transformations = ["no", "sqrt", "log", "arcsinh"]
         self._transformations = kwargs.get('_transformation', ["no", "sqrt", "log", 'arcsinh'])
-
+        self._transformations = kwargs.get('_transformation', ["no", "log", "sqrt", "arcsinh"])
         # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']

-        self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', '
+        self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])

         if self.G is not None:
+            #TODO need to handle this for groups
             self._distribution = ["trad| " + item for item in self._distribution
                                   ] + ["grpd| " + item for item in self._distribution]

@@ -425,18 +464,32 @@ class ObjectiveFunction(object):

         self.significant = 0
         # define the states of our explanatory variables
+
+
         self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test,
-                                                   kwargs.get('must_include', []))
+                                                   kwargs.get('must_include', []), extra = kwargs.get('decisions', None))
+
+
+
         self._discrete_values = self._discrete_values + \
-
+            self.define_distributions_analyst(extra=kwargs.get('decisions', None))

         if 'model_types' in kwargs:
             model_types = kwargs['model_types']
         else:
-            model_types = [[0, 1]]  # add 2 for Generalized Poisson

+
+            model_types = [[0, 1]]  # add 2 for Generalized Poisson
+            #model_types = [[0]]
+            #TODO change back and fix NB
+        model_t_dict = {'Poisson':0,
+                        "NB":1}
+        # Retrieve the keys (model names) corresponding to the values in model_types
+        model_keys = [key for key, value in model_t_dict.items() if value in model_types[0]]
+        # Print the formatted result
+        print(f'The type of models possible will consider: {", ".join(model_keys)}')
         self._discrete_values = self._discrete_values + self.define_poissible_transforms(
-            self._transformations) + model_types
+            self._transformations, kwargs.get('decisions',None)) + model_types

         self._model_type_codes = ['p', 'nb',
                                   'gp', "pl", ["nb-theta", 'nb-dis']]
@@ -452,8 +505,9 @@ class ObjectiveFunction(object):
         self._max_hurdle = 4

         #Manually fit from analyst specification
-        manual_fit = kwargs.get('Manual_Fit')
+        manual_fit = kwargs.get('Manual_Fit', None)
         if manual_fit is not None:
+            print('fitting manual')
             self.process_manual_fit(manual_fit)

         self.solution_analyst = None
@@ -488,6 +542,7 @@ class ObjectiveFunction(object):
         if self.is_multi:
             self._offsets_test = self._x_data_test[:, :, val_od]
             self._x_data_test = self.remove_offset(self._x_data_test, val_od)
+            #print(self._offsets)
         else:
             self.initialize_empty_offsets()

@@ -760,6 +815,8 @@ class ObjectiveFunction(object):
         if dispersion == 0:
             return None, None
         elif dispersion == 2 or dispersion == 1:
+            if self.no_extra_param:
+                return self.nb_parma, None
             return betas[-1], None

         elif dispersion == 3:
@@ -787,14 +844,65 @@ class ObjectiveFunction(object):
         par = np.nan_to_num(par)
         return par

-    def
+    def rename_distro(self, distro):
+        # Mapping dictionary
+        mapping = {
+            'normal': ['normal', 'n', 'Normal'],
+            'triangular': ['triangular', 't', 'Triangular'],
+            'uniform': ['uniform', 'u', 'Uniform'],
+            'ln_normal': ['ln_normal', 'ln_n', 'Ln_Normal'],
+            'tn_normal': ['tn_normal', 'tn_n', 'trunc_normal']
+        }
+
+        # Use list comprehension with the mapping
+        reversed_mapping = {value: key for key, values in mapping.items() for value in values}
+
+        # Use the reversed mapping to find the corresponding key
+        new_distro = [reversed_mapping.get(i, i) for i in distro]
+        return new_distro
+
+    def define_distributions_analyst(self, extra = None):
+
+        if extra is not None:
+            set_alpha = []
+            for col in self._characteristics_names:
+                if col in extra[('Column')].values:
+                    matched_index = extra[('Column')].index[extra[('Column')] == col].tolist()
+                    distro = ast.literal_eval(extra.iloc[matched_index, 7].values.tolist()[0])
+                    distro = self.rename_distro(distro)
+                    set_alpha = set_alpha+[distro]
+                elif col == 'const':
+                    set_alpha = set_alpha +[['normal']]
+            return set_alpha
+        return [[x for x in self._distribution]] * self._characteristics
+
+
+
+
+    def define_alphas(self, complexity_level=4, exclude=[], include=[], extra = None):
         'complexity level'
         '''
         2 is feature selection,
-        3 is random
-        4 is correlated random
+        3 is random parameters
+        4 is correlated random parameters
+
+        extra is the stuff defined by the Meta APP
         '''
         set_alpha = []
+        if extra is not None:
+            for col in self._characteristics_names:
+                if col == 'const' or col == 'Constant' or col == 'constant':  # no random paramaters for const
+                    set_alpha = set_alpha + [[1]]
+                elif col == 'Offset':
+                    set_alpha = set_alpha + [[1]]

+                elif col in extra[('Column')].values:
+                    matched_index = extra[('Column')].index[extra[('Column')] == col].tolist()
+                    check = list(itertools.chain(*extra.iloc[matched_index, 1:7].values))
+                    set_alpha = set_alpha + [[x for x in range(len(check)) if check[x] == True]]
+            return set_alpha
+
+
         for col in self._characteristics_names:
             if col == 'const' or col == 'Constant' or col == 'constant':  # no random paramaters for const
                 set_alpha = set_alpha + [[1]]
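Aside (not from the package): rename_distro above normalises user-supplied distribution names by inverting a one-to-many alias map. The inversion idiom in isolation:

    # Invert {canonical: [aliases]} so each alias keys its canonical name.
    mapping = {
        'normal': ['normal', 'n', 'Normal'],
        'triangular': ['triangular', 't', 'Triangular'],
    }
    reversed_mapping = {alias: canon
                        for canon, aliases in mapping.items()
                        for alias in aliases}
    # Unknown names pass through unchanged via dict.get(i, i).
    print([reversed_mapping.get(d, d) for d in ['n', 'Triangular', 'uniform']])
    # -> ['normal', 'triangular', 'uniform']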
@@ -845,10 +953,12 @@ class ObjectiveFunction(object):
         return ([self._model_type_codes[dispersion]])

     def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
-
+        '''
         setup for naming of the model summary
         '''
+        if self.no_extra_param and dispersion ==1:

+            betas = np.append(betas, self.nb_parma)

         self.name_deleter = []
         group_rpm = None
@@ -969,13 +1079,15 @@ class ObjectiveFunction(object):
             [''] * (len(names) - len(self.transform_id_names))
         self.coeff_names = names

+        '''
         if betas is not None:
             try:
                 if len(betas) != len(names):
-                    print('
-
+                    print('standard_model', no_draws)
+
             except Exception as e:
                 print(e)
+        '''


@@ -1000,7 +1112,8 @@ class ObjectiveFunction(object):
         if not isinstance(self.pvalues, np.ndarray):
             raise Exception

-
+        if 'nb' in self.coeff_names and self.no_extra_param:
+            self.pvalues = np.append(self.pvalues,0)

         if self.please_print or save_state:

@@ -1016,17 +1129,22 @@ class ObjectiveFunction(object):

         if solution is not None:
             print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")
-
+
         self.pvalues = [self.round_with_padding(
             x, 2) for x in self.pvalues]
         signif_list = self.pvalue_asterix_add(self.pvalues)
         if model == 1:

-            self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
-            if self.
+            #self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
+            if self.no_extra_param:
+                self.coeff_ = np.append(self.coeff_, self.nb_parma)
+                self.stderr = np.append(self.stderr, 0.00001)
+                self.zvalues = np.append(self.zvalues, 50)
+
+            elif self.coeff_[-1] < 0.25:
                 print(self.coeff_[-1], 'Warning Check Dispersion')
                 print(np.exp(self.coeff_[-1]))
-                self.coeff_[-1] = np.exp(self.coeff_[-1])  # min possible value for negbinom
+                #self.coeff_[-1] = np.exp(self.coeff_[-1])  # min possible value for negbinom

         self.coeff_ = [self.round_with_padding(x, 2) for x in self.coeff_]

@@ -1238,7 +1356,7 @@ class ObjectiveFunction(object):
         with open(filename, 'w') as file:
             file.write(content)

-    def define_poissible_transforms(self, transforms) -> list:
+    def define_poissible_transforms(self, transforms, extra= None) -> list:
         transform_set = []
         if not isinstance(self._x_data, pd.DataFrame):
             x_data = self._x_data.reshape(self.N * self.P, -1).copy()
@@ -1249,6 +1367,7 @@ class ObjectiveFunction(object):

             if 'AADT' in self._characteristics_names[col]:
                 new_transform = [['log']]
+                #new_transform = [['no']]
                 transform_set = transform_set + new_transform

             elif all(x_data[col] <= 5):
@@ -1288,6 +1407,18 @@ class ObjectiveFunction(object):

         return transform_set

+    def poisson_mean_get_dispersion(self, betas, X, y):
+        eVy = self._loglik_gradient(betas, X, y, None, X, None, None, False, False, dispersion=0,
+                                    return_EV=True,
+                                    zi_list=None, draws_grouped=None, Xgroup=None)
+
+        ab = ((y - eVy)**2 - eVy)/eVy
+        bb = eVy -1
+        disp = sm.OLS(ab.ravel(), bb.ravel()).fit()
+        gamma = disp.params[0]
+        #print(f'dispersion is {gamma}')
+        return gamma
+
     def validation(self, betas, y, X, Xr=None, dispersion=0, rdm_cor_fit=None, zi_list=None, exog_infl=None,
                    model_nature=None, halton=1, testing=1, validation=0):
         'validation if mu needs to be calculated'
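Aside (a sketch under assumed data, not package code): poisson_mean_get_dispersion implements a moment-based auxiliary regression — given Poisson fitted means mu, regress ((y - mu)^2 - mu)/mu on (mu - 1) with no intercept and read the slope as the dispersion:

    import numpy as np
    import statsmodels.api as sm

    rng = np.random.default_rng(0)
    mu = np.exp(rng.normal(0.5, 0.3, size=500))       # stand-in fitted means
    y = rng.negative_binomial(2.0, 2.0 / (2.0 + mu))  # overdispersed counts

    ab = ((y - mu) ** 2 - mu) / mu                    # moment-condition LHS
    bb = mu - 1                                       # regressor, no constant
    gamma = sm.OLS(ab, bb).fit().params[0]
    print(gamma)  # positive when the counts are overdispersed relative to Poisson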
@@ -1321,7 +1452,7 @@ class ObjectiveFunction(object):
                     XG = model_nature.get('XGtest')[:total_percent, :, :]
                 else:
                     XG = model_nature.get('XGtest')[total_percent:, :, :]
-
+
         else:
             if 'XG' in model_nature:
                 XG = model_nature.get('XG')
@@ -1443,7 +1574,7 @@ class ObjectiveFunction(object):
        5: herogeneity_in _means

-        a: how to
+        a: how to transform the original data
         b: grab dispersion '''

         # todo: better way
@@ -1791,7 +1922,10 @@ class ObjectiveFunction(object):
         elif dispersion == 4:
             return 2
         else:
-
+            if self.no_extra_param:
+                return 0
+            else:
+                return 1

     def get_pvalue_info_alt(self, pvalues, names, sig_value=0.05, dispersion=0, is_halton=1, delete=0,
                             return_violated_terms=0):
@@ -1806,6 +1940,7 @@ class ObjectiveFunction(object):

         else:
             slice_this_amount = self.num_dispersion_params(dispersion)
+            slice_this_amount = 1 #TODO handle this
             if pvalues[-1] > sig_value:
                 vio_counts += 1
             subpvalues = pvalues[:-slice_this_amount].copy()
@@ -2230,7 +2365,7 @@ class ObjectiveFunction(object):
         sorted(my_dict, key=lambda x: x[0]['pval_percentage'])

     def get_fitness(self, vector, multi=False, verbose=False, max_routine=3):
-        obj_1 = 10.0 **
+        obj_1 = 10.0 ** 4
         obj_best = None
         sub_slns = list()

@@ -2238,12 +2373,14 @@ class ObjectiveFunction(object):
             vector)  # just added to grab the fixed fit TODO: Clean up
         dispersion = model_nature.get('dispersion')
         self.define_selfs_fixed_rdm_cor(model_nature)
+        print('before', vector)
         try:
             self.repair(vector)
         except Exception as e:
-            print('
+            print('problem repairing here')
             print(vector)
             print(e)
+        print('after', vector)
         layout = vector.copy()
         trial_run = 0
         max_trial = 0
@@ -2322,10 +2459,10 @@ class ObjectiveFunction(object):


         if not self.is_quanitifiable_num(obj_1[self._obj_1]):
-            obj_1[self._obj_1] = 10 **
+            obj_1[self._obj_1] = 10 ** 5
         else:
             if obj_1[self._obj_1] <= 0:
-                obj_1[self._obj_1] = 10 **
+                obj_1[self._obj_1] = 10 ** 5

         if multi:

@@ -2356,10 +2493,10 @@ class ObjectiveFunction(object):

         self.reset_sln()
         if not self.is_quanitifiable_num(obj_1[self._obj_1]):
-            obj_1[self._obj_1] = 10 **
+            obj_1[self._obj_1] = 10 ** 5
         else:
             if obj_1[self._obj_1] == 0:
-                obj_1[self._obj_1] = 10 **
+                obj_1[self._obj_1] = 10 **5
         if verbose:
             print('The best solution iteratively is of objective value:', obj_1)

@@ -2488,7 +2625,7 @@ class ObjectiveFunction(object):
         random.seed(seed)

     def set_random_seed(self):
-        print('
+        print('Imbedding Seed', self._random_seed)
         np.random.seed(self._random_seed)

         random.seed(self._random_seed)
@@ -2522,7 +2659,7 @@ class ObjectiveFunction(object):
         self._hmcr = (
             self._hmcr_min + ((self._hmcr_max - self._hmcr_min) / self._max_imp) * iteration)

-
+

     def update_par(self, iteration, is_sin=False):
         """
@@ -2742,10 +2879,6 @@ class ObjectiveFunction(object):
         '''
         #return score

-
-
-
-
         try:
             if alpha is None:
                 alpha = np.exp(params[-1])
@@ -2886,7 +3019,7 @@ class ObjectiveFunction(object):
         argument = prob.mean(axis=1)
         # if less than 0 penalise
         if np.min(argument) < 0:
-            print('
+            print('Error with args..')
         if np.min(argument) < limit:
             # add a penalty for too small argument of log
             log_lik += -np.sum(np.minimum(0.0, argument - limit)) / limit
@@ -3377,6 +3510,7 @@ class ObjectiveFunction(object):
         else:
             corr_pairs = list(itertools.combinations(corr_indices, 2))

+
         for ii, corr_pair in enumerate(corr_pairs):
             # lower cholesky matrix
             chol_mat_temp[tuple(reversed(corr_pair))] = chol[chol_count]
@@ -3404,7 +3538,7 @@ class ObjectiveFunction(object):
         a = 0
         b = 0
         stuff = []
-        #
+        # TODO get order
         for j, i in enumerate(list_sizes):
             br_mean = betas_hetro[a:i + a]
             a += i
@@ -3431,7 +3565,32 @@ class ObjectiveFunction(object):
             br_mean = betas_m
             br_sd = betas_sd  # Last Kr positions
             # Compute: betas = mean + sd*draws
-
+            if len(br_sd) != draws.shape[1]:
+                #get the same size as the mean
+                #if hasattr(self.Br):
+                   # betas_random = self.Br.copy()
+                #else:
+                betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
+                '''
+                c = self.get_num_params()[3:5]
+
+                cor = []
+                for i in range(c[0]):
+                    cor.append(i)
+
+                vall =[]
+                for i, val in enumerate(reversed(br_sd)):
+                    vall.append()
+
+                remaining = draws.shape[1] - len(betas_sd)
+                '''
+
+            else:
+
+
+                betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
+
+
         betas_random = self._apply_distribution(betas_random)

         return betas_random
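Aside (assumed shapes, not package code): the branch added to _transform_rand_betas builds simulated random coefficients as mean + sd * draw across all draws at once by broadcasting:

    import numpy as np

    N, Kr, R = 4, 3, 100                      # observations, random coeffs, draws
    rng = np.random.default_rng(1)
    draws = rng.standard_normal((N, Kr, R))   # stand-in for Halton/normal draws
    br_mean = np.array([0.2, -0.1, 0.5])      # coefficient means, shape (Kr,)
    br_sd = np.array([0.05, 0.10, 0.20])      # coefficient spreads, shape (Kr,)

    # Broadcast (Kr,) against (N, Kr, R): betas_random[n, k, r] = mean_k + sd_k * draw
    betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
    print(betas_random.shape)                 # (4, 3, 100)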
@@ -3450,21 +3609,38 @@ class ObjectiveFunction(object):
        # if gamma <= 0.01: #min defined value for stable nb
        #     gamma = 0.01

+        #g = stats.gamma.rvs(gamma, scale = lam/gamma, size = 1.0 / gamma * lam ** Q )

+        #gg = stats.poisson.rvs(g)

+

+
        endog = y
        mu = lam
+        ''''
+        mu = lam*np.exp(gamma) #TODO check that this does not need to be multiplied
        alpha = np.exp(gamma)
-
-
-
-
-
+
+        '''
+        alpha = gamma
+        size = 1.0 / alpha * mu ** Q
+
+        prob = size/(size+mu)
+
+

        '''test'''

+        '''
+        size = 1 / np.exp(gamma) * mu ** 0
+        prob = size / (size + mu)
+        coeff = (gammaln(size + y) - gammaln(y + 1) -
+                 gammaln(size))
+        llf = coeff + size * np.log(prob) + y * np.log(1 - prob)
+        '''
+
        try:
            # print(np.shape(y),np.shape(size), np.shape(prob))
            #gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
@@ -3476,22 +3652,28 @@ class ObjectiveFunction(object):
            #start_time = time.time()
            #for _ in range(10000):

-
+
            #end_time = time.time()
            #print("Custom functieon time:", end_time - start_time)
            #start_time = time.time()
            #for _ in range(10000):
+            '''
            gg = np.exp(
                gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
                        y + alpha) * np.log(mu + alpha))
            gg[np.isnan(gg)] = 1
+            '''
+            gg_alt = nbinom.pmf(y ,1/alpha, prob)
+            #gg_alt_2 = (gammaln(size + y) - gammaln(y + 1) -
+            #gammaln(size)) + size * np.log(prob) + y * np.log(1 - prob)
+            #print('check theses')
            #gg = nbinom.pmf(y ,alpha, prob)
            #end_time = time.time()
            #print("Custom functieon time:", end_time - start_time)

        except Exception as e:
-            print(
-            return
+            print("Neg Binom error.")
+        return gg_alt

    def lindley_pmf(self, x, r, theta, k=50):
        """
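Aside (sketch, Q = 0 assumed so size = 1/alpha): the switch to gg_alt = nbinom.pmf(y, 1/alpha, prob) uses scipy's (size, prob) parameterisation with prob = size/(size + mu), which agrees with the gammaln expression the hunk comments out:

    import numpy as np
    from scipy.stats import nbinom
    from scipy.special import gammaln

    y, mu, alpha = 3, 2.5, 0.8
    size = 1.0 / alpha                  # Q = 0: size = 1/alpha * mu**0
    prob = size / (size + mu)

    pmf_scipy = nbinom.pmf(y, size, prob)
    log_pmf = (gammaln(size + y) - gammaln(y + 1) - gammaln(size)
               + size * np.log(prob) + y * np.log(1 - prob))
    print(pmf_scipy, np.exp(log_pmf))   # equal up to floating point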
@@ -3638,8 +3820,8 @@ class ObjectiveFunction(object):

         if dispersion == 1 or dispersion == 4:  # nb
             # if model_nature is not None and 'dispersion_penalty' in model_nature:
-
-
+            #b_gam = 1/np.exp(b_gam)
+            #print(b_gam)
             if b_gam <= 0:
                 #penalty += 100
                 #penalty += abs(b_gam)
@@ -3647,9 +3829,9 @@ class ObjectiveFunction(object):
                 #b_gam = 1

                 # if b_gam < 0.03:
-                penalty += min(1, np.abs(b_gam))
+                penalty += min(1, np.abs(b_gam), 0)

-                b_gam = 0.001
+                #b_gam = 0.001
                 #

                 #if b_gam >= 10:
@@ -3681,8 +3863,15 @@ class ObjectiveFunction(object):
     def eXB_calc(self, params_main, Xd, offset, dispersion, b_gam=None):

         # print('this was 0')
-
+        if dispersion:
+            eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
+
+            #eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])+dispersion
+            #print('check if this holds size')
+        else:
+            eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
         eta = np.array(eta)
+
         # eta = np.float64(eta)
         # eta = np.dot(Xd, params_main)+offset[:,:,0]
         # eta2 = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
@@ -3701,7 +3890,7 @@ class ObjectiveFunction(object):

         else:
             # eVd = self.my_lindley(np.exp(np.clip(eta, None, EXP_UPPER_LIMIT)), 1.29)
-
+            eta = eta.astype('float')
             eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
         return eVd

@@ -3817,7 +4006,7 @@ class ObjectiveFunction(object):


             # proba_d = self.dnegbimonli(y, eVd, b_gam )
-
+

         elif dispersion == 2:

@@ -3838,7 +4027,7 @@ class ObjectiveFunction(object):
             # proba_r = self.poisson_lognormal_pmf(y, eVd, sig)
             proba_r = np.array(store)
             proba_r = np.atleast_2d(proba_r).T
-
+

         else:
             raise Exception('not implemented other modeling forms')
@@ -3855,6 +4044,8 @@ class ObjectiveFunction(object):
         if dispersion == 0 or dispersion == 3:
             return 0
         else:
+
+
             return 1

     def _prob_product_across_panels(self, pch, panel_info):
@@ -3910,7 +4101,7 @@ class ObjectiveFunction(object):
             if y[i] == 0:
                 gr_e[i] = 0

-        if self.is_dispersion(dispersion):
+        if self.is_dispersion(dispersion) and not self.no_extra_param:
             gr_d = np.zeros((N, 1))
             if dispersion == 1:
                 # trying alt
@@ -4014,12 +4205,13 @@ class ObjectiveFunction(object):
                 br, draws_, brstd, dis_fit_long)  # (N,K,R)
             dprod_r = dev.np.einsum("njk,njr -> nkr", Xdr,
                                     einsum_model_form, dtype=np.float64)  # (N,K,R)
-            der_prod_r = dprod_r * der * proba_n[:, None, :]  # (N,K,R)
-            der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :]  # I think this is the case check
-
-
+            #der_prod_r = dprod_r * der * proba_n[:, None, :]  # (N,K,R)
+            #der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :]  # I think this is the case check
+
+            der_prod_r = dprod_r * der * proba_n[:, None, :]  # or this one
+
             der_t = self._compute_derivatives(
-                br, draws_[:, draws_tril_idx, :], brstd, self.dist_fit)  # (N,K,R)
+                br[draws_tril_idx], draws_[:, draws_tril_idx, :], brstd, np.array(self.dist_fit)[draws_tril_idx])  # (N,K,R)
             # er_t = self._compute_derivatives(br, draws_, brstd[:, draws_tril_idx,: ], self.dist_fit, draws_tril_idx)
             der_prod_r_t = dprod_r[:, draws_tril_idx, :] * \
                 der_t * proba_n[:, None, :]  # (N,K,R)
@@ -4080,14 +4272,18 @@ class ObjectiveFunction(object):
             grad_n = self._concat_gradients(
                 (gr_f, gr_u, gr_s, gr_e)) / Rlik  # (N,K)
         else:
-
-
-
+            if self.no_extra_param:
+                grad_n = self._concat_gradients(
+                    (gr_f, gr_u, gr_s, gr_h, gr_hs)) / Rlik  # (N,K)
+            else:
+                grad_n = self._concat_gradients(
+                    (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik  # (N,K)
+            grad_n = np.nan_to_num(grad_n, nan=0, posinf=1000, neginf=-1000)
+            grad_n = np.clip(grad_n, -100, 100)
         n = np.shape(grad_n)[0]
         # subtract out mean gradient value
-
-
+        grad_n_sub = grad_n-(np.sum(grad_n, axis=0)/n)
+        grad_n = grad_n_sub
         grad = grad_n.sum(axis=0)
         return grad, grad_n
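Aside (standalone arithmetic, not package code): the new lines centre the per-observation gradient rows before summing. Note that, by construction, summing the centred rows gives a near-zero total, so the centring mainly reshapes grad_n itself rather than the summed gradient:

    import numpy as np

    rng = np.random.default_rng(2)
    grad_n = rng.normal(size=(6, 3))                   # (N observations, K parameters)
    n = grad_n.shape[0]

    grad_n_sub = grad_n - np.sum(grad_n, axis=0) / n   # subtract column means
    print(np.allclose(grad_n_sub.mean(axis=0), 0))     # True
    print(np.allclose(grad_n_sub.sum(axis=0), 0))      # True: summed gradient ~ 0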
@@ -4238,7 +4434,7 @@ class ObjectiveFunction(object):
         return proba_r.sum(axis=1), np.squeeze(proba_r)

     def _penalty_betas(self, betas, dispersion, penalty, penalty_ap=100.0):
-        penalty_val = 0.
+        penalty_val = 0.1
         penalty_val_max = 130

         # print('change_later')
@@ -4254,8 +4450,8 @@ class ObjectiveFunction(object):
             if abs(i) > penalty_val_max:
                 penalty += abs(i)

-            #
-            #
+            #if abs(i) < penalty_val:
+            #    penalty += 5

         # penalty = 0
         return penalty
@@ -4362,8 +4558,7 @@ class ObjectiveFunction(object):
             index += 1

         brstd = br_std
-
-        print(brstd)
+


     def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
@@ -4395,7 +4590,7 @@ class ObjectiveFunction(object):
         penalty = self._penalty_betas(
             betas, dispersion, penalty, float(len(y) / 10.0))
         self.n_obs = len(y)  # feeds into gradient
-        if draws is None and draws_grouped is None and (
+        if draws is None and draws_grouped is None and (model_nature is None or
                 'draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
             #TODO do i shuffle the draws
             if type(Xd) == dict:
@@ -4448,7 +4643,7 @@ class ObjectiveFunction(object):
                 penalty = self.regularise_l2(betas)

             if not np.isreal(loglik):
-                loglik = -
+                loglik = - 10000000.0

             output = (-loglik + penalty,)
             if return_gradient:
@@ -4542,7 +4737,9 @@ class ObjectiveFunction(object):
             Kf = 0
         else:
             if n_coeff != len(betas):
-                raise Exception
+                raise Exception(
+
+                )
             Bf = betas[0:Kf]  # Fixed betas

@@ -4644,7 +4841,8 @@ class ObjectiveFunction(object):
             eVd = self.lam_transform(eVd, dispersion, betas[-1])

         if self.is_dispersion(dispersion):
-
+            if not self.no_extra_param:
+                penalty, betas[-1] = self._penalty_dispersion(
                 dispersion, betas[-1], eVd, y, penalty, model_nature)

         '''
@@ -4688,7 +4886,7 @@ class ObjectiveFunction(object):
             proba.append(dev.to_cpu(proba_))

         lik = np.stack(proba).sum(axis=0) / R  # (N, )
-        lik = np.clip(lik, min_comp_val,
+        lik = np.clip(lik, min_comp_val, max_comp_val)
         # lik = np.nan_to_num(lik, )
         loglik = np.log(lik)
         llf_main = loglik
@@ -5018,7 +5216,7 @@ class ObjectiveFunction(object):
             H = self.numerical_hessian(lambda x: self._loglik_gradient(x, *argbs), result.x, eps=1e-7 * self.n_obs)
             result['Hessian'] = H
             result['hess_inv'] = np.linalg.pinv(H)
-
+
             standard_errors = np.sqrt(np.diag(np.linalg.pinv(H)))
         return result
         # return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS', options= {'gtol':1e-7*self.N}*self.Ndraws)
@@ -5289,7 +5487,7 @@ class ObjectiveFunction(object):
         return a

     def fitRegression(self, mod,
-                      dispersion=0, maxiter=2000, batch_size=None, num_hess=False):
+                      dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):

         """
         Fits a poisson regression given data and outcomes if dispersion is not declared
@@ -5306,7 +5504,7 @@ class ObjectiveFunction(object):


         sol = Solution()
-
+
         tol = {'ftol': 1e-8, 'gtol': 1e-6}
         is_delete = 0
         dispersion = mod.get('dispersion')
@@ -5335,6 +5533,8 @@ class ObjectiveFunction(object):
         _g, pg, kg = 0, 0, 0

         dispersion_param_num = self.is_dispersion(dispersion)
+        if self.no_extra_param:
+            dispersion_param_num =0

         #paramNum = self.get_param_num(dispersion)
         self.no_random_paramaters = 0
@@ -5389,17 +5589,26 @@ class ObjectiveFunction(object):
             else:
                 bb[0] = self.constant_value
             if dispersion == 1:
-
+                if not self.no_extra_param:
+                    bb[-1] = self.negative_binomial_value
             bounds = None

+
+
             # intial_beta = minimize(self._loglik_gradient, bb, args =(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method = 'nelder-mead', options={'gtol': 1e-7*len(XX)})
             hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
-
+
+            if self.no_extra_param:
+                dispersion_poisson = 0
+                initial_beta = self._minimize(self._loglik_gradient, bb,
                                           args=(XX, y, None, None, None, None, calc_gradient, hess_est,
-
+                                                dispersion_poisson, 0, False, 0, None, None, None, None, None,
                                                 mod),
                                           method=method2, tol=1e-5, options={'gtol': tol['gtol']},
                                           bounds=bounds)
+            if dispersion:
+                nb_parma = self.poisson_mean_get_dispersion(initial_beta.x, XX, y)
+
@@ -5499,7 +5708,7 @@ class ObjectiveFunction(object):

             b = [b[i] if i > len(self.none_handler(self.fixed_fit)) + len(
                 self.none_handler(self.rdm_fit)) + len(
-                self.none_handler(self.rdm_cor_fit)) else b[i] / 1
+                self.none_handler(self.rdm_cor_fit)) else b[i] / 1 for i in range(len(b))]
         else:
             b = bb
@@ -5509,9 +5718,10 @@ class ObjectiveFunction(object):
         else:
             b = np.insert(b, -1, np.random.uniform(0.05, 0.1))
         if dispersion == 1:
-
-
-            b[-1]
+            if not self.no_extra_param:
+                b[-1] = np.abs(b[-1])
+                if b[-1] > 10:
+                    b[-1] = 5
         elif dispersion == 2:
             b[-1] = .5
         if method == 'L-BFGS-B' or method2 == 'L-BFGS-B':
@@ -5637,13 +5847,35 @@ class ObjectiveFunction(object):

         if draws is None and draws_hetro is not None:
             print('hold')
-
-
-
-
-
-
-
+        #self.grad_yes = True
+        #self.hess_yes = True
+
+        if self.no_extra_param:
+            dispersion_poisson = 0
+            print('b :', len(b))
+            print(self.get_param_num())
+            baby = self.get_param_num()
+            if len(b) != baby:
+                print('modify')
+            betas_est = self._minimize(self._loglik_gradient, b, args=(
+                X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion_poisson, 0, False, 0,
+                self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
+                method=method2, tol=tol['ftol'],
+                options={'gtol': tol['gtol']}, bounds=bounds,
+                hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+            if dispersion:
+                initial_fit_beta = betas_est.x
+                parmas = np.append(initial_fit_beta, nb_parma)
+                self.nb_parma = nb_parma
+                #print(f'neg binomi,{self.nb_parma}')
+                betas_est = self._minimize(self._loglik_gradient, initial_fit_beta, args=(
+                    X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
+                    self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
+                    method=method2, tol=tol['ftol'],
+                    options={'gtol': tol['gtol']}, bounds=bounds,
+                    hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+
+            #print('refit with estimation of NB')
         # self.numerical_hessian_calc = True
         if self.numerical_hessian_calc:
             try:
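Aside (a sketch with assumed data and statsmodels stand-ins, not the package's own estimator): the no_extra_param path above is a two-stage scheme — fit Poisson coefficients first, back out a dispersion value from that fit, then refit the negative binomial with the dispersion held fixed:

    import numpy as np
    import statsmodels.api as sm

    rng = np.random.default_rng(3)
    X = sm.add_constant(rng.normal(size=(400, 2)))
    mu_true = np.exp(0.3 + 0.5 * X[:, 1] - 0.2 * X[:, 2])
    y = rng.negative_binomial(2.0, 2.0 / (2.0 + mu_true))

    # Stage 1: Poisson fit for the mean function.
    poisson_fit = sm.GLM(y, X, family=sm.families.Poisson()).fit()
    mu = poisson_fit.fittedvalues
    # Stage 2: moment-based dispersion from the Poisson fit.
    gamma = sm.OLS(((y - mu) ** 2 - mu) / mu, mu - 1).fit().params[0]
    # Stage 3: refit coefficients with the dispersion held fixed.
    nb_fit = sm.GLM(y, X, family=sm.families.NegativeBinomial(alpha=max(gamma, 1e-3))).fit()
    print(nb_fit.params)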
@@ -5942,7 +6174,7 @@ class ObjectiveFunction(object):
         return delim + self._model_type_codes[dispersion]

     def self_standardize_positive(self, X):
-        scaler =
+        scaler = MinMaxScaler()
         if type(X) == list:
             return X

@@ -5952,12 +6184,26 @@ class ObjectiveFunction(object):
             # Reshaping to 2D - combining the last two dimensions
             df_tf_reshaped = X.reshape(original_shape[0], -1)
             df_tf_scaled = scaler.fit_transform(df_tf_reshaped)
-            df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
+            #df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
             # Reshape back to original 3D shape if necessary
             df_tf = df_tf_scaled.reshape(original_shape)
             return df_tf
         else:
-
+            # Initialize the MinMaxScaler
+            scaler = MinMaxScaler()
+            float_columns = X.select_dtypes(include=['float64', 'float32', 'int']).columns.difference(['const', 'offset, "EXPOSE', 'Constant', 'constant'])
+            non_numeric_columns = X.select_dtypes(exclude=['float64', 'float32', 'int']).columns
+
+            # Fit the scaler to the float columns and transform them
+            X[float_columns] = scaler.fit_transform(X[float_columns])
+            # Fit the scaler to the data and transform it
+            #scaled_data = scaler.fit_transform(X)
+
+            # Convert the result back to a DataFrame
+            #scaled_df = pd.DataFrame(scaled_data, columns=X.columns)
+
+
+            return X

     def make_regression_from_terms(self, fixed=None, rdm=None, rdm_cor_fit=None, distribution=None, dispersion=None,
                                    *args, **kwargs):
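Aside (illustrative column names, not package code): self_standardize_positive now scales only the numeric columns of a DataFrame with MinMaxScaler, leaving constant/offset-style columns untouched:

    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler

    df = pd.DataFrame({'const': [1.0, 1.0, 1.0],
                       'AADT': [1200.0, 5400.0, 800.0],
                       'lanes': [2, 4, 2]})
    scaler = MinMaxScaler()
    # Select numeric columns, then drop the ones that must stay unscaled.
    cols = df.select_dtypes(include=['float64', 'float32', 'int']).columns.difference(['const'])
    df[cols] = scaler.fit_transform(df[cols])
    print(df)  # 'AADT' and 'lanes' rescaled to [0, 1]; 'const' untouched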
@@ -6012,8 +6258,9 @@ class ObjectiveFunction(object):
                 df_test[:, :, idx], model_nature.get('transformations')[idx] = self.transformer(
                     t, idx, df_test[:, :, idx])
                 if np.max(df_tf[:, :, idx]) >= 77000:
+                    #TODO need to normalise the data

-
+                    print('should not be possible')

         self.define_selfs_fixed_rdm_cor(model_nature)
         indices = self.get_named_indices(self.fixed_fit)
@@ -6070,7 +6317,7 @@ class ObjectiveFunction(object):
         model_nature['XH'] = XH
         X_test = None
         if np.isin(X, [np.inf, -np.inf, None, np.nan]).any():  # type ignore
-            raise Exception('there is some kind of error')
+            raise Exception('there is some kind of error in X')

         # numpy data setup fpr estimation
         indices2 = self.get_named_indices(self.rdm_fit)
@@ -6170,7 +6417,7 @@ class ObjectiveFunction(object):
         self.log_lik = log_lik
         if self.significant == 0:

-
+
             if not self.test_flag:
                 alpha, alpha_rdm, alpha_cor_rdm = self.modify(
                     self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
@@ -6221,6 +6468,53 @@ class ObjectiveFunction(object):

         return obj_1, model_nature

+    def get_X_tril(self):
+        '''For correlations find the repeating terms'''
+        varnames = self.none_join([self.rdm_grouped_fit, self.rdm_fit, self.rdm_cor_fit])
+        rv_count_all = 0
+        chol_count = 0
+        rv_count = 0
+        corr_indices = []
+        rv_indices = []
+        for ii, var in enumerate(varnames):  # TODO: BUGFIXf
+            if var in self.none_handler(self.rdm_cor_fit):
+                is_correlated = True
+            else:
+                is_correlated = False
+
+            rv_count_all += 1
+            if is_correlated:
+                chol_count += 1
+            else:
+                rv_count += 1
+
+            if var in self.none_handler(self.rdm_cor_fit):
+
+                corr_indices.append(rv_count_all - 1)  # TODO: what does tis do
+
+            else:
+                rv_indices.append(rv_count_all - 1)
+
+        # for s.d.: gr_w = (Obs prob. minus predicted probability) * obs. var * random draw
+        draws_tril_idx = np.array([corr_indices[j]
+                                   for i in range(len(self.none_handler(self.rdm_cor_fit)))
+                                   for j in range(i + 1)])  # varnames pos.
+        X_tril_idx = np.array([corr_indices[i]
+                               for i in range(len(self.none_handler(self.rdm_cor_fit)))
+                               for j in range(i + 1)])
+        # Find the s.d. for random variables that are not correlated
+        var_uncor = self.none_join([self.rdm_grouped_fit, self.rdm_fit])
+        range_var = [x for x in
+                     range(len(self.none_handler(var_uncor)))]
+        range_var = sorted(range_var)
+        draws_tril_idx = np.array(np.concatenate((range_var, draws_tril_idx)))
+        X_tril_idx = np.array(np.concatenate((range_var, X_tril_idx)))
+        draws_tril_idx = draws_tril_idx.astype(int)
+        X_tril_idx = X_tril_idx.astype(int)
+        return X_tril_idx
+
+
+
     def modifyn(self, data):
         select_data = self._characteristics_names
         alpha = np.isin(select_data, [item.split(':')[0] for item in data['fixed_fit']]).astype(int).tolist()
@@ -6428,23 +6722,35 @@ class ObjectiveFunction(object):
         # N, D = draws.shape[0], draws.shape[1]
         N, R, Kr = draws.shape[0], draws.shape[2], draws.shape[1]
         der = dev.np.ones((N, Kr, R), dtype=draws.dtype)
-        if len(self.none_handler(self.rdm_cor_fit)) == 0:
-            Br_come_one = self.Br.copy()
-            # Br_come_one =
-        else:

-            Br_come_one = self.Br.copy()
         # betas_random = self._transform_rand_betas(betas, betas_std, draws)
         #todo make sure this works for ln and truncated normal
         if any(set(distribution).intersection(['ln_normal', 'tn_normal'])):
-
+
+            #print('check this, intesection shouldn not happen for all')
+
+            if der.shape[1] != draws.shape[1]:
+                print('why')
             Br_come_one = self._transform_rand_betas(betas, betas_std, draws)
+            if der.shape[1] != draws.shape[1]:
+                print('why')
+            #TODO need to get the stuction of the rdms
             for k, dist_k in enumerate(distribution):
                 if dist_k == 'ln_normal':
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                     der[:, k, :] = Br_come_one[:, k, :]
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                 elif dist_k == 'tn_normal':
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                     der[:, k, :] = 1 * (Br_come_one[:, k, :] > 0)
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')

+        if der.shape[1] != draws.shape[1]:
+            print('why')
         return der

     def _copy_size_display_as_ones(self, matrix):