metacountregressor 0.1.88__py3-none-any.whl → 0.1.89__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metacountregressor/app_main.py +258 -0
- metacountregressor/helperprocess.py +257 -5
- metacountregressor/main.py +256 -55
- metacountregressor/metaheuristics.py +22 -11
- metacountregressor/setup.py +3 -2
- metacountregressor/solution.py +472 -157
- {metacountregressor-0.1.88.dist-info → metacountregressor-0.1.89.dist-info}/METADATA +179 -16
- {metacountregressor-0.1.88.dist-info → metacountregressor-0.1.89.dist-info}/RECORD +11 -10
- {metacountregressor-0.1.88.dist-info → metacountregressor-0.1.89.dist-info}/WHEEL +1 -1
- {metacountregressor-0.1.88.dist-info → metacountregressor-0.1.89.dist-info}/LICENSE.txt +0 -0
- {metacountregressor-0.1.88.dist-info → metacountregressor-0.1.89.dist-info}/top_level.txt +0 -0
metacountregressor/solution.py
CHANGED
```diff
@@ -30,19 +30,19 @@ from scipy.special import gammaln
 from sklearn.metrics import mean_absolute_error as MAE
 from sklearn.metrics import mean_squared_error as MSPE
 from statsmodels.tools.numdiff import approx_fprime, approx_hess
-from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from texttable import Texttable
-
+import time
 try:
     from ._device_cust import device as dev
     from .pareto_file import Pareto, Solution
     from .data_split_helper import DataProcessor
 except ImportError:
-    from
-    from
+    from _device_cust import device as dev
+    from pareto_file import Pareto, Solution
     from data_split_helper import DataProcessor
 
-
+from scipy import stats
 np.seterr(divide='ignore', invalid='ignore')
 warnings.simplefilter("ignore")
 
@@ -122,11 +122,13 @@ class ObjectiveFunction(object):
 
     def __init__(self, x_data, y_data, **kwargs):
 
-        self.reg_penalty =
+        self.reg_penalty = 0
         self.power_up_ll = False
+        self.nb_parma = 1
         self.bic = None
         self.other_bic = False
         self.test_flag = 1
+        self.no_extra_param = 1  # if true, fix dispersion. w
         if self.other_bic:
             print('change this to false latter ')
 
@@ -134,10 +136,10 @@ class ObjectiveFunction(object):
         self.constant_value = 0
         self.negative_binomial_value = 1
 
-        self.verbose_safe =
+        self.verbose_safe = kwargs.get('verbose', 0)
         self.please_print = kwargs.get('please_print', 0)
         self.group_halton = None
-        self.grad_yes = False
+        self.grad_yes = kwargs.get('grad_est', False)
         self.hess_yes = False
         self.group_halton_test = None
         self.panels = None
@@ -150,15 +152,15 @@ class ObjectiveFunction(object):
         self.dist_fit = None
 
         self.MAE = None
-        self.best_obj_1 =
-        self._obj_1 = 'bic'
-        self._obj_2 = 'MSE'
+        self.best_obj_1 = 1000000.0
+        self._obj_1 = kwargs.get('_obj_1', 'bic')
+        self._obj_2 = kwargs.get('_obj_2', 'MSE')
         self.numerical_hessian_calc = 0  # calculates hessian by statsmodels otherwise scipy
         self.full_model = None
         self.GP_parameter = 0
-        self.is_multi =
+        self.is_multi = kwargs.get('is_multi', False)
         self.complexity_level = 6
-        self._max_iterations_improvement =
+        self._max_iterations_improvement = 10000
         self.generated_sln = set()
         self.ave_mae = 0
         # defalt paramaters for hs #TODO unpack into harmony search class
@@ -166,23 +168,32 @@ class ObjectiveFunction(object):
         self._hms = 20
         self._max_time = 60 * 60 * 24
         self._hmcr = .5
-        self._par = 0.3
+        self._par = 0.3  # dont think this gets useted
         self._mpai = 1
         self._max_imp = 100000
         self._WIC = 1000  # Number of Iterations without Multiobjective Improvement #tod chuck into solution
         self._panels = None
         self.is_multi = True
         self.method_ll = 'Nelder-Mead-BFGS'
+
         self.method_ll = 'L-BFGS-B'  # alternatives 'BFGS_2', 'BFGS
-        self.method_ll = 'BFGS_2'
+        self.method_ll = kwargs.get('method', 'BFGS_2')
+
+        #self.method_ll = 'Nelder-Mead-BFGS'
         self.Keep_Fit = 2
         self.MP = 0
         # Nelder-Mead-BFGS
 
-        self._max_characteristics = 26
+        self._max_characteristics = kwargs.get('_max_vars', 26)
 
         self.beta_dict = dict
+        if 'model_terms' in kwargs:
+            print('change')
+            if kwargs.get('model_terms').get('group') is not None:
+                kwargs['group'] = kwargs.get('model_terms').get('group')
 
+            if kwargs.get('model_terms').get('panels') is not None:
+                kwargs['panels'] = kwargs.get('model_terms').get('panels')
         acceptable_keys_list = ['_par', '_max_imp', '_hmcr', 'steps',
                                 'algorithm', '_random_seed', '_max_time',
                                 'forcedvariables', '_obj_1', '_obj_2', '_par',
@@ -200,12 +211,17 @@ class ObjectiveFunction(object):
         if 'instance_number' in kwargs:
             self.instance_number = str(kwargs['instance_number'])
         else:
+
+            print('no name set, setting name as 0')
             self.instance_number = str(0)  # set an arbitrary instance number
 
         if not os.path.exists(self.instance_number):
-
+            if kwargs.get('make_directory', True):
+                print('Making a Directory, if you want to stop from storing the files to this directory set argumet: make_directory:False')
+                os.makedirs(self.instance_number)
 
         if not hasattr(self, '_obj_1'):
+            print('_obj_1 required, define as bic, aic, ll')
             raise Exception
 
         self.pvalue_penalty = float(kwargs.get('pvalue_penalty', 0.5))
@@ -213,6 +229,11 @@ class ObjectiveFunction(object):
         self._maximize = False  # do we maximize or minimize?
 
         x_data = sm.add_constant(x_data)
+        standardize_the_data = 0
+        if standardize_the_data:
+            print('we are standardize the data')
+            x_data = self.self_standardize_positive(x_data)
+
         self._input_data(x_data, y_data)
 
 
@@ -229,9 +250,12 @@ class ObjectiveFunction(object):
         self.test_percentage = float(kwargs.get('test_percentage', 0))
         self.val_percentage = float(kwargs.get('val_percentage', 0))
         if self.test_percentage == 0:
+            print('test percentage is 0, please enter arg test_percentage as decimal, eg 0.8')
+            print('continuing single objective')
+            time.sleep(2)
             self.is_multi = False
 
-        if 'panels' in kwargs:
+        if 'panels' in kwargs and not (kwargs.get('panels') == None):
             self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)
 
             x_data[kwargs['group']] = x_data[kwargs['group']].astype(
@@ -274,11 +298,11 @@ class ObjectiveFunction(object):
 
             #self.n_obs = N
             self._characteristics_names = list(self._x_data.columns)
-            self._max_group_all_means =
+            self._max_group_all_means = 2
 
             exclude_this_test = [4]
 
-            if 'panels' in kwargs:
+            if 'panels' in kwargs and not (kwargs.get('panels') == None):
                 self.panels = np.asarray(df_train[kwargs['panels']])
                 self.panels_test = np.asarray(df_test[kwargs['panels']])
                 self.ids = np.asarray(
@@ -294,6 +318,8 @@ class ObjectiveFunction(object):
                 self.group_halton = group.copy()
                 self.group_dummies = pd.get_dummies(group)
                 Xnew, Ynew, panel_info = self._balance_panels(X, Y, panel)
+
+                Xnew = pd.DataFrame(Xnew, columns=X.columns)
                 self.panel_info = panel_info
                 self.N, self.P = panel_info.shape
                 Xnew.drop(kwargs['panels'], axis=1, inplace=True)
@@ -301,9 +327,11 @@ class ObjectiveFunction(object):
                 K = Xnew.shape[1]
                 self._characteristics_names = list(Xnew.columns)
                 XX = Xnew.values.reshape(self.N, self.P, K).copy()
+                XX = XX.astype('float')
                 self.group_dummies = self.group_dummies.values.reshape(self.N, self.P, -1)
                 self.group_halton = self.group_halton.reshape(self.N, self.P)[:, 0]
                 YY = Ynew.values.reshape(self.N, self.P, 1).copy()
+                YY = YY.astype('float')
                 self._x_data = XX.copy()
                 self._y_data = YY.copy()
                 X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.panels_test, group_test)
@@ -325,6 +353,7 @@ class ObjectiveFunction(object):
                 K = X.shape[1]
                 self.columns_names = X.columns
                 X = X.values.reshape(self.N_test, self.P_test, K)
+                X = X.astype('float')
                 self.group_halton_test = self.group_halton_test.reshape(self.N_test, self.P_test)[:, 0]
                 Y = Y.values.reshape(self.N_test, self.P_test, 1)
                 Y = Y.astype('float')
@@ -337,6 +366,7 @@ class ObjectiveFunction(object):
 
 
             else:
+                print('No Panels. Grouped Random Paramaters Will not be estimated')
                 self.G = None
                 self._Gnum = 1
                 self._max_group_all_means = 0
@@ -353,7 +383,9 @@ class ObjectiveFunction(object):
                 K = Xnew.shape[1]
                 self._characteristics_names = list(Xnew.columns)
                 XX = Xnew.values.reshape(self.N, self.P, K).copy()
+                XX = XX.astype('float')
                 YY = Ynew.values.reshape(self.N, self.P, 1).copy()
+                YY = YY.astype('float')
                 self._x_data = XX.copy()
                 self._y_data = YY.copy()
 
@@ -369,7 +401,9 @@ class ObjectiveFunction(object):
                 K = X.shape[1]
                 self.columns_names = X.columns
                 X = X.values.reshape(self.N_test, self.P_test, K)
+                X = X.astype('float')
                 Y = Y.values.reshape(self.N_test, self.P_test, 1)
+                Y = Y.astype('float')
                 self._x_data_test = X.copy()
                 self.y_data_test = Y.copy()
 
@@ -384,11 +418,13 @@ class ObjectiveFunction(object):
 
 
 
-        self.Ndraws = 200
+        self.Ndraws = kwargs.get('Ndraws', 200)
         self.draws1 = None
         self.initial_sig = 1  # pass the test of a single model
         self.pvalue_sig_value = .1
         self.observations = self._x_data.shape[0]
+        self.minimize_scaler = 1/self.observations  # scale the minimization function to the observations
+
         self.batch_size = None
         # open the file in the write mode
         self.grab_transforms = 0
```
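The new `minimize_scaler = 1/self.observations` attribute rescales the negative log-likelihood before it is handed to the optimizer, so the objective stays roughly O(1) regardless of sample size; later hunks divide `optim_res['fun']` by the same factor to recover the unscaled log-likelihood. A minimal standalone sketch of the idea, assuming a hypothetical Poisson objective (`scaled_neg_loglik` is illustrative, not the package's API):

```python
import numpy as np
from scipy.optimize import minimize

# Hypothetical Poisson negative log-likelihood, scaled by 1/n so its
# magnitude (and gradient tolerances) do not grow with the sample size.
def scaled_neg_loglik(beta, X, y, scaler):
    eta = X @ beta
    ll = np.sum(y * eta - np.exp(eta))  # Poisson log-likelihood (up to a constant)
    return -ll * scaler                 # scaled objective for the optimizer

rng = np.random.default_rng(0)
X = np.column_stack([np.ones(500), rng.normal(size=500)])
y = rng.poisson(np.exp(0.5 + 0.3 * X[:, 1]))
scaler = 1.0 / len(y)                   # mirrors minimize_scaler = 1/observations

res = minimize(scaled_neg_loglik, np.zeros(2), args=(X, y, scaler), method='BFGS')
loglik = -res.fun / scaler              # undo the scaling, as the diff does
print(res.x, loglik)
```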
```diff
@@ -400,17 +436,19 @@ class ObjectiveFunction(object):
             print('Setup Complete...')
         else:
             print('No Panels Supplied')
+            print('Setup Complete...')
         self._characteristics_names = list(self._x_data.columns)
         # define the variables
         # self._transformations = ["no", "sqrt", "log", "exp", "fact", "arcsinh", 2, 3]
         self._transformations = ["no", "sqrt", "log", "arcsinh"]
         self._transformations = kwargs.get('_transformation', ["no", "sqrt", "log", 'arcsinh'])
-
+        self._transformations = kwargs.get('_transformation', ["no", "log", "sqrt", "arcsinh"])
         # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']
 
-        self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', '
+        self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])
 
         if self.G is not None:
+            #TODO need to handle this for groups
             self._distribution = ["trad| " + item for item in self._distribution
                                   ] + ["grpd| " + item for item in self._distribution]
 
@@ -422,18 +460,32 @@ class ObjectiveFunction(object):
 
         self.significant = 0
         # define the states of our explanatory variables
+
+
         self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test,
-                                                   kwargs.get('must_include', []))
+                                                   kwargs.get('must_include', []), extra = kwargs.get('decisions', None))
+
+
+
         self._discrete_values = self._discrete_values + \
-
+            self.define_distributions_analyst(extra=kwargs.get('decisions', None))
 
         if 'model_types' in kwargs:
             model_types = kwargs['model_types']
         else:
-            model_types = [[0, 1]]  # add 2 for Generalized Poisson
 
+
+            model_types = [[0, 1]]  # add 2 for Generalized Poisson
+            #model_types = [[0]]
+            #TODO change back and fix NB
+        model_t_dict = {'Poisson': 0,
+                        "NB": 1}
+        # Retrieve the keys (model names) corresponding to the values in model_types
+        model_keys = [key for key, value in model_t_dict.items() if value in model_types[0]]
+        # Print the formatted result
+        print(f'The type of models possible will consider: {", ".join(model_keys)}')
         self._discrete_values = self._discrete_values + self.define_poissible_transforms(
-            self._transformations) + model_types
+            self._transformations, kwargs.get('decisions', None)) + model_types
 
         self._model_type_codes = ['p', 'nb',
                                   'gp', "pl", ["nb-theta", 'nb-dis']]
@@ -451,6 +503,7 @@ class ObjectiveFunction(object):
         #Manually fit from analyst specification
         manual_fit = kwargs.get('Manual_Fit')
         if manual_fit is not None:
+            print('fitting manual')
             self.process_manual_fit(manual_fit)
 
         self.solution_analyst = None
@@ -485,6 +538,7 @@ class ObjectiveFunction(object):
         if self.is_multi:
             self._offsets_test = self._x_data_test[:, :, val_od]
             self._x_data_test = self.remove_offset(self._x_data_test, val_od)
+            print(self._offsets)
         else:
             self.initialize_empty_offsets()
 
@@ -757,6 +811,8 @@ class ObjectiveFunction(object):
         if dispersion == 0:
             return None, None
         elif dispersion == 2 or dispersion == 1:
+            if self.no_extra_param:
+                return self.nb_parma, None
             return betas[-1], None
 
         elif dispersion == 3:
@@ -784,14 +840,65 @@ class ObjectiveFunction(object):
         par = np.nan_to_num(par)
         return par
 
-    def
+    def rename_distro(self, distro):
+        # Mapping dictionary
+        mapping = {
+            'normal': ['normal', 'n', 'Normal'],
+            'triangular': ['triangular', 't', 'Triangular'],
+            'uniform': ['uniform', 'u', 'Uniform'],
+            'ln_normal': ['ln_normal', 'ln_n', 'Ln_Normal'],
+            'tn_normal': ['tn_normal', 'tn_n', 'trunc_normal']
+        }
+
+        # Use list comprehension with the mapping
+        reversed_mapping = {value: key for key, values in mapping.items() for value in values}
+
+        # Use the reversed mapping to find the corresponding key
+        new_distro = [reversed_mapping.get(i, i) for i in distro]
+        return new_distro
+
+    def define_distributions_analyst(self, extra = None):
+
+        if extra is not None:
+            set_alpha = []
+            for col in self._characteristics_names:
+                if col in extra[('Column')].values:
+                    matched_index = extra[('Column')].index[extra[('Column')] == col].tolist()
+                    distro = ast.literal_eval(extra.iloc[matched_index, 7].values.tolist()[0])
+                    distro = self.rename_distro(distro)
+                    set_alpha = set_alpha+[distro]
+                elif col == 'const':
+                    set_alpha = set_alpha +[['normal']]
+            return set_alpha
+        return [[x for x in self._distribution]] * self._characteristics
+
+
+
+
+    def define_alphas(self, complexity_level=4, exclude=[], include=[], extra = None):
         'complexity level'
         '''
         2 is feature selection,
-        3 is random
-        4 is correlated random
+        3 is random parameters
+        4 is correlated random parameters
+
+        extra is the stuff defined by the Meta APP
         '''
         set_alpha = []
+        if extra is not None:
+            for col in self._characteristics_names:
+                if col == 'const' or col == 'Constant' or col == 'constant':  # no random paramaters for const
+                    set_alpha = set_alpha + [[1]]
+                elif col == 'Offset':
+                    set_alpha = set_alpha + [[1]]
+
+                elif col in extra[('Column')].values:
+                    matched_index = extra[('Column')].index[extra[('Column')] == col].tolist()
+                    check = list(itertools.chain(*extra.iloc[matched_index, 1:7].values))
+                    set_alpha = set_alpha + [[x for x in range(len(check)) if check[x] == True]]
+            return set_alpha
+
+
         for col in self._characteristics_names:
             if col == 'const' or col == 'Constant' or col == 'constant':  # no random paramaters for const
                 set_alpha = set_alpha + [[1]]
```
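The new `rename_distro` helper normalises analyst-supplied distribution aliases by inverting an alias table. A minimal standalone sketch of the same inversion trick (the alias entries here are a subset, for illustration):

```python
# Invert {canonical: [aliases]} into {alias: canonical}, then map a list
# of user-supplied names, leaving unknown entries untouched.
mapping = {
    'normal': ['normal', 'n', 'Normal'],
    'triangular': ['triangular', 't', 'Triangular'],
    'uniform': ['uniform', 'u', 'Uniform'],
}
reversed_mapping = {alias: key for key, aliases in mapping.items() for alias in aliases}

print([reversed_mapping.get(name, name) for name in ['n', 'Triangular', 'lindley']])
# ['normal', 'triangular', 'lindley']
```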
```diff
@@ -842,6 +949,13 @@ class ObjectiveFunction(object):
         return ([self._model_type_codes[dispersion]])
 
     def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
+        '''
+        setup for naming of the model summary
+        '''
+        if self.no_extra_param and dispersion == 1:
+
+            betas = np.append(betas, self.nb_parma)
+
         self.name_deleter = []
         group_rpm = None
         group_dist = []
@@ -961,13 +1075,15 @@ class ObjectiveFunction(object):
             [''] * (len(names) - len(self.transform_id_names))
         self.coeff_names = names
 
+        '''
         if betas is not None:
             try:
                 if len(betas) != len(names):
-                    print('
-
+                    print('standard_model', no_draws)
+
             except Exception as e:
                 print(e)
+        '''
 
 
 
@@ -992,7 +1108,8 @@ class ObjectiveFunction(object):
         if not isinstance(self.pvalues, np.ndarray):
             raise Exception
 
-
+        if 'nb' in self.coeff_names and self.no_extra_param:
+            self.pvalues = np.append(self.pvalues, 0)
 
         if self.please_print or save_state:
 
@@ -1008,17 +1125,22 @@ class ObjectiveFunction(object):
 
         if solution is not None:
             print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")
-
+
         self.pvalues = [self.round_with_padding(
             x, 2) for x in self.pvalues]
         signif_list = self.pvalue_asterix_add(self.pvalues)
         if model == 1:
 
-            self.coeff_[-1] = np.
-            if self.
+            #self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
+            if self.no_extra_param:
+                self.coeff_ = np.append(self.coeff_, self.nb_parma)
+                self.stderr = np.append(self.stderr, 0.00001)
+                self.zvalues = np.append(self.zvalues, 50)
+
+            elif self.coeff_[-1] < 0.25:
                 print(self.coeff_[-1], 'Warning Check Dispersion')
                 print(np.exp(self.coeff_[-1]))
-                self.coeff_[-1] = np.exp(self.coeff_[-1])  # min possible value for negbinom
+                #self.coeff_[-1] = np.exp(self.coeff_[-1])  # min possible value for negbinom
 
         self.coeff_ = [self.round_with_padding(x, 2) for x in self.coeff_]
 
@@ -1230,7 +1352,7 @@ class ObjectiveFunction(object):
         with open(filename, 'w') as file:
             file.write(content)
 
-    def define_poissible_transforms(self, transforms) -> list:
+    def define_poissible_transforms(self, transforms, extra= None) -> list:
         transform_set = []
         if not isinstance(self._x_data, pd.DataFrame):
             x_data = self._x_data.reshape(self.N * self.P, -1).copy()
@@ -1241,6 +1363,7 @@ class ObjectiveFunction(object):
 
             if 'AADT' in self._characteristics_names[col]:
                 new_transform = [['log']]
+                #new_transform = [['no']]
                 transform_set = transform_set + new_transform
 
             elif all(x_data[col] <= 5):
@@ -1280,6 +1403,18 @@ class ObjectiveFunction(object):
 
         return transform_set
 
+    def poisson_mean_get_dispersion(self, betas, X, y):
+        eVy = self._loglik_gradient(betas, X, y, None, X, None, None, False, False, dispersion=0,
+                                    return_EV=True,
+                                    zi_list=None, draws_grouped=None, Xgroup=None)
+
+        ab = ((y - eVy)**2 - eVy)/eVy
+        bb = eVy - 1
+        disp = sm.OLS(ab.ravel(), bb.ravel()).fit()
+        gamma = disp.params[0]
+        #print(f'dispersion is {gamma}')
+        return gamma
+
     def validation(self, betas, y, X, Xr=None, dispersion=0, rdm_cor_fit=None, zi_list=None, exog_infl=None,
                    model_nature=None, halton=1, testing=1, validation=0):
         'validation if mu needs to be calculated'
```
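The new `poisson_mean_get_dispersion` estimates the negative binomial dispersion from a fitted Poisson mean via an auxiliary OLS regression, in the spirit of Cameron and Trivedi's overdispersion test. A hedged standalone sketch of that step follows; it uses the textbook NB2 auxiliary form with `mu` as the regressor, whereas the hunk above regresses on `eVy - 1`, and the Poisson fit here is illustrative rather than the package's internal `_loglik_gradient` call:

```python
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(1)
x = rng.normal(size=1000)
mu_true = np.exp(0.2 + 0.4 * x)
y = rng.negative_binomial(n=2.0, p=2.0 / (2.0 + mu_true))   # NB2 data, alpha = 0.5

# Step 1: a Poisson fit gives the conditional mean mu_hat.
pois = sm.GLM(y, sm.add_constant(x), family=sm.families.Poisson()).fit()
mu = pois.fittedvalues

# Step 2: auxiliary regression ((y - mu)^2 - mu) / mu = alpha * mu + error;
# the OLS slope (no intercept) estimates the NB2 dispersion alpha.
lhs = ((y - mu) ** 2 - mu) / mu
alpha_hat = sm.OLS(lhs, mu).fit().params[0]
print(alpha_hat)   # should be near 0.5
```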
```diff
@@ -1313,7 +1448,7 @@ class ObjectiveFunction(object):
                     XG = model_nature.get('XGtest')[:total_percent, :, :]
                 else:
                     XG = model_nature.get('XGtest')[total_percent:, :, :]
-
+
             else:
                 if 'XG' in model_nature:
                     XG = model_nature.get('XG')
@@ -1435,7 +1570,7 @@ class ObjectiveFunction(object):
         5: herogeneity_in _means
 
 
-        a: how to
+        a: how to transform the original data
         b: grab dispersion '''
 
         # todo: better way
@@ -1783,7 +1918,10 @@ class ObjectiveFunction(object):
         elif dispersion == 4:
             return 2
         else:
-
+            if self.no_extra_param:
+                return 0
+            else:
+                return 1
 
     def get_pvalue_info_alt(self, pvalues, names, sig_value=0.05, dispersion=0, is_halton=1, delete=0,
                             return_violated_terms=0):
@@ -1798,6 +1936,7 @@ class ObjectiveFunction(object):
 
         else:
             slice_this_amount = self.num_dispersion_params(dispersion)
+            slice_this_amount = 1  #TODO handle this
             if pvalues[-1] > sig_value:
                 vio_counts += 1
             subpvalues = pvalues[:-slice_this_amount].copy()
@@ -2222,7 +2361,7 @@ class ObjectiveFunction(object):
         sorted(my_dict, key=lambda x: x[0]['pval_percentage'])
 
     def get_fitness(self, vector, multi=False, verbose=False, max_routine=3):
-        obj_1 = 10.0 **
+        obj_1 = 10.0 ** 5
         obj_best = None
         sub_slns = list()
 
@@ -2233,7 +2372,7 @@ class ObjectiveFunction(object):
         try:
             self.repair(vector)
         except Exception as e:
-            print('
+            print('prolem repairing here')
             print(vector)
             print(e)
         layout = vector.copy()
@@ -2480,7 +2619,7 @@ class ObjectiveFunction(object):
         random.seed(seed)
 
     def set_random_seed(self):
-        print('
+        print('Imbedding Seed', self._random_seed)
         np.random.seed(self._random_seed)
 
         random.seed(self._random_seed)
@@ -2514,7 +2653,7 @@ class ObjectiveFunction(object):
         self._hmcr = (
             self._hmcr_min + ((self._hmcr_max - self._hmcr_min) / self._max_imp) * iteration)
 
-
+
 
     def update_par(self, iteration, is_sin=False):
         """
@@ -2701,9 +2840,7 @@ class ObjectiveFunction(object):
 
 
         """
-
-        if alpha is None:
-            alpha = params[-1]
+
         # Calculate common terms
         '''
         n = len(y)
@@ -2736,13 +2873,11 @@ class ObjectiveFunction(object):
         '''
         #return score
 
-
-
-
-
         try:
             if alpha is None:
-                alpha = params[-1]
+                alpha = np.exp(params[-1])
+            else:
+                alpha = np.exp(params[-1])
             a1 = 1 / alpha * mu ** Q
             prob = a1 / (a1 + mu)
             exog = X
@@ -2878,7 +3013,7 @@ class ObjectiveFunction(object):
             argument = prob.mean(axis=1)
             # if less than 0 penalise
             if np.min(argument) < 0:
-                print('
+                print('Error with args..')
             if np.min(argument) < limit:
                 # add a penalty for too small argument of log
                 log_lik += -np.sum(np.minimum(0.0, argument - limit)) / limit
@@ -3369,6 +3504,7 @@ class ObjectiveFunction(object):
         else:
             corr_pairs = list(itertools.combinations(corr_indices, 2))
 
+
         for ii, corr_pair in enumerate(corr_pairs):
             # lower cholesky matrix
             chol_mat_temp[tuple(reversed(corr_pair))] = chol[chol_count]
@@ -3396,7 +3532,7 @@ class ObjectiveFunction(object):
         a = 0
         b = 0
         stuff = []
-        #
+        # TODO get order
         for j, i in enumerate(list_sizes):
             br_mean = betas_hetro[a:i + a]
             a += i
@@ -3423,7 +3559,30 @@ class ObjectiveFunction(object):
         br_mean = betas_m
         br_sd = betas_sd  # Last Kr positions
         # Compute: betas = mean + sd*draws
-
+        if len(br_sd) != draws.shape[1]:
+            #get the same size as the mean
+            betas_random = self.Br.copy()
+
+            '''
+            c = self.get_num_params()[3:5]
+
+            cor = []
+            for i in range(c[0]):
+                cor.append(i)
+
+            vall =[]
+            for i, val in enumerate(reversed(br_sd)):
+                vall.append()
+
+            remaining = draws.shape[1] - len(betas_sd)
+            '''
+
+        else:
+
+
+            betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
+
+
         betas_random = self._apply_distribution(betas_random)
 
         return betas_random
```
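The restored line `betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]` builds simulated random coefficients by broadcasting a mean and standard deviation over an (N, Kr, R) block of draws. A small shape-checked sketch of that broadcast, with illustrative sizes:

```python
import numpy as np

N, Kr, R = 4, 3, 5                       # observations, random coefficients, draws
rng = np.random.default_rng(2)
draws = rng.standard_normal((N, Kr, R))  # e.g. Halton or pseudo-random normals

br_mean = np.array([0.5, -1.0, 2.0])     # one mean per random coefficient
br_sd = np.array([0.1, 0.2, 0.3])        # one spread per random coefficient

# (1,Kr,1) + (N,Kr,R) * (1,Kr,1) -> (N,Kr,R): every draw gets mean + sd * draw
betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
print(betas_random.shape)                # (4, 3, 5)
```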
```diff
@@ -3442,28 +3601,71 @@ class ObjectiveFunction(object):
         # if gamma <= 0.01: #min defined value for stable nb
         # gamma = 0.01
 
+        #g = stats.gamma.rvs(gamma, scale = lam/gamma, size = 1.0 / gamma * lam ** Q )
+
+        #gg = stats.poisson.rvs(g)
+
+
+
+
         endog = y
         mu = lam
+        ''''
+        mu = lam*np.exp(gamma) #TODO check that this does not need to be multiplied
+        alpha = np.exp(gamma)
+
+        '''
         alpha = gamma
         size = 1.0 / alpha * mu ** Q
-
-
-
-
+
+        prob = size/(size+mu)
+
+
+
+        '''test'''
+
+
+        '''
+        size = 1 / np.exp(gamma) * mu ** 0
+        prob = size / (size + mu)
+        coeff = (gammaln(size + y) - gammaln(y + 1) -
+                 gammaln(size))
+        llf = coeff + size * np.log(prob) + y * np.log(1 - prob)
+        '''
+
         try:
             # print(np.shape(y),np.shape(size), np.shape(prob))
-            gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
+            #gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
+            #import time
+            #start_time = time.time()
+
+
+            # Measure time for negbinom_pmf
+            #start_time = time.time()
+            #for _ in range(10000):
+
 
+            #end_time = time.time()
+            #print("Custom functieon time:", end_time - start_time)
+            #start_time = time.time()
+            #for _ in range(10000):
+            '''
             gg = np.exp(
                 gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
                     y + alpha) * np.log(mu + alpha))
-
-
-
+            gg[np.isnan(gg)] = 1
+            '''
+            gg_alt = nbinom.pmf(y ,1/alpha, prob)
+            #gg_alt_2 = (gammaln(size + y) - gammaln(y + 1) -
+            #gammaln(size)) + size * np.log(prob) + y * np.log(1 - prob)
+            #print('check theses')
+            #gg = nbinom.pmf(y ,alpha, prob)
+            #end_time = time.time()
+            #print("Custom functieon time:", end_time - start_time)
 
         except Exception as e:
-            print(
-            return
+            print("Neg Binom error.")
+        return gg_alt
 
     def lindley_pmf(self, x, r, theta, k=50):
         """
```
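The rewritten likelihood now computes `gg_alt = nbinom.pmf(y, 1/alpha, prob)` with `size = 1/alpha * mu**Q` and `prob = size/(size + mu)`. A short consistency check of that size/prob parameterisation against the gamma-function form used elsewhere in the hunk (Q = 0 assumed, as in the commented-out block):

```python
import numpy as np
from scipy.stats import nbinom
from scipy.special import gammaln

y = np.arange(0, 6)
mu, alpha = 3.0, 0.5                  # NB2: Var = mu + alpha * mu**2
size = 1.0 / alpha                    # Q = 0, so size = 1/alpha
prob = size / (size + mu)

# scipy's nbinom(n, p) with n = size, p = prob matches the closed form.
pmf_scipy = nbinom.pmf(y, size, prob)
pmf_manual = np.exp(gammaln(y + size) - gammaln(y + 1) - gammaln(size)
                    + size * np.log(prob) + y * np.log(1 - prob))
print(np.allclose(pmf_scipy, pmf_manual))   # True
```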
```diff
@@ -3530,7 +3732,7 @@ class ObjectiveFunction(object):
 
         endog = y
         mu = lam
-        alpha = gamma
+        alpha = np.exp(gamma)
         alpha = alpha * mu ** Q
         size = 1 / alpha * mu ** Q  # also r
         # self.rate_param = size
@@ -3610,8 +3812,8 @@ class ObjectiveFunction(object):
 
         if dispersion == 1 or dispersion == 4:  # nb
             # if model_nature is not None and 'dispersion_penalty' in model_nature:
-
-
+            #b_gam = 1/np.exp(b_gam)
+            #print(b_gam)
             if b_gam <= 0:
                 #penalty += 100
                 #penalty += abs(b_gam)
@@ -3619,9 +3821,9 @@ class ObjectiveFunction(object):
                 #b_gam = 1
 
                 # if b_gam < 0.03:
-                penalty += min(1, np.abs(b_gam))
+                penalty += min(1, np.abs(b_gam), 0)
 
-                b_gam = 0.001
+                #b_gam = 0.001
                 #
 
                 #if b_gam >= 10:
@@ -3653,8 +3855,15 @@ class ObjectiveFunction(object):
     def eXB_calc(self, params_main, Xd, offset, dispersion, b_gam=None):
 
         # print('this was 0')
-
+        if dispersion:
+            eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
+
+            #eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])+dispersion
+            #print('check if this holds size')
+        else:
+            eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
         eta = np.array(eta)
+
         # eta = np.float64(eta)
         # eta = np.dot(Xd, params_main)+offset[:,:,0]
         # eta2 = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
@@ -3673,7 +3882,7 @@ class ObjectiveFunction(object):
 
         else:
             # eVd = self.my_lindley(np.exp(np.clip(eta, None, EXP_UPPER_LIMIT)), 1.29)
-
+            eta = eta.astype('float')
             eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
         return eVd
 
@@ -3789,7 +3998,7 @@ class ObjectiveFunction(object):
 
 
             # proba_d = self.dnegbimonli(y, eVd, b_gam )
-
+
 
         elif dispersion == 2:
 
@@ -3810,7 +4019,7 @@ class ObjectiveFunction(object):
             # proba_r = self.poisson_lognormal_pmf(y, eVd, sig)
             proba_r = np.array(store)
             proba_r = np.atleast_2d(proba_r).T
-
+
 
         else:
             raise Exception('not implemented other modeling forms')
@@ -3827,6 +4036,8 @@ class ObjectiveFunction(object):
         if dispersion == 0 or dispersion == 3:
             return 0
         else:
+
+
             return 1
 
     def _prob_product_across_panels(self, pch, panel_info):
@@ -3882,7 +4093,7 @@ class ObjectiveFunction(object):
             if y[i] == 0:
                 gr_e[i] = 0
 
-        if self.is_dispersion(dispersion):
+        if self.is_dispersion(dispersion) and not self.no_extra_param:
             gr_d = np.zeros((N, 1))
             if dispersion == 1:
                 # trying alt
@@ -3986,12 +4197,13 @@ class ObjectiveFunction(object):
                 br, draws_, brstd, dis_fit_long)  # (N,K,R)
             dprod_r = dev.np.einsum("njk,njr -> nkr", Xdr,
                                     einsum_model_form, dtype=np.float64)  # (N,K,R)
-            der_prod_r = dprod_r * der * proba_n[:, None, :]  # (N,K,R)
-            der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :]  # I think this is the case check
-
-
+            #der_prod_r = dprod_r * der * proba_n[:, None, :]  # (N,K,R)
+            #der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :]  # I think this is the case check
+
+            der_prod_r = dprod_r * der * proba_n[:, None, :]  # or this one
+
             der_t = self._compute_derivatives(
-                br, draws_[:, draws_tril_idx, :], brstd, self.dist_fit)  # (N,K,R)
+                br[draws_tril_idx], draws_[:, draws_tril_idx, :], brstd, np.array(self.dist_fit)[draws_tril_idx])  # (N,K,R)
             # er_t = self._compute_derivatives(br, draws_, brstd[:, draws_tril_idx,: ], self.dist_fit, draws_tril_idx)
             der_prod_r_t = dprod_r[:, draws_tril_idx, :] * \
                 der_t * proba_n[:, None, :]  # (N,K,R)
@@ -4052,14 +4264,18 @@ class ObjectiveFunction(object):
             grad_n = self._concat_gradients(
                 (gr_f, gr_u, gr_s, gr_e)) / Rlik  # (N,K)
         else:
-
-
-
-
+            if self.no_extra_param:
+                grad_n = self._concat_gradients(
+                    (gr_f, gr_u, gr_s, gr_h, gr_hs)) / Rlik  # (N,K)
+            else:
+                grad_n = self._concat_gradients(
+                    (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik  # (N,K)
+        grad_n = np.nan_to_num(grad_n, nan=0, posinf=1000, neginf=-1000)
+        grad_n = np.clip(grad_n, -100, 100)
         n = np.shape(grad_n)[0]
         # subtract out mean gradient value
-
-
+        grad_n_sub = grad_n-(np.sum(grad_n, axis=0)/n)
+        grad_n = grad_n_sub
         grad = grad_n.sum(axis=0)
         return grad, grad_n
 
@@ -4210,7 +4426,7 @@ class ObjectiveFunction(object):
         return proba_r.sum(axis=1), np.squeeze(proba_r)
 
     def _penalty_betas(self, betas, dispersion, penalty, penalty_ap=100.0):
-        penalty_val = 0.
+        penalty_val = 0.1
         penalty_val_max = 130
 
         # print('change_later')
@@ -4226,8 +4442,8 @@ class ObjectiveFunction(object):
             if abs(i) > penalty_val_max:
                 penalty += abs(i)
 
-            #
-            #
+            #if abs(i) < penalty_val:
+            #    penalty += 5
 
         # penalty = 0
         return penalty
@@ -4334,8 +4550,7 @@ class ObjectiveFunction(object):
                 index += 1
 
         brstd = br_std
-
-        print(brstd)
+
 
 
     def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
@@ -4367,7 +4582,7 @@ class ObjectiveFunction(object):
         penalty = self._penalty_betas(
             betas, dispersion, penalty, float(len(y) / 10.0))
         self.n_obs = len(y)  # feeds into gradient
-        if draws is None and draws_grouped is None and (
+        if draws is None and draws_grouped is None and (model_nature is None or
                 'draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
             #TODO do i shuffle the draws
             if type(Xd) == dict:
@@ -4420,7 +4635,7 @@ class ObjectiveFunction(object):
                 penalty = self.regularise_l2(betas)
 
             if not np.isreal(loglik):
-                loglik = -
+                loglik = - 10000000.0
 
             output = (-loglik + penalty,)
             if return_gradient:
@@ -4428,14 +4643,19 @@ class ObjectiveFunction(object):
                 if return_gradient_n:
                     der, grad_n = self.simple_score_grad(
                         betas, y, eVd, Xd, dispersion, both=True)
-                    return (-loglik + penalty, -der, grad_n)
+                    #return (-loglik + penalty, -der, grad_n)*self.minimize_scaler
+                    scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel(), grad_n))
+                    return scaled_tuple
                 else:
                     der = self.simple_score_grad(
                         betas, y, eVd, Xd, dispersion, both=False)
-
-
+                    scaled_tuple = tuple(
+                        x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel()))
+                    return scaled_tuple
+                    #return (-loglik + penalty, -der.ravel())*self.minimize_scaler
             else:
-
+
+                return (-loglik + penalty)*self.minimize_scaler
         # Else, we have draws
         self.n_obs = len(y) * self.Ndraws  #todo is this problematic
         penalty += self._penalty_betas(
```
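Scaling the returned `(objective, gradient)` pair by the same `minimize_scaler` keeps the value and its derivatives consistent, which matters when the tuple is fed to `scipy.optimize.minimize` with `jac=True`. A minimal sketch of that pattern, using a hypothetical quadratic stand-in for the negative log-likelihood:

```python
import numpy as np
from scipy.optimize import minimize

scaler = 1e-3   # stands in for self.minimize_scaler = 1/observations

def fun_and_grad(x):
    f = np.sum((x - 2.0) ** 2) * 1e3      # pretend this is a large -loglik
    g = 2.0 * (x - 2.0) * 1e3             # its gradient
    # Scale value and gradient together, as the diff does with scaled_tuple.
    return tuple(v * scaler for v in (f, g))

res = minimize(fun_and_grad, x0=np.zeros(3), jac=True, method='L-BFGS-B')
print(res.x, res.fun / scaler)            # undo the scaling to report f
```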
```diff
@@ -4509,7 +4729,9 @@ class ObjectiveFunction(object):
             Kf = 0
         else:
             if n_coeff != len(betas):
-                raise Exception
+                raise Exception(
+
+                )
             Bf = betas[0:Kf]  # Fixed betas
 
 
@@ -4611,7 +4833,8 @@ class ObjectiveFunction(object):
         eVd = self.lam_transform(eVd, dispersion, betas[-1])
 
         if self.is_dispersion(dispersion):
-
+            if not self.no_extra_param:
+                penalty, betas[-1] = self._penalty_dispersion(
                     dispersion, betas[-1], eVd, y, penalty, model_nature)
 
         '''
@@ -4655,38 +4878,22 @@ class ObjectiveFunction(object):
             proba.append(dev.to_cpu(proba_))
 
         lik = np.stack(proba).sum(axis=0) / R  # (N, )
-        lik = np.clip(lik, min_comp_val,
+        lik = np.clip(lik, min_comp_val, max_comp_val)
         # lik = np.nan_to_num(lik, )
         loglik = np.log(lik)
         llf_main = loglik
-        if 'exog_infl' in model_nature:
-            params_infl = betas[Kf:Kf + len(model_nature.get('exog_infl'))]
-            params_main = Bf
-            exog_infl = model_nature.get('exog_inflX')
-            llf_main = llf_main.ravel()  # TODO test this
-            w = self.predict_logit_part(params_infl, exog_infl)
-
-            w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-
-            zero_idx = np.nonzero(y == 0)[0]
-            nonzero_idx = np.nonzero(y)[0]  # FIXME should shape be unravelled
-
-            llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1)  # TODO test this i added ravel to this code
-            llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
-            llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-            loglik = llf.sum()
-        else:
 
-
+
+        loglik = loglik.sum()
 
         loglik = np.clip(loglik, log_lik_min, log_lik_max)
         if self.power_up_ll:
             penalty += self.regularise_l2(betas)
-
+
             penalty += self.regularise_l2(betas)
         if not return_gradient:
 
-            output = (-loglik + penalty,)
+            output = ((-loglik + penalty)*self.minimize_scaler,)
             if verbose > 1:
                 print(
                     f"Evaluation {self.total_fun_eval} Log-Lik.={-loglik:.2f}")
@@ -4716,19 +4923,24 @@ class ObjectiveFunction(object):
                 # Hinv = np.linalg.inv(H)
                 # except Exception:
                 # Hinv = np.linalg.pinv(H)
-
+                scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad, grad_n))
+                return scaled_tuple
+                #output = (-loglik + penalty, -grad, grad_n)*self.minimize_scaler
 
-                return output
+                #return output
             else:
+                scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad))
+                return scaled_tuple
+                #output = (-loglik + penalty, -grad)*self.minimize_scaler
 
-                output
-
-                return output
+                #return output
         except Exception as e:
             traceback.print_exc()
             print(e)
 
-
+    def minimize_function(self, loglike):
+        r'Takes the logliklihood function and tranforms it to a more handed minimization function'
+        return loglike/self.n_obs
     def print_chol_mat(self, betas):
         print(self.chol_mat)
         self.get_br_and_bstd(betas)
@@ -4996,7 +5208,7 @@ class ObjectiveFunction(object):
             H = self.numerical_hessian(lambda x: self._loglik_gradient(x, *argbs), result.x, eps=1e-7 * self.n_obs)
             result['Hessian'] = H
             result['hess_inv'] = np.linalg.pinv(H)
-
+
         standard_errors = np.sqrt(np.diag(np.linalg.pinv(H)))
         return result
     # return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS', options= {'gtol':1e-7*self.N}*self.Ndraws)
@@ -5220,7 +5432,7 @@ class ObjectiveFunction(object):
         if self.power_up_ll:
             loglikelihood =-optim_res['fun']/2 - penalty
         else:
-            loglikelihood = -optim_res['fun'] - penalty
+            loglikelihood = -optim_res['fun']/self.minimize_scaler - penalty
 
         # self.coeff_names = coeff_names
         # self.total_iter = optim_res['nit']
@@ -5267,7 +5479,7 @@ class ObjectiveFunction(object):
         return a
 
     def fitRegression(self, mod,
-                      dispersion=0, maxiter=2000, batch_size=None, num_hess=False):
+                      dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):
 
         """
         Fits a poisson regression given data and outcomes if dispersion is not declared
@@ -5284,7 +5496,7 @@ class ObjectiveFunction(object):
 
 
         sol = Solution()
-
+
         tol = {'ftol': 1e-8, 'gtol': 1e-6}
         is_delete = 0
         dispersion = mod.get('dispersion')
@@ -5313,6 +5525,8 @@ class ObjectiveFunction(object):
         _g, pg, kg = 0, 0, 0
 
         dispersion_param_num = self.is_dispersion(dispersion)
+        if self.no_extra_param:
+            dispersion_param_num = 0
 
         #paramNum = self.get_param_num(dispersion)
         self.no_random_paramaters = 0
@@ -5367,18 +5581,27 @@ class ObjectiveFunction(object):
             else:
                 bb[0] = self.constant_value
             if dispersion == 1:
-
+                if not self.no_extra_param:
+                    bb[-1] = self.negative_binomial_value
             bounds = None
 
+
+
             # intial_beta = minimize(self._loglik_gradient, bb, args =(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method = 'nelder-mead', options={'gtol': 1e-7*len(XX)})
             hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
-
+
+            if self.no_extra_param:
+                dispersion_poisson = 0
+            initial_beta = self._minimize(self._loglik_gradient, bb,
                                           args=(XX, y, None, None, None, None, calc_gradient, hess_est,
-
+                                                dispersion_poisson, 0, False, 0, None, None, None, None, None,
                                                 mod),
                                           method=method2, tol=1e-5, options={'gtol': tol['gtol']},
                                           bounds=bounds)
-
+            if dispersion:
+                nb_parma = self.poisson_mean_get_dispersion(initial_beta.x, XX, y)
+
+
 
 
             if method2 == 'L-BFGS-B':
@@ -5477,7 +5700,7 @@ class ObjectiveFunction(object):
 
                 b = [b[i] if i > len(self.none_handler(self.fixed_fit)) + len(
                     self.none_handler(self.rdm_fit)) + len(
-                    self.none_handler(self.rdm_cor_fit)) else b[i] / 1
+                    self.none_handler(self.rdm_cor_fit)) else b[i] / 1 for i in range(len(b))]
             else:
                 b = bb
 
@@ -5487,9 +5710,10 @@ class ObjectiveFunction(object):
             else:
                 b = np.insert(b, -1, np.random.uniform(0.05, 0.1))
             if dispersion == 1:
-
-
-                b[-1]
+                if not self.no_extra_param:
+                    b[-1] = np.abs(b[-1])
+                    if b[-1] > 10:
+                        b[-1] = 5
             elif dispersion == 2:
                 b[-1] = .5
             if method == 'L-BFGS-B' or method2 == 'L-BFGS-B':
@@ -5615,13 +5839,30 @@ class ObjectiveFunction(object):
 
                 if draws is None and draws_hetro is not None:
                     print('hold')
-
-
-                    self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
-                    method=method2, tol=tol['ftol'],
-                    options={'gtol': tol['gtol']}, bounds=bounds,
-                    hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+                #self.grad_yes = True
+                #self.hess_yes = True
 
+                if self.no_extra_param:
+                    dispersion_poisson = 0
+                    betas_est = self._minimize(self._loglik_gradient, b, args=(
+                        X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion_poisson, 0, False, 0,
+                        self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
+                        method=method2, tol=tol['ftol'],
+                        options={'gtol': tol['gtol']}, bounds=bounds,
+                        hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+                    if dispersion:
+                        initial_fit_beta = betas_est.x
+                        parmas = np.append(initial_fit_beta, nb_parma)
+                        self.nb_parma = nb_parma
+                        #print(f'neg binomi,{self.nb_parma}')
+                        betas_est = self._minimize(self._loglik_gradient, initial_fit_beta, args=(
+                            X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
+                            self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
+                            method=method2, tol=tol['ftol'],
+                            options={'gtol': tol['gtol']}, bounds=bounds,
+                            hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+
+                #print('refit with estimation of NB')
                 # self.numerical_hessian_calc = True
                 if self.numerical_hessian_calc:
                     try:
@@ -5920,7 +6161,7 @@ class ObjectiveFunction(object):
         return delim + self._model_type_codes[dispersion]
 
     def self_standardize_positive(self, X):
-        scaler =
+        scaler = MinMaxScaler()
        if type(X) == list:
             return X
 
@@ -5930,12 +6171,26 @@ class ObjectiveFunction(object):
             # Reshaping to 2D - combining the last two dimensions
             df_tf_reshaped = X.reshape(original_shape[0], -1)
             df_tf_scaled = scaler.fit_transform(df_tf_reshaped)
-            df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
+            #df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
             # Reshape back to original 3D shape if necessary
             df_tf = df_tf_scaled.reshape(original_shape)
             return df_tf
         else:
-
+            # Initialize the MinMaxScaler
+            scaler = MinMaxScaler()
+            float_columns = X.select_dtypes(include=['float64', 'float32', 'int']).columns.difference(['const', 'offset, "EXPOSE', 'Constant', 'constant'])
+            non_numeric_columns = X.select_dtypes(exclude=['float64', 'float32', 'int']).columns
+
+            # Fit the scaler to the float columns and transform them
+            X[float_columns] = scaler.fit_transform(X[float_columns])
+            # Fit the scaler to the data and transform it
+            #scaled_data = scaler.fit_transform(X)
+
+            # Convert the result back to a DataFrame
+            #scaled_df = pd.DataFrame(scaled_data, columns=X.columns)
+
+
+            return X
 
     def make_regression_from_terms(self, fixed=None, rdm=None, rdm_cor_fit=None, distribution=None, dispersion=None,
                                    *args, **kwargs):
```
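The DataFrame branch of `self_standardize_positive` now min-max scales only the numeric columns, excluding constant and offset columns by name (note the hunk's exclusion list contains the literal `'offset, "EXPOSE'`, which looks like a typo for two separate names). A standalone sketch of that column-subset scaling, with illustrative column names and the exclusion list split into separate entries:

```python
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

df = pd.DataFrame({'const': 1.0, 'AADT': [1200.0, 560.0, 9800.0],
                   'lanes': [2, 3, 4], 'region': ['a', 'b', 'a']})

scaler = MinMaxScaler()
# Scale numeric columns only, leaving the constant and non-numeric columns alone.
float_cols = (df.select_dtypes(include=['float64', 'float32', 'int'])
                .columns.difference(['const', 'offset', 'EXPOSE', 'Constant', 'constant']))
df[float_cols] = scaler.fit_transform(df[float_cols])
print(df)
```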
```diff
@@ -5990,8 +6245,9 @@ class ObjectiveFunction(object):
                 df_test[:, :, idx], model_nature.get('transformations')[idx] = self.transformer(
                     t, idx, df_test[:, :, idx])
                 if np.max(df_tf[:, :, idx]) >= 77000:
+                    #TODO need to normalise the data
 
-
+                    print('should not be possible')
 
         self.define_selfs_fixed_rdm_cor(model_nature)
         indices = self.get_named_indices(self.fixed_fit)
@@ -6048,7 +6304,7 @@ class ObjectiveFunction(object):
         model_nature['XH'] = XH
         X_test = None
         if np.isin(X, [np.inf, -np.inf, None, np.nan]).any():  # type ignore
-            raise Exception('there is some kind of error')
+            raise Exception('there is some kind of error in X')
 
         # numpy data setup fpr estimation
         indices2 = self.get_named_indices(self.rdm_fit)
@@ -6148,7 +6404,7 @@ class ObjectiveFunction(object):
         self.log_lik = log_lik
         if self.significant == 0:
 
-
+
             if not self.test_flag:
                 alpha, alpha_rdm, alpha_cor_rdm = self.modify(
                     self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
@@ -6199,6 +6455,53 @@ class ObjectiveFunction(object):
 
         return obj_1, model_nature
 
+    def get_X_tril(self):
+        '''For correlations find the repeating terms'''
+        varnames = self.none_join([self.rdm_grouped_fit, self.rdm_fit, self.rdm_cor_fit])
+        rv_count_all = 0
+        chol_count = 0
+        rv_count = 0
+        corr_indices = []
+        rv_indices = []
+        for ii, var in enumerate(varnames):  # TODO: BUGFIXf
+            if var in self.none_handler(self.rdm_cor_fit):
+                is_correlated = True
+            else:
+                is_correlated = False
+
+            rv_count_all += 1
+            if is_correlated:
+                chol_count += 1
+            else:
+                rv_count += 1
+
+            if var in self.none_handler(self.rdm_cor_fit):
+
+                corr_indices.append(rv_count_all - 1)  # TODO: what does tis do
+
+            else:
+                rv_indices.append(rv_count_all - 1)
+
+        # for s.d.: gr_w = (Obs prob. minus predicted probability) * obs. var * random draw
+        draws_tril_idx = np.array([corr_indices[j]
+                                   for i in range(len(self.none_handler(self.rdm_cor_fit)))
+                                   for j in range(i + 1)])  # varnames pos.
+        X_tril_idx = np.array([corr_indices[i]
+                               for i in range(len(self.none_handler(self.rdm_cor_fit)))
+                               for j in range(i + 1)])
+        # Find the s.d. for random variables that are not correlated
+        var_uncor = self.none_join([self.rdm_grouped_fit, self.rdm_fit])
+        range_var = [x for x in
+                     range(len(self.none_handler(var_uncor)))]
+        range_var = sorted(range_var)
+        draws_tril_idx = np.array(np.concatenate((range_var, draws_tril_idx)))
+        X_tril_idx = np.array(np.concatenate((range_var, X_tril_idx)))
+        draws_tril_idx = draws_tril_idx.astype(int)
+        X_tril_idx = X_tril_idx.astype(int)
+        return X_tril_idx
+
+
+
     def modifyn(self, data):
         select_data = self._characteristics_names
         alpha = np.isin(select_data, [item.split(':')[0] for item in data['fixed_fit']]).astype(int).tolist()
```
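The new `get_X_tril` helper rebuilds the lower-triangular index arrays that pair each Cholesky element with its draw and its variable. A small illustration of how the two comprehensions expand the correlated positions (three correlated parameters assumed, no uncorrelated ones):

```python
import numpy as np

corr_indices = [0, 1, 2]        # positions of three correlated random parameters
K = len(corr_indices)

# Row/column indices of the lower Cholesky triangle, flattened row by row:
# pairs (i, j) with j <= i are (0,0), (1,0), (1,1), (2,0), (2,1), (2,2).
draws_tril_idx = np.array([corr_indices[j] for i in range(K) for j in range(i + 1)])
X_tril_idx = np.array([corr_indices[i] for i in range(K) for j in range(i + 1)])

print(draws_tril_idx)   # [0 0 1 0 1 2] -> which draw each Cholesky term multiplies
print(X_tril_idx)       # [0 1 1 2 2 2] -> which variable each term belongs to
```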
```diff
@@ -6406,23 +6709,35 @@ class ObjectiveFunction(object):
         # N, D = draws.shape[0], draws.shape[1]
         N, R, Kr = draws.shape[0], draws.shape[2], draws.shape[1]
         der = dev.np.ones((N, Kr, R), dtype=draws.dtype)
-        if len(self.none_handler(self.rdm_cor_fit)) == 0:
-            Br_come_one = self.Br.copy()
-            # Br_come_one =
-        else:
 
-            Br_come_one = self.Br.copy()
         # betas_random = self._transform_rand_betas(betas, betas_std, draws)
         #todo make sure this works for ln and truncated normal
         if any(set(distribution).intersection(['ln_normal', 'tn_normal'])):
-
+
+            #print('check this, intesection shouldn not happen for all')
+
+            if der.shape[1] != draws.shape[1]:
+                print('why')
             Br_come_one = self._transform_rand_betas(betas, betas_std, draws)
+            if der.shape[1] != draws.shape[1]:
+                print('why')
+            #TODO need to get the stuction of the rdms
             for k, dist_k in enumerate(distribution):
                 if dist_k == 'ln_normal':
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                     der[:, k, :] = Br_come_one[:, k, :]
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                 elif dist_k == 'tn_normal':
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
                     der[:, k, :] = 1 * (Br_come_one[:, k, :] > 0)
+                    if der.shape[1] != draws.shape[1]:
+                        print('why')
 
+        if der.shape[1] != draws.shape[1]:
+            print('why')
         return der
 
     def _copy_size_display_as_ones(self, matrix):
```
|