metacountregressor 0.1.78__py3-none-any.whl → 0.1.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,24 +30,24 @@ from scipy.special import gammaln
  from sklearn.metrics import mean_absolute_error as MAE
  from sklearn.metrics import mean_squared_error as MSPE
  from statsmodels.tools.numdiff import approx_fprime, approx_hess
- from sklearn.preprocessing import StandardScaler
+ from sklearn.preprocessing import StandardScaler, MinMaxScaler
  from texttable import Texttable
-
+ import time
  try:
  from ._device_cust import device as dev
  from .pareto_file import Pareto, Solution
  from .data_split_helper import DataProcessor
  except ImportError:
- from metacountregressor._device_cust import device as dev
- from metacountregressor.pareto_file import Pareto, Solution
+ from _device_cust import device as dev
+ from pareto_file import Pareto, Solution
  from data_split_helper import DataProcessor

-
+ from scipy import stats
  np.seterr(divide='ignore', invalid='ignore')
  warnings.simplefilter("ignore")

  # define the computation boundary limits
- min_comp_val = 1e-20
+ min_comp_val = 1e-160
  max_comp_val = 1e+200
  log_lik_min = -1e+200
  log_lik_max = 1e+200
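Lowering min_comp_val from 1e-20 to 1e-160 widens the band in which tiny likelihood contributions survive clipping instead of being flattened to a common floor before the log is taken. A minimal sketch of how such boundary constants are typically applied (illustrative only; safe_log_lik is a hypothetical helper, not package code):

```python
import numpy as np

min_comp_val = 1e-160   # floor for probabilities before np.log
max_comp_val = 1e+200   # ceiling to avoid overflow

def safe_log_lik(prob):
    # Clip probabilities into the representable band, then take logs,
    # so near-zero likelihoods stay distinguishable rather than collapsing.
    prob = np.clip(prob, min_comp_val, max_comp_val)
    return np.log(prob)
```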
@@ -122,22 +122,24 @@ class ObjectiveFunction(object):

  def __init__(self, x_data, y_data, **kwargs):

- self.reg_penalty = 1
+ self.reg_penalty = 0
  self.power_up_ll = False
+ self.nb_parma = 1
  self.bic = None
  self.other_bic = False
  self.test_flag = 1
+ self.no_extra_param =1 #if true, fix dispersion. w
  if self.other_bic:
  print('change this to false latter ')

  # initialize values
- self.constant_value = -5.5
- self.negative_binomial_value = 0.05
+ self.constant_value = 0
+ self.negative_binomial_value = 1

- self.verbose_safe = True
+ self.verbose_safe = kwargs.get('verbose', 0)
  self.please_print = kwargs.get('please_print', 0)
  self.group_halton = None
- self.grad_yes = False
+ self.grad_yes = kwargs.get('grad_est', False)
  self.hess_yes = False
  self.group_halton_test = None
  self.panels = None
@@ -150,15 +152,15 @@ class ObjectiveFunction(object):
  self.dist_fit = None

  self.MAE = None
- self.best_obj_1 = 100000000.0
- self._obj_1 = 'bic'
- self._obj_2 = 'MSE'
+ self.best_obj_1 = 1000000.0
+ self._obj_1 = kwargs.get('_obj_1', 'bic')
+ self._obj_2 = kwargs.get('_obj_2', 'MSE')
  self.numerical_hessian_calc = 0 # calculates hessian by statsmodels otherwise scipy
  self.full_model = None
  self.GP_parameter = 0
- self.is_multi = 0
+ self.is_multi = kwargs.get('is_multi', False)
  self.complexity_level = 6
- self._max_iterations_improvement = 100
+ self._max_iterations_improvement = 10000
  self.generated_sln = set()
  self.ave_mae = 0
  # defalt paramaters for hs #TODO unpack into harmony search class
@@ -166,23 +168,32 @@ class ObjectiveFunction(object):
  self._hms = 20
  self._max_time = 60 * 60 * 24
  self._hmcr = .5
- self._par = 0.3
+ self._par = 0.3 #dont think this gets useted
  self._mpai = 1
  self._max_imp = 100000
- self._WIC = 1000 # Number of Iterations without Multiobjective Improvement
+ self._WIC = 1000 # Number of Iterations without Multiobjective Improvement #tod chuck into solution
  self._panels = None
  self.is_multi = True
  self.method_ll = 'Nelder-Mead-BFGS'
+
  self.method_ll = 'L-BFGS-B' # alternatives 'BFGS_2', 'BFGS
- self.method_ll = 'BFGS_2'
+ self.method_ll = kwargs.get('method', 'BFGS_2')
+
+ #self.method_ll = 'Nelder-Mead-BFGS'
  self.Keep_Fit = 2
  self.MP = 0
  # Nelder-Mead-BFGS

- self._max_characteristics = 26
+ self._max_characteristics = kwargs.get('_max_vars', 26)

  self.beta_dict = dict
+ if 'model_terms' in kwargs:
+ print('change')
+ if kwargs.get('model_terms').get('group') is not None:
+ kwargs['group'] = kwargs.get('model_terms').get('group')

+ if kwargs.get('model_terms').get('panels') is not None:
+ kwargs['panels'] = kwargs.get('model_terms').get('panels')
  acceptable_keys_list = ['_par', '_max_imp', '_hmcr', 'steps',
  'algorithm', '_random_seed', '_max_time',
  'forcedvariables', '_obj_1', '_obj_2', '_par',
@@ -200,12 +211,17 @@ class ObjectiveFunction(object):
  if 'instance_number' in kwargs:
  self.instance_number = str(kwargs['instance_number'])
  else:
+
+ print('no name set, setting name as 0')
  self.instance_number = str(0) # set an arbitrary instance number

  if not os.path.exists(self.instance_number):
- os.makedirs(self.instance_number)
+ if kwargs.get('make_directory', True):
+ print('Making a Directory, if you want to stop from storing the files to this directory set argumet: make_directory:False')
+ os.makedirs(self.instance_number)

  if not hasattr(self, '_obj_1'):
+ print('_obj_1 required, define as bic, aic, ll')
  raise Exception

  self.pvalue_penalty = float(kwargs.get('pvalue_penalty', 0.5))
@@ -213,6 +229,11 @@ class ObjectiveFunction(object):
  self._maximize = False # do we maximize or minimize?

  x_data = sm.add_constant(x_data)
+ standardize_the_data = 0
+ if standardize_the_data:
+ print('we are standardize the data')
+ x_data = self.self_standardize_positive(x_data)
+
  self._input_data(x_data, y_data)


@@ -229,9 +250,12 @@ class ObjectiveFunction(object):
  self.test_percentage = float(kwargs.get('test_percentage', 0))
  self.val_percentage = float(kwargs.get('val_percentage', 0))
  if self.test_percentage == 0:
+ print('test percentage is 0, please enter arg test_percentage as decimal, eg 0.8')
+ print('continuing single objective')
+ time.sleep(2)
  self.is_multi = False

- if 'panels' in kwargs:
+ if 'panels' in kwargs and not (kwargs.get('panels') == None):
  self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)

  x_data[kwargs['group']] = x_data[kwargs['group']].astype(
@@ -274,11 +298,11 @@ class ObjectiveFunction(object):

  #self.n_obs = N
  self._characteristics_names = list(self._x_data.columns)
- self._max_group_all_means = 1
+ self._max_group_all_means = 2

  exclude_this_test = [4]

- if 'panels' in kwargs:
+ if 'panels' in kwargs and not (kwargs.get('panels') == None):
  self.panels = np.asarray(df_train[kwargs['panels']])
  self.panels_test = np.asarray(df_test[kwargs['panels']])
  self.ids = np.asarray(
@@ -294,6 +318,8 @@ class ObjectiveFunction(object):
  self.group_halton = group.copy()
  self.group_dummies = pd.get_dummies(group)
  Xnew, Ynew, panel_info = self._balance_panels(X, Y, panel)
+
+ Xnew = pd.DataFrame(Xnew, columns=X.columns)
  self.panel_info = panel_info
  self.N, self.P = panel_info.shape
  Xnew.drop(kwargs['panels'], axis=1, inplace=True)
@@ -301,9 +327,11 @@ class ObjectiveFunction(object):
  K = Xnew.shape[1]
  self._characteristics_names = list(Xnew.columns)
  XX = Xnew.values.reshape(self.N, self.P, K).copy()
+ XX = XX.astype('float')
  self.group_dummies = self.group_dummies.values.reshape(self.N, self.P, -1)
  self.group_halton = self.group_halton.reshape(self.N, self.P)[:, 0]
  YY = Ynew.values.reshape(self.N, self.P, 1).copy()
+ YY = YY.astype('float')
  self._x_data = XX.copy()
  self._y_data = YY.copy()
  X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.panels_test, group_test)
@@ -325,6 +353,7 @@ class ObjectiveFunction(object):
  K = X.shape[1]
  self.columns_names = X.columns
  X = X.values.reshape(self.N_test, self.P_test, K)
+ X = X.astype('float')
  self.group_halton_test = self.group_halton_test.reshape(self.N_test, self.P_test)[:, 0]
  Y = Y.values.reshape(self.N_test, self.P_test, 1)
  Y = Y.astype('float')
@@ -337,6 +366,7 @@ class ObjectiveFunction(object):


  else:
+ print('No Panels. Grouped Random Paramaters Will not be estimated')
  self.G = None
  self._Gnum = 1
  self._max_group_all_means = 0
@@ -353,7 +383,9 @@ class ObjectiveFunction(object):
  K = Xnew.shape[1]
  self._characteristics_names = list(Xnew.columns)
  XX = Xnew.values.reshape(self.N, self.P, K).copy()
+ XX = XX.astype('float')
  YY = Ynew.values.reshape(self.N, self.P, 1).copy()
+ YY = YY.astype('float')
  self._x_data = XX.copy()
  self._y_data = YY.copy()

@@ -369,7 +401,9 @@ class ObjectiveFunction(object):
  K = X.shape[1]
  self.columns_names = X.columns
  X = X.values.reshape(self.N_test, self.P_test, K)
+ X = X.astype('float')
  Y = Y.values.reshape(self.N_test, self.P_test, 1)
+ Y = Y.astype('float')
  self._x_data_test = X.copy()
  self.y_data_test = Y.copy()

@@ -384,11 +418,13 @@ class ObjectiveFunction(object):



- self.Ndraws = 200 # todo: change back
+ self.Ndraws = kwargs.get('Ndraws', 200)
  self.draws1 = None
  self.initial_sig = 1 # pass the test of a single model
  self.pvalue_sig_value = .1
  self.observations = self._x_data.shape[0]
+ self.minimize_scaler = 1/self.observations # scale the minimization function to the observations
+
  self.batch_size = None
  # open the file in the write mode
  self.grab_transforms = 0
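The new minimize_scaler = 1/observations turns the minimized objective into a per-observation quantity, so optimizer tolerances such as gtol behave comparably across dataset sizes; later hunks divide optim_res['fun'] by the same scaler to recover the raw log-likelihood. A hedged sketch of the idea (scaled_negll is a hypothetical name):

```python
def scaled_negll(neg_loglik, n_obs):
    # Scale the objective to a per-observation value so gradient-based
    # stopping rules (gtol/ftol) act consistently across sample sizes.
    scaler = 1.0 / n_obs
    return neg_loglik * scaler

# Recovering the unscaled log-likelihood after optimization:
#   loglik = -optim_res['fun'] / scaler
```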
@@ -400,17 +436,19 @@ class ObjectiveFunction(object):
  print('Setup Complete...')
  else:
  print('No Panels Supplied')
+ print('Setup Complete...')
  self._characteristics_names = list(self._x_data.columns)
  # define the variables
  # self._transformations = ["no", "sqrt", "log", "exp", "fact", "arcsinh", 2, 3]
  self._transformations = ["no", "sqrt", "log", "arcsinh"]
  self._transformations = kwargs.get('_transformation', ["no", "sqrt", "log", 'arcsinh'])
-
+ self._transformations = kwargs.get('_transformation', ["no", "log", "sqrt", "arcsinh"])
  # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']

- self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'lm_normal', 'tn_normal'])
+ self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])

  if self.G is not None:
+ #TODO need to handle this for groups
  self._distribution = ["trad| " + item for item in self._distribution
  ] + ["grpd| " + item for item in self._distribution]

@@ -422,18 +460,32 @@ class ObjectiveFunction(object):

  self.significant = 0
  # define the states of our explanatory variables
+
+
  self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test,
- kwargs.get('must_include', []))
+ kwargs.get('must_include', []), extra = kwargs.get('decisions', None))
+
+
+
  self._discrete_values = self._discrete_values + \
- [[x for x in self._distribution]] * self._characteristics
+ self.define_distributions_analyst(extra=kwargs.get('decisions', None))

  if 'model_types' in kwargs:
  model_types = kwargs['model_types']
  else:
- model_types = [[0, 1]] # add 2 for Generalized Poisson

+
+ model_types = [[0, 1]] # add 2 for Generalized Poisson
+ #model_types = [[0]]
+ #TODO change back and fix NB
+ model_t_dict = {'Poisson':0,
+ "NB":1}
+ # Retrieve the keys (model names) corresponding to the values in model_types
+ model_keys = [key for key, value in model_t_dict.items() if value in model_types[0]]
+ # Print the formatted result
+ print(f'The type of models possible will consider: {", ".join(model_keys)}')
  self._discrete_values = self._discrete_values + self.define_poissible_transforms(
- self._transformations) + model_types
+ self._transformations, kwargs.get('decisions',None)) + model_types

  self._model_type_codes = ['p', 'nb',
  'gp', "pl", ["nb-theta", 'nb-dis']]
@@ -451,6 +503,7 @@ class ObjectiveFunction(object):
  #Manually fit from analyst specification
  manual_fit = kwargs.get('Manual_Fit')
  if manual_fit is not None:
+ print('fitting manual')
  self.process_manual_fit(manual_fit)

  self.solution_analyst = None
@@ -485,6 +538,7 @@ class ObjectiveFunction(object):
  if self.is_multi:
  self._offsets_test = self._x_data_test[:, :, val_od]
  self._x_data_test = self.remove_offset(self._x_data_test, val_od)
+ print(self._offsets)
  else:
  self.initialize_empty_offsets()

@@ -757,6 +811,8 @@ class ObjectiveFunction(object):
  if dispersion == 0:
  return None, None
  elif dispersion == 2 or dispersion == 1:
+ if self.no_extra_param:
+ return self.nb_parma, None
  return betas[-1], None

  elif dispersion == 3:
@@ -784,14 +840,65 @@ class ObjectiveFunction(object):
  par = np.nan_to_num(par)
  return par

- def define_alphas(self, complexity_level=4, exclude=[], include=[]):
+ def rename_distro(self, distro):
+ # Mapping dictionary
+ mapping = {
+ 'normal': ['normal', 'n', 'Normal'],
+ 'triangular': ['triangular', 't', 'Triangular'],
+ 'uniform': ['uniform', 'u', 'Uniform'],
+ 'ln_normal': ['ln_normal', 'ln_n', 'Ln_Normal'],
+ 'tn_normal': ['tn_normal', 'tn_n', 'trunc_normal']
+ }
+
+ # Use list comprehension with the mapping
+ reversed_mapping = {value: key for key, values in mapping.items() for value in values}
+
+ # Use the reversed mapping to find the corresponding key
+ new_distro = [reversed_mapping.get(i, i) for i in distro]
+ return new_distro
+
+ def define_distributions_analyst(self, extra = None):
+
+ if extra is not None:
+ set_alpha = []
+ for col in self._characteristics_names:
+ if col in extra[('Column')].values:
+ matched_index = extra[('Column')].index[extra[('Column')] == col].tolist()
+ distro = ast.literal_eval(extra.iloc[matched_index, 7].values.tolist()[0])
+ distro = self.rename_distro(distro)
+ set_alpha = set_alpha+[distro]
+ elif col == 'const':
+ set_alpha = set_alpha +[['normal']]
+ return set_alpha
+ return [[x for x in self._distribution]] * self._characteristics
+
+
+
+
+ def define_alphas(self, complexity_level=4, exclude=[], include=[], extra = None):
  'complexity level'
  '''
  2 is feature selection,
- 3 is random paramaters
- 4 is correlated random paramaters
+ 3 is random parameters
+ 4 is correlated random parameters
+
+ extra is the stuff defined by the Meta APP
  '''
  set_alpha = []
+ if extra is not None:
+ for col in self._characteristics_names:
+ if col == 'const' or col == 'Constant' or col == 'constant': # no random paramaters for const
+ set_alpha = set_alpha + [[1]]
+ elif col == 'Offset':
+ set_alpha = set_alpha + [[1]]
+
+ elif col in extra[('Column')].values:
+ matched_index = extra[('Column')].index[extra[('Column')] == col].tolist()
+ check = list(itertools.chain(*extra.iloc[matched_index, 1:7].values))
+ set_alpha = set_alpha + [[x for x in range(len(check)) if check[x] == True]]
+ return set_alpha
+
+
  for col in self._characteristics_names:
  if col == 'const' or col == 'Constant' or col == 'constant': # no random paramaters for const
  set_alpha = set_alpha + [[1]]
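The new rename_distro canonicalizes user-supplied distribution aliases by inverting a one-to-many mapping into a flat alias lookup. The same pattern in isolation, as a self-contained sketch (names are illustrative):

```python
mapping = {
    'normal': ['normal', 'n', 'Normal'],
    'uniform': ['uniform', 'u', 'Uniform'],
}
# Invert the one-to-many alias lists into a flat alias -> canonical lookup.
reversed_mapping = {alias: canon for canon, aliases in mapping.items()
                    for alias in aliases}

print([reversed_mapping.get(d, d) for d in ['n', 'Uniform', 'lindley']])
# ['normal', 'uniform', 'lindley'] -- unknown names pass through unchanged
```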
@@ -841,8 +948,14 @@ class ObjectiveFunction(object):

  return ([self._model_type_codes[dispersion]])

- def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None,
- zi_fit=None, obj_1=None, model_nature=None):
+ def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
+ '''
+ setup for naming of the model summary
+ '''
+ if self.no_extra_param and dispersion ==1:
+
+ betas = np.append(betas, self.nb_parma)
+
  self.name_deleter = []
  group_rpm = None
  group_dist = []
@@ -962,13 +1075,15 @@ class ObjectiveFunction(object):
  [''] * (len(names) - len(self.transform_id_names))
  self.coeff_names = names

+ '''
  if betas is not None:
  try:
  if len(betas) != len(names):
- print('no draws is', no_draws)
- print('fix_theano')
+ print('standard_model', no_draws)
+
  except Exception as e:
  print(e)
+ '''



@@ -993,7 +1108,8 @@ class ObjectiveFunction(object):
  if not isinstance(self.pvalues, np.ndarray):
  raise Exception

-
+ if 'nb' in self.coeff_names and self.no_extra_param:
+ self.pvalues = np.append(self.pvalues,0)

  if self.please_print or save_state:

@@ -1009,17 +1125,22 @@ class ObjectiveFunction(object):

  if solution is not None:
  print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")
-
+
  self.pvalues = [self.round_with_padding(
  x, 2) for x in self.pvalues]
  signif_list = self.pvalue_asterix_add(self.pvalues)
  if model == 1:

- self.coeff_[-1] = np.abs(self.coeff_[-1])
- if self.coeff_[-1] < 0.25:
+ #self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
+ if self.no_extra_param:
+ self.coeff_ = np.append(self.coeff_, self.nb_parma)
+ self.stderr = np.append(self.stderr, 0.00001)
+ self.zvalues = np.append(self.zvalues, 50)
+
+ elif self.coeff_[-1] < 0.25:
  print(self.coeff_[-1], 'Warning Check Dispersion')
  print(np.exp(self.coeff_[-1]))
- self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
+ #self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom

  self.coeff_ = [self.round_with_padding(x, 2) for x in self.coeff_]

@@ -1231,7 +1352,7 @@ class ObjectiveFunction(object):
  with open(filename, 'w') as file:
  file.write(content)

- def define_poissible_transforms(self, transforms) -> list:
+ def define_poissible_transforms(self, transforms, extra= None) -> list:
  transform_set = []
  if not isinstance(self._x_data, pd.DataFrame):
  x_data = self._x_data.reshape(self.N * self.P, -1).copy()
@@ -1242,6 +1363,7 @@ class ObjectiveFunction(object):

  if 'AADT' in self._characteristics_names[col]:
  new_transform = [['log']]
+ #new_transform = [['no']]
  transform_set = transform_set + new_transform

  elif all(x_data[col] <= 5):
@@ -1281,6 +1403,18 @@ class ObjectiveFunction(object):

  return transform_set

+ def poisson_mean_get_dispersion(self, betas, X, y):
+ eVy = self._loglik_gradient(betas, X, y, None, X, None, None, False, False, dispersion=0,
+ return_EV=True,
+ zi_list=None, draws_grouped=None, Xgroup=None)
+
+ ab = ((y - eVy)**2 - eVy)/eVy
+ bb = eVy -1
+ disp = sm.OLS(ab.ravel(), bb.ravel()).fit()
+ gamma = disp.params[0]
+ #print(f'dispersion is {gamma}')
+ return gamma
+
  def validation(self, betas, y, X, Xr=None, dispersion=0, rdm_cor_fit=None, zi_list=None, exog_infl=None,
  model_nature=None, halton=1, testing=1, validation=0):
  'validation if mu needs to be calculated'
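The new poisson_mean_get_dispersion backs a dispersion starting value out of Poisson fitted means via an auxiliary OLS regression, in the spirit of Cameron–Trivedi style overdispersion moment regressions. A standalone sketch of the same moment regression, assuming mu holds Poisson fitted means (auxiliary_dispersion is a hypothetical name):

```python
import numpy as np
import statsmodels.api as sm

def auxiliary_dispersion(y, mu):
    # Regress ((y - mu)**2 - mu) / mu on (mu - 1) without an intercept,
    # mirroring the auxiliary regression used in the new helper; the slope
    # serves as a moment-based starting value for the NB dispersion.
    lhs = ((y - mu) ** 2 - mu) / mu
    rhs = mu - 1
    fit = sm.OLS(lhs.ravel(), rhs.ravel()).fit()
    return fit.params[0]
```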
@@ -1314,7 +1448,7 @@ class ObjectiveFunction(object):
  XG = model_nature.get('XGtest')[:total_percent, :, :]
  else:
  XG = model_nature.get('XGtest')[total_percent:, :, :]
- print('chekc this is doing it wright')
+
  else:
  if 'XG' in model_nature:
  XG = model_nature.get('XG')
@@ -1436,7 +1570,7 @@ class ObjectiveFunction(object):
  5: herogeneity_in _means


- a: how to transofrm the original data
+ a: how to transform the original data
  b: grab dispersion '''

  # todo: better way
@@ -1784,7 +1918,10 @@ class ObjectiveFunction(object):
  elif dispersion == 4:
  return 2
  else:
- return 1
+ if self.no_extra_param:
+ return 0
+ else:
+ return 1

  def get_pvalue_info_alt(self, pvalues, names, sig_value=0.05, dispersion=0, is_halton=1, delete=0,
  return_violated_terms=0):
@@ -1799,6 +1936,7 @@ class ObjectiveFunction(object):

  else:
  slice_this_amount = self.num_dispersion_params(dispersion)
+ slice_this_amount = 1 #TODO handle this
  if pvalues[-1] > sig_value:
  vio_counts += 1
  subpvalues = pvalues[:-slice_this_amount].copy()
@@ -2223,7 +2361,7 @@ class ObjectiveFunction(object):
  sorted(my_dict, key=lambda x: x[0]['pval_percentage'])

  def get_fitness(self, vector, multi=False, verbose=False, max_routine=3):
- obj_1 = 10.0 ** 8
+ obj_1 = 10.0 ** 5
  obj_best = None
  sub_slns = list()

@@ -2234,7 +2372,7 @@ class ObjectiveFunction(object):
  try:
  self.repair(vector)
  except Exception as e:
- print('prob here')
+ print('prolem repairing here')
  print(vector)
  print(e)
  layout = vector.copy()
@@ -2481,7 +2619,7 @@ class ObjectiveFunction(object):
  random.seed(seed)

  def set_random_seed(self):
- print('Imbdedding Seed', self._random_seed)
+ print('Imbedding Seed', self._random_seed)
  np.random.seed(self._random_seed)

  random.seed(self._random_seed)
@@ -2515,7 +2653,7 @@ class ObjectiveFunction(object):
  self._hmcr = (
  self._hmcr_min + ((self._hmcr_max - self._hmcr_min) / self._max_imp) * iteration)

- # end def
+

  def update_par(self, iteration, is_sin=False):
  """
@@ -2683,7 +2821,7 @@ class ObjectiveFunction(object):
  grad_n = np.concatenate((grad_n_sub, grad_n_sub1), axis=1)
  return gradient, grad_n

- def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False):
+ def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False, alpha = None):
  """
  Negative Binomial model score (gradient) vector of the log-likelihood
  Parameters
@@ -2703,9 +2841,43 @@ class ObjectiveFunction(object):

  """

- try:
+ # Calculate common terms
+ '''
+ n = len(y)
+ n, p, d = X.shape # n: observations, p: panels (1 in your case), d: explanatory variables
+
+ # Flatten the data since there's only one panel, simplifying the operations
+ X_flat = X.reshape(n * p, d)
+ y_flat = y.flatten()
+ mu_flat = mu.flatten()

- alpha = params[-1]
+ # Prepare score array
+ score = np.zeros(d + 1) # +1 for alpha
+
+ # Compute the gradient for regression coefficients
+ for j in range(d): # Exclude the last parameter (alpha)
+ score[j] = np.dot(X_flat[:, j], (y_flat - mu_flat))
+
+ # Compute the gradient for the dispersion parameter
+ if obs_specific:
+ # Adjust the calculation if observation-specific effects are considered
+ sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+ y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+ score[-1] = np.sum(sum_terms)
+ else:
+ # Standard calculation
+ sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+ y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+ score[-1] = np.sum(sum_terms)
+ return score
+ '''
+ #return score
+
+ try:
+ if alpha is None:
+ alpha = np.exp(params[-1])
+ else:
+ alpha = np.exp(params[-1])
  a1 = 1 / alpha * mu ** Q
  prob = a1 / (a1 + mu)
  exog = X
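The score now reads the stored dispersion term as log-alpha and exponentiates it, which keeps alpha strictly positive without bound constraints (note that both branches above currently exponentiate params[-1], so the passed-in alpha is effectively ignored). A sketch of the reparameterization and its chain-rule consequence (unpack_dispersion is a hypothetical name):

```python
import numpy as np

def unpack_dispersion(params):
    # Estimate log(alpha) unconstrained; recover alpha > 0 by exponentiating.
    log_alpha = params[-1]
    alpha = np.exp(log_alpha)
    # Chain rule: d/d(log_alpha) = alpha * d/d(alpha), so a gradient written
    # in terms of alpha must be multiplied by alpha under this parameterization.
    return alpha
```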
@@ -2747,7 +2919,8 @@ class ObjectiveFunction(object):
  return np.concatenate((dparams, dalpha),
  axis=1)
  except Exception as e:
- print('in ki nb probkemng')
+ print(e)
+ print('NB score exception problem..')
  exc_type, exc_obj, exc_tb = sys.exc_info()
  fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
  print(exc_type, fname, exc_tb.tb_lineno)
@@ -2840,7 +3013,7 @@ class ObjectiveFunction(object):
  argument = prob.mean(axis=1)
  # if less than 0 penalise
  if np.min(argument) < 0:
- print('what the fuck')
+ print('Error with args..')
  if np.min(argument) < limit:
  # add a penalty for too small argument of log
  log_lik += -np.sum(np.minimum(0.0, argument - limit)) / limit
@@ -3331,6 +3504,7 @@ class ObjectiveFunction(object):
  else:
  corr_pairs = list(itertools.combinations(corr_indices, 2))

+
  for ii, corr_pair in enumerate(corr_pairs):
  # lower cholesky matrix
  chol_mat_temp[tuple(reversed(corr_pair))] = chol[chol_count]
@@ -3358,7 +3532,7 @@ class ObjectiveFunction(object):
  a = 0
  b = 0
  stuff = []
- # todo get order
+ # TODO get order
  for j, i in enumerate(list_sizes):
  br_mean = betas_hetro[a:i + a]
  a += i
@@ -3385,7 +3559,30 @@ class ObjectiveFunction(object):
  br_mean = betas_m
  br_sd = betas_sd # Last Kr positions
  # Compute: betas = mean + sd*draws
- betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
+ if len(br_sd) != draws.shape[1]:
+ #get the same size as the mean
+ betas_random = self.Br.copy()
+
+ '''
+ c = self.get_num_params()[3:5]
+
+ cor = []
+ for i in range(c[0]):
+ cor.append(i)
+
+ vall =[]
+ for i, val in enumerate(reversed(br_sd)):
+ vall.append()
+
+ remaining = draws.shape[1] - len(betas_sd)
+ '''
+
+ else:
+
+
+ betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
+
+
  betas_random = self._apply_distribution(betas_random)

  return betas_random
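Random coefficients are built as mean plus scaled draws, broadcast over observations and draws; the new guard falls back to the cached self.Br when the standard-deviation vector does not match the draw dimension. The core broadcast, as a self-contained sketch:

```python
import numpy as np

rng = np.random.default_rng(0)
N, Kr, R = 5, 3, 100          # observations, random coefficients, draws
br_mean = np.array([0.5, -0.2, 1.0])
br_sd = np.array([0.1, 0.3, 0.05])
draws = rng.standard_normal((N, Kr, R))

# betas = mean + sd * draws, broadcast to shape (N, Kr, R)
betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
print(betas_random.shape)  # (5, 3, 100)
```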
@@ -3404,28 +3601,71 @@ class ObjectiveFunction(object):
  # if gamma <= 0.01: #min defined value for stable nb
  # gamma = 0.01

+ #g = stats.gamma.rvs(gamma, scale = lam/gamma, size = 1.0 / gamma * lam ** Q )
+
+ #gg = stats.poisson.rvs(g)
+
+
+
+
  endog = y
  mu = lam
+ ''''
+ mu = lam*np.exp(gamma) #TODO check that this does not need to be multiplied
+ alpha = np.exp(gamma)
+
+ '''
  alpha = gamma
  size = 1.0 / alpha * mu ** Q
- alpha_size = alpha * mu ** Q
- # prob = size/(size+mu)
- prob = alpha / (alpha + mu)
- # prob = 1/(1+mu*alpha)
+
+ prob = size/(size+mu)
+
+
+
+ '''test'''
+
+
+ '''
+ size = 1 / np.exp(gamma) * mu ** 0
+ prob = size / (size + mu)
+ coeff = (gammaln(size + y) - gammaln(y + 1) -
+ gammaln(size))
+ llf = coeff + size * np.log(prob) + y * np.log(1 - prob)
+ '''
+
  try:
  # print(np.shape(y),np.shape(size), np.shape(prob))
- # gg2 = self.negbinom_pmf(alpha_size, prob, y)
+ #gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
+ #import time
+ #start_time = time.time()

+
+ # Measure time for negbinom_pmf
+ #start_time = time.time()
+ #for _ in range(10000):
+
+
+ #end_time = time.time()
+ #print("Custom functieon time:", end_time - start_time)
+ #start_time = time.time()
+ #for _ in range(10000):
+ '''
  gg = np.exp(
  gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
  y + alpha) * np.log(mu + alpha))
-
- # gg1 = self.negbinom_pmf(alpha_size, prob, y)
- # gg = nbinom.pmf(y ,alpha, prob)
+ gg[np.isnan(gg)] = 1
+ '''
+ gg_alt = nbinom.pmf(y ,1/alpha, prob)
+ #gg_alt_2 = (gammaln(size + y) - gammaln(y + 1) -
+ #gammaln(size)) + size * np.log(prob) + y * np.log(1 - prob)
+ #print('check theses')
+ #gg = nbinom.pmf(y ,alpha, prob)
+ #end_time = time.time()
+ #print("Custom functieon time:", end_time - start_time)

  except Exception as e:
- print(e)
- return gg
+ print("Neg Binom error.")
+ return gg_alt

  def lindley_pmf(self, x, r, theta, k=50):
  """
@@ -3492,7 +3732,7 @@ class ObjectiveFunction(object):

  endog = y
  mu = lam
- alpha = gamma
+ alpha = np.exp(gamma)
  alpha = alpha * mu ** Q
  size = 1 / alpha * mu ** Q # also r
  # self.rate_param = size
@@ -3572,21 +3812,8 @@ class ObjectiveFunction(object):

  if dispersion == 1 or dispersion == 4: # nb
  # if model_nature is not None and 'dispersion_penalty' in model_nature:
- # if b_gam < 0.8*model_nature['dispersion_penalty']:
- # penalty += model_nature['dispersion_penalty'] -b_gam
-
- # if abs(b_gam) < 0.01:
- # penalty += 1/np.abs(b_gam)
-
- if b_gam >= 4.5:
- penalty += b_gam
- b_gam = 4.61
- # b_gam = 7.9
- # penalty += model_nature['dispersion_penalty'] -b_gam
- # penalty += 1/np.max((0.01,abs(b_gam)))
- # b_gam = model_nature['dispersion_penalty']
-
- """
+ #b_gam = 1/np.exp(b_gam)
+ #print(b_gam)
  if b_gam <= 0:
  #penalty += 100
  #penalty += abs(b_gam)
@@ -3594,21 +3821,21 @@ class ObjectiveFunction(object):
  #b_gam = 1

  # if b_gam < 0.03:
- penalty += 10
+ penalty += min(1, np.abs(b_gam), 0)

- b_gam = 0.03
+ #b_gam = 0.001
  #

- if b_gam >= 10:
- penalty+= b_gam
+ #if b_gam >= 10:
+ # penalty+= b_gam

- if b_gam == 0:
- b_gam = min_comp_val
+ # if b_gam == 0:
+ #b_gam = min_comp_val
  #b_gam = 0.03

- b_gam = abs(b_gam)
+ # b_gam = abs(b_gam)

- """
+

  elif dispersion == 2:
  if b_gam >= 1:
@@ -3628,8 +3855,15 @@ class ObjectiveFunction(object):
  def eXB_calc(self, params_main, Xd, offset, dispersion, b_gam=None):

  # print('this was 0')
- eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
+ if dispersion:
+ eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
+
+ #eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])+dispersion
+ #print('check if this holds size')
+ else:
+ eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
  eta = np.array(eta)
+
  # eta = np.float64(eta)
  # eta = np.dot(Xd, params_main)+offset[:,:,0]
  # eta2 = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
@@ -3648,7 +3882,7 @@ class ObjectiveFunction(object):

  else:
  # eVd = self.my_lindley(np.exp(np.clip(eta, None, EXP_UPPER_LIMIT)), 1.29)
-
+ eta = eta.astype('float')
  eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
  return eVd

@@ -3761,9 +3995,10 @@ class ObjectiveFunction(object):
  elif dispersion == 1:

  proba_r = self._nonlog_nbin(y, eVd, b_gam)
- # print(1)
+
+
  # proba_d = self.dnegbimonli(y, eVd, b_gam )
- # print('fuck if this actually works')
+

  elif dispersion == 2:

@@ -3784,7 +4019,7 @@ class ObjectiveFunction(object):
  # proba_r = self.poisson_lognormal_pmf(y, eVd, sig)
  proba_r = np.array(store)
  proba_r = np.atleast_2d(proba_r).T
- print(1)
+

  else:
  raise Exception('not implemented other modeling forms')
@@ -3793,7 +4028,7 @@ class ObjectiveFunction(object):
  proba_p = self._prob_product_across_panels(
  proba_r, self.panel_info)
  proba_r = proba_p
- proba_r = np.clip(proba_r, min_comp_val, None)
+ proba_r = np.clip(proba_r, min_comp_val, max_comp_val)
  loglik = np.log(proba_r)
  return loglik

@@ -3801,6 +4036,8 @@ class ObjectiveFunction(object):
  if dispersion == 0 or dispersion == 3:
  return 0
  else:
+
+
  return 1

  def _prob_product_across_panels(self, pch, panel_info):
@@ -3856,7 +4093,7 @@ class ObjectiveFunction(object):
  if y[i] == 0:
  gr_e[i] = 0

- if self.is_dispersion(dispersion):
+ if self.is_dispersion(dispersion) and not self.no_extra_param:
  gr_d = np.zeros((N, 1))
  if dispersion == 1:
  # trying alt
@@ -3960,12 +4197,13 @@ class ObjectiveFunction(object):
  br, draws_, brstd, dis_fit_long) # (N,K,R)
  dprod_r = dev.np.einsum("njk,njr -> nkr", Xdr,
  einsum_model_form, dtype=np.float64) # (N,K,R)
- der_prod_r = dprod_r * der * proba_n[:, None, :] # (N,K,R)
- der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :] # I think this is the case check
- der_prod_r = dprod_r[:, X_tril_idx, :] * der * proba_n[:, None, :] # or this one
- print('which one of these')
+ #der_prod_r = dprod_r * der * proba_n[:, None, :] # (N,K,R)
+ #der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :] # I think this is the case check
+
+ der_prod_r = dprod_r * der * proba_n[:, None, :] # or this one
+
  der_t = self._compute_derivatives(
- br, draws_[:, draws_tril_idx, :], brstd, self.dist_fit) # (N,K,R)
+ br[draws_tril_idx], draws_[:, draws_tril_idx, :], brstd, np.array(self.dist_fit)[draws_tril_idx]) # (N,K,R)
  # er_t = self._compute_derivatives(br, draws_, brstd[:, draws_tril_idx,: ], self.dist_fit, draws_tril_idx)
  der_prod_r_t = dprod_r[:, draws_tril_idx, :] * \
  der_t * proba_n[:, None, :] # (N,K,R)
@@ -4026,14 +4264,18 @@ class ObjectiveFunction(object):
  grad_n = self._concat_gradients(
  (gr_f, gr_u, gr_s, gr_e)) / Rlik # (N,K)
  else:
- grad_n = self._concat_gradients(
- (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik # (N,K)
- grad_n = np.nan_to_num(grad_n, nan=0, posinf=10000, neginf=-10000)
- grad_n = np.clip(grad_n, -1000, 1000)
+ if self.no_extra_param:
+ grad_n = self._concat_gradients(
+ (gr_f, gr_u, gr_s, gr_h, gr_hs)) / Rlik # (N,K)
+ else:
+ grad_n = self._concat_gradients(
+ (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik # (N,K)
+ grad_n = np.nan_to_num(grad_n, nan=0, posinf=1000, neginf=-1000)
+ grad_n = np.clip(grad_n, -100, 100)
  n = np.shape(grad_n)[0]
  # subtract out mean gradient value
- # grad_n_sub = grad_n-(np.sum(grad_n, axis=0)/n)
- # grad_n = grad_n_sub
+ grad_n_sub = grad_n-(np.sum(grad_n, axis=0)/n)
+ grad_n = grad_n_sub
  grad = grad_n.sum(axis=0)
  return grad, grad_n

@@ -4095,9 +4337,9 @@ class ObjectiveFunction(object):

  elif dispersion == 1:

- der = -self.NB_Score(betas, y, eVd, Xd, 0, obs_specific)
+ der = self.NB_Score(betas, y, eVd, Xd, 0, obs_specific)
  if both:
- grad_n = -self.NB_Score(betas, y, eVd, Xd, 0, True)
+ grad_n = self.NB_Score(betas, y, eVd, Xd, 0, True)
  return np.nan_to_num(der, nan=200, posinf=200, neginf=-200), np.nan_to_num(grad_n, nan=140, posinf=140,
  neginf=-140)

@@ -4184,7 +4426,7 @@ class ObjectiveFunction(object):
  return proba_r.sum(axis=1), np.squeeze(proba_r)

  def _penalty_betas(self, betas, dispersion, penalty, penalty_ap=100.0):
- penalty_val = 0.05
+ penalty_val = 0.1
  penalty_val_max = 130

  # print('change_later')
@@ -4200,8 +4442,8 @@ class ObjectiveFunction(object):
  if abs(i) > penalty_val_max:
  penalty += abs(i)

- # if abs(i) < penalty_val:
- # penalty += 5
+ #if abs(i) < penalty_val:
+ # penalty += 5

  # penalty = 0
  return penalty
@@ -4308,8 +4550,7 @@ class ObjectiveFunction(object):
  index += 1

  brstd = br_std
- print(brstd)
- print(brstd)
+


  def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
@@ -4341,7 +4582,7 @@ class ObjectiveFunction(object):
  penalty = self._penalty_betas(
  betas, dispersion, penalty, float(len(y) / 10.0))
  self.n_obs = len(y) # feeds into gradient
- if draws is None and draws_grouped is None and (
+ if draws is None and draws_grouped is None and (model_nature is None or
  'draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
  #TODO do i shuffle the draws
  if type(Xd) == dict:
@@ -4351,7 +4592,7 @@ class ObjectiveFunction(object):
  P += Xd[key].shape[1]
  Kf += Xd[key].shape[2]
  else:
- self.naming_for_printing(betas, 1, dispersion, zi_fit=zi_list, model_nature=model_nature)
+ self.naming_for_printing(betas, 1, dispersion, model_nature=model_nature)
  N, P, Kf = Xd.shape[0], Xd.shape[1], Xd.shape[2]
  betas = np.array(betas)
  Bf = betas[0:Kf] # Fixed betas
@@ -4381,7 +4622,7 @@ class ObjectiveFunction(object):
  llf_main = self.loglik_obs(
  y, eVd, dispersion, main_disper, lindley_disp, betas)

- # llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
+ llf_main = np.clip(llf_main, log_lik_min, log_lik_max)

  loglik = llf_main.sum()

@@ -4394,7 +4635,7 @@ class ObjectiveFunction(object):
  penalty = self.regularise_l2(betas)

  if not np.isreal(loglik):
- loglik = - 1000000000.0
+ loglik = - 10000000.0

  output = (-loglik + penalty,)
  if return_gradient:
@@ -4402,14 +4643,19 @@ class ObjectiveFunction(object):
  if return_gradient_n:
  der, grad_n = self.simple_score_grad(
  betas, y, eVd, Xd, dispersion, both=True)
- return (-loglik + penalty, -der, grad_n)
+ #return (-loglik + penalty, -der, grad_n)*self.minimize_scaler
+ scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel(), grad_n))
+ return scaled_tuple
  else:
  der = self.simple_score_grad(
  betas, y, eVd, Xd, dispersion, both=False)
-
- return (-loglik + penalty, -der.ravel())
+ scaled_tuple = tuple(
+ x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel()))
+ return scaled_tuple
+ #return (-loglik + penalty, -der.ravel())*self.minimize_scaler
  else:
- return -loglik + penalty
+
+ return (-loglik + penalty)*self.minimize_scaler
  # Else, we have draws
  self.n_obs = len(y) * self.Ndraws #todo is this problematic
  penalty += self._penalty_betas(
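When gradients are requested, the objective and its derivatives are all multiplied by the same minimize_scaler; scaling value and gradient by one positive constant leaves the minimizer unchanged while normalizing magnitudes per observation. A sketch of the pattern (scale_objective is a hypothetical name):

```python
def scale_objective(neg_ll, grad, scaler):
    # Scale value and gradient together: the argmin is invariant to a
    # positive constant, but tolerances now act on per-observation scale.
    return tuple(x * scaler for x in (neg_ll, grad))
```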
@@ -4420,7 +4666,7 @@ class ObjectiveFunction(object):
  # Kf =0
  betas = np.array(betas)
  betas = dev.to_gpu(betas) # TODO fix mepotnetially problem
- self.naming_for_printing(betas, 0, dispersion, zi_fit=zi_list, model_nature=model_nature)
+ self.naming_for_printing(betas, 0, dispersion, model_nature=model_nature)
  y = dev.to_gpu(y)
  if draws is not None and draws_grouped is not None:
  draws = np.concatenate((draws_grouped, draws), axis=1)
@@ -4483,7 +4729,9 @@ class ObjectiveFunction(object):
  Kf = 0
  else:
  if n_coeff != len(betas):
- raise Exception
+ raise Exception(
+
+ )
  Bf = betas[0:Kf] # Fixed betas


@@ -4509,11 +4757,11 @@ class ObjectiveFunction(object):
  # brstd), draws_) # Get random coefficients, old method
  Br = self._transform_rand_betas(br,
  brstd, draws_) # Get random coefficients
- self.naming_for_printing(betas, dispersion=dispersion, zi_fit=zi_list, model_nature=model_nature)
+ self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
  self.Br = Br.copy()

  else:
- self.naming_for_printing(betas, dispersion=dispersion, zi_fit=zi_list, model_nature=model_nature)
+ self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
  chol_mat = self._chol_mat(
  len(self.rdm_cor_fit), br, brstd, self.rdm_cor_fit)
  self.chol_mat = chol_mat.copy()
@@ -4585,7 +4833,8 @@ class ObjectiveFunction(object):
  eVd = self.lam_transform(eVd, dispersion, betas[-1])

  if self.is_dispersion(dispersion):
- penalty, betas[-1] = self._penalty_dispersion(
+ if not self.no_extra_param:
+ penalty, betas[-1] = self._penalty_dispersion(
  dispersion, betas[-1], eVd, y, penalty, model_nature)

  '''
@@ -4629,38 +4878,22 @@ class ObjectiveFunction(object):
  proba.append(dev.to_cpu(proba_))

  lik = np.stack(proba).sum(axis=0) / R # (N, )
- lik = np.clip(lik, min_comp_val, 10000)
+ lik = np.clip(lik, min_comp_val, max_comp_val)
  # lik = np.nan_to_num(lik, )
  loglik = np.log(lik)
  llf_main = loglik
- if 'exog_infl' in model_nature:
- params_infl = betas[Kf:Kf + len(model_nature.get('exog_infl'))]
- params_main = Bf
- exog_infl = model_nature.get('exog_inflX')
- llf_main = llf_main.ravel() # TODO test this
- w = self.predict_logit_part(params_infl, exog_infl)
-
- w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-
- zero_idx = np.nonzero(y == 0)[0]
- nonzero_idx = np.nonzero(y)[0] # FIXME should shape be unravelled
-
- llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1) # TODO test this i added ravel to this code
- llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
- llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
- loglik = llf.sum()
- else:

- loglik = loglik.sum()
+
+ loglik = loglik.sum()

  loglik = np.clip(loglik, log_lik_min, log_lik_max)
  if self.power_up_ll:
  penalty += self.regularise_l2(betas)
- loglik = 2*loglik
+
  penalty += self.regularise_l2(betas)
  if not return_gradient:

- output = (-loglik + penalty,)
+ output = ((-loglik + penalty)*self.minimize_scaler,)
  if verbose > 1:
  print(
  f"Evaluation {self.total_fun_eval} Log-Lik.={-loglik:.2f}")
@@ -4690,19 +4923,24 @@ class ObjectiveFunction(object):
  # Hinv = np.linalg.inv(H)
  # except Exception:
  # Hinv = np.linalg.pinv(H)
- output = (-loglik + penalty, -grad, grad_n)
+ scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad, grad_n))
+ return scaled_tuple
+ #output = (-loglik + penalty, -grad, grad_n)*self.minimize_scaler

- return output
+ #return output
  else:
+ scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad))
+ return scaled_tuple
+ #output = (-loglik + penalty, -grad)*self.minimize_scaler

- output = (-loglik + penalty, -grad)
-
- return output
+ #return output

  except Exception as e:
  traceback.print_exc()
  print(e)
-
+ def minimize_function(self, loglike):
+ r'Takes the logliklihood function and tranforms it to a more handed minimization function'
+ return loglike/self.n_obs
  def print_chol_mat(self, betas):
  print(self.chol_mat)
  self.get_br_and_bstd(betas)
@@ -4938,12 +5176,16 @@ class ObjectiveFunction(object):
  return H

  def _minimize(self, loglik_fn, x, args, method, tol, options, bounds=None, hess_calc=None):
-
+ #method = 'BFGS'
  if method == "BFGS":

  try:
+ argbs = list(args)

- return self._bfgs(loglik_fn, x, args=args, tol=tol, **options) # @IgnoreException
+ argbs[7] = True
+ argsb = tuple(argbs)
+ a = self._bfgs(loglik_fn, x, args=argsb, tol=tol, **options)
+ return self._bfgs(loglik_fn, x, args=args, tol=tol, **options)

  except:
  return minimize(loglik_fn, x, args=args, jac=args[6], method='BFGS', tol=tol, options=options)
@@ -4966,7 +5208,7 @@ class ObjectiveFunction(object):
  H = self.numerical_hessian(lambda x: self._loglik_gradient(x, *argbs), result.x, eps=1e-7 * self.n_obs)
  result['Hessian'] = H
  result['hess_inv'] = np.linalg.pinv(H)
- print('to do, only if hessian is fhfhfhf')
+
  standard_errors = np.sqrt(np.diag(np.linalg.pinv(H)))
  return result
  # return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS', options= {'gtol':1e-7*self.N}*self.Ndraws)
@@ -5190,7 +5432,7 @@ class ObjectiveFunction(object):
  if self.power_up_ll:
  loglikelihood =-optim_res['fun']/2 - penalty
  else:
- loglikelihood = -optim_res['fun'] - penalty
+ loglikelihood = -optim_res['fun']/self.minimize_scaler - penalty

  # self.coeff_names = coeff_names
  # self.total_iter = optim_res['nit']
@@ -5237,7 +5479,7 @@ class ObjectiveFunction(object):
  return a

  def fitRegression(self, mod,
- dispersion=0, maxiter=2000, batch_size=None, num_hess=False):
+ dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):

  """
  Fits a poisson regression given data and outcomes if dispersion is not declared
@@ -5249,12 +5491,12 @@ class ObjectiveFunction(object):
  betas_est - array. Coefficients which maximize the negative log-liklihood.
  """
  # Set defualt method
- sub_zi = None
- exog_infl = None if 'exog_infl' not in mod else mod['exog_infl']
- inf_betas = 0 if exog_infl is None else len(exog_infl)
+ #TODO, the inital fit worked but it throws
+
+

  sol = Solution()
- log_ll = 10.0 ** 9
+
  tol = {'ftol': 1e-8, 'gtol': 1e-6}
  is_delete = 0
  dispersion = mod.get('dispersion')
@@ -5266,10 +5508,7 @@ class ObjectiveFunction(object):
  if self.hess_yes == False:
  method2 = 'BFGS_2'
  method2 = self.method_ll
- # method2 = 'BFGS_2'

- # method2 = 'BFGS_2'
- # method2 = 'dogleg'
  bic = None
  pvalue_alt = None
  zvalues = None
@@ -5286,8 +5525,10 @@ class ObjectiveFunction(object):
  _g, pg, kg = 0, 0, 0

  dispersion_param_num = self.is_dispersion(dispersion)
+ if self.no_extra_param:
+ dispersion_param_num =0

- paramNum = self.get_param_num(dispersion)
+ #paramNum = self.get_param_num(dispersion)
  self.no_random_paramaters = 0
  if 'XG' in mod:
  XX = np.concatenate((mod.get('X'), mod.get('XG'), mod.get('Xr'), mod.get('XH')), axis=2)
@@ -5313,7 +5554,7 @@ class ObjectiveFunction(object):
  XX_test = mod.get('Xr_test')

  bb = np.random.uniform(
- -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num + inf_betas)
+ -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num)

  if method == 'L-BFGS-B':
  if dispersion == 0:
@@ -5340,17 +5581,28 @@ class ObjectiveFunction(object):
  else:
  bb[0] = self.constant_value
  if dispersion == 1:
- bb[-1] = self.negative_binomial_value
+ if not self.no_extra_param:
+ bb[-1] = self.negative_binomial_value
  bounds = None

+
+
  # intial_beta = minimize(self._loglik_gradient, bb, args =(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method = 'nelder-mead', options={'gtol': 1e-7*len(XX)})
  hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
- initial_beta = self._minimize(self._loglik_gradient, bb,
+
+ if self.no_extra_param:
+ dispersion_poisson = 0
+ initial_beta = self._minimize(self._loglik_gradient, bb,
  args=(XX, y, None, None, None, None, calc_gradient, hess_est,
- dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None,
+ dispersion_poisson, 0, False, 0, None, None, None, None, None,
  mod),
  method=method2, tol=1e-5, options={'gtol': tol['gtol']},
  bounds=bounds)
+ if dispersion:
+ nb_parma = self.poisson_mean_get_dispersion(initial_beta.x, XX, y)
+
+
+

  if method2 == 'L-BFGS-B':
  if hasattr(initial_beta.hess_inv, 'todense'):
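With no_extra_param set, estimation becomes two-stage: fit a Poisson model first, back the dispersion out with poisson_mean_get_dispersion, then hold it fixed in self.nb_parma while refitting the negative binomial. A hedged outline of that flow using statsmodels (two_stage_nb and all variable names are hypothetical, not package code):

```python
import statsmodels.api as sm

def two_stage_nb(X, y):
    # Stage 1: Poisson fit supplies the conditional means.
    poisson = sm.GLM(y, X, family=sm.families.Poisson()).fit()
    mu = poisson.mu
    # Stage 2: moment estimate of the dispersion from the auxiliary
    # regression, then an NB fit with that dispersion held fixed.
    alpha = sm.OLS(((y - mu) ** 2 - mu) / mu, mu - 1).fit().params[0]
    nb = sm.GLM(y, X, family=sm.families.NegativeBinomial(alpha=alpha)).fit()
    return nb, alpha
```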
@@ -5363,7 +5615,7 @@ class ObjectiveFunction(object):
  if initial_beta is not None and np.isnan(initial_beta['fun']):
  initial_beta = self._minimize(self._loglik_gradient, bb,
  args=(XX, y, None, None, None, None, True, True, dispersion,
- 0, False, 0, None, sub_zi, exog_infl, None, None, mod),
+ 0, False, 0, None, None, None, None, None, mod),
  method=method2, tol=tol['ftol'], options={'gtol': tol['gtol']})

  if initial_beta is not None and not np.isnan(initial_beta['fun']):
@@ -5387,24 +5639,24 @@ class ObjectiveFunction(object):
  loglik=log_ll_fixed, num_parm=paramNum, GOF=other_measures)

  self.naming_for_printing(
- initial_beta['x'], 1, dispersion, zi_fit=sub_zi, model_nature=mod)
+ initial_beta['x'], 1, dispersion, model_nature=mod)

  if self.is_multi:
  in_sample_mae = self.validation(
  initial_beta['x'], mod.get('y'), mod.get('X'), dispersion=dispersion,
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None, model_nature=mod, halton=0,
+ rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
  testing=0)

  sol.add_objective(TRAIN=in_sample_mae)
  MAE_out = self.validation(
  initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None, model_nature=mod, halton=0)
+ rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0)
  sol.add_objective(TEST=MAE_out)

  if self.val_percentage >0:
  MAE_VAL = self.validation(
  initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None, model_nature=mod, halton=0,
+ rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
  validation=1)
  sol.add_objective(VAL=MAE_VAL)
  if sol[self._obj_1] <= self.best_obj_1:
@@ -5448,7 +5700,7 @@ class ObjectiveFunction(object):

  b = [b[i] if i > len(self.none_handler(self.fixed_fit)) + len(
  self.none_handler(self.rdm_fit)) + len(
- self.none_handler(self.rdm_cor_fit)) else b[i] / 1.3 for i in range(len(b))]
+ self.none_handler(self.rdm_cor_fit)) else b[i] / 1 for i in range(len(b))]
  else:
  b = bb

@@ -5458,9 +5710,10 @@ class ObjectiveFunction(object):
  else:
  b = np.insert(b, -1, np.random.uniform(0.05, 0.1))
  if dispersion == 1:
- b[-1] = np.abs(b[-1])
- if b[-1] > 10:
- b[-1] = 5
+ if not self.no_extra_param:
+ b[-1] = np.abs(b[-1])
+ if b[-1] > 10:
+ b[-1] = 5
  elif dispersion == 2:
  b[-1] = .5
  if method == 'L-BFGS-B' or method2 == 'L-BFGS-B':
@@ -5509,9 +5762,6 @@ class ObjectiveFunction(object):

  bounds.append((np.random.uniform(0.05, .15), bob2[count] + 7))
  count += 1
-
-
-
  elif ii < jj:
  if bob2[count] > 0:

@@ -5584,18 +5834,35 @@ class ObjectiveFunction(object):
  mod['dispersion_penalty'] = np.abs(b[-1])
  grad_args = (
  X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0, self.rdm_cor_fit,
- None, exog_infl, draws_grouped, XG, mod)
+ None, None, draws_grouped, XG, mod)
  # self.gradients_est_yes = (1, 1)

  if draws is None and draws_hetro is not None:
  print('hold')
- betas_est = self._minimize(self._loglik_gradient, b, args=(
- X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
- self.rdm_cor_fit, None, exog_infl, draws_grouped, XG, mod),
- method=method2, tol=tol['ftol'],
- options={'gtol': tol['gtol']}, bounds=bounds,
- hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+ #self.grad_yes = True
+ #self.hess_yes = True

+ if self.no_extra_param:
+ dispersion_poisson = 0
+ betas_est = self._minimize(self._loglik_gradient, b, args=(
+ X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion_poisson, 0, False, 0,
+ self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
+ method=method2, tol=tol['ftol'],
+ options={'gtol': tol['gtol']}, bounds=bounds,
+ hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+ if dispersion:
+ initial_fit_beta = betas_est.x
+ parmas = np.append(initial_fit_beta, nb_parma)
+ self.nb_parma = nb_parma
+ #print(f'neg binomi,{self.nb_parma}')
+ betas_est = self._minimize(self._loglik_gradient, initial_fit_beta, args=(
+ X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
+ self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
+ method=method2, tol=tol['ftol'],
+ options={'gtol': tol['gtol']}, bounds=bounds,
+ hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+
+ #print('refit with estimation of NB')
  # self.numerical_hessian_calc = True
  if self.numerical_hessian_calc:
  try:
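The new `no_extra_param` branch is a two-stage warm start: the model is first estimated under a Poisson likelihood (`dispersion_poisson = 0`), and when a dispersion family is requested the negative-binomial objective is then re-minimised from the Poisson solution with the dispersion held fixed at `nb_parma` (note the `parmas` vector built with `np.append` is assigned but never used afterwards). A minimal sketch of the same pattern with `scipy.optimize.minimize`, using stand-in objectives for the package's `_loglik_gradient`:

    import numpy as np
    from scipy.optimize import minimize

    def two_stage_fit(b0, negll_poisson, negll_nb, nb_parma=1.0):
        # stage 1: a Poisson fit gives cheap, stable starting coefficients
        stage1 = minimize(negll_poisson, b0, method='L-BFGS-B')
        # stage 2: NB fit warm-started from stage 1, dispersion fixed at nb_parma
        stage2 = minimize(negll_nb, stage1.x, args=(nb_parma,), method='L-BFGS-B')
        return stage2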
@@ -5610,7 +5877,7 @@ class ObjectiveFunction(object):
  betas_est = self._minimize(self._loglik_gradient, b, args=(
  X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0,
  self.rdm_cor_fit,
- None, exog_infl, draws_grouped, XG, mod),
+ None, None, draws_grouped, XG, mod),
  method=method2, tol=tol['ftol'],
  options={'gtol': tol['gtol']})

@@ -5646,7 +5913,7 @@ class ObjectiveFunction(object):

  paramNum = len(betas_est['x'])
  self.naming_for_printing(
- betas_est['x'], 0, dispersion, zi_fit=sub_zi, model_nature=mod)
+ betas_est['x'], 0, dispersion, model_nature=mod)

  sol.add_objective(bic=bic, aic=aic,
  loglik=log_ll, num_parm=paramNum, GOF=other_measures)
@@ -5656,19 +5923,19 @@ class ObjectiveFunction(object):
  try:

  in_sample_mae = self.validation(betas_est['x'], y, X, Xr, dispersion=dispersion,
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None,
+ rdm_cor_fit=self.rdm_cor_fit,
  model_nature=mod, testing=0)
  sol.add_objective(TRAIN=in_sample_mae)
  y_test, X_test, Xr_test = mod.get('y_test'), mod.get('X_test'), mod.get('Xr_test')
  Xr_grouped_test = mod.get('Xrtest')
  MAE_test = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None,
+ rdm_cor_fit=self.rdm_cor_fit,
  model_nature=mod)

  sol.add_objective(TEST=MAE_test)
- if self.val_percentage >0:
+ if self.val_percentage > 0:
  MAE_val = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None,
+ rdm_cor_fit=self.rdm_cor_fit,
  model_nature=mod, validation=1)
  sol.add_objective(VAL=MAE_val)

@@ -5894,7 +6161,7 @@ class ObjectiveFunction(object):
  return delim + self._model_type_codes[dispersion]

  def self_standardize_positive(self, X):
- scaler = StandardScaler()
+ scaler = MinMaxScaler()
  if type(X) == list:
  return X

@@ -5904,12 +6171,26 @@ class ObjectiveFunction(object):
  # Reshaping to 2D - combining the last two dimensions
  df_tf_reshaped = X.reshape(original_shape[0], -1)
  df_tf_scaled = scaler.fit_transform(df_tf_reshaped)
- df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
+ #df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
  # Reshape back to original 3D shape if necessary
  df_tf = df_tf_scaled.reshape(original_shape)
  return df_tf
  else:
- raise X
+ # Initialize the MinMaxScaler
+ scaler = MinMaxScaler()
+ float_columns = X.select_dtypes(include=['float64', 'float32', 'int']).columns.difference(['const', 'offset, "EXPOSE', 'Constant', 'constant'])
+ non_numeric_columns = X.select_dtypes(exclude=['float64', 'float32', 'int']).columns
+
+ # Fit the scaler to the float columns and transform them
+ X[float_columns] = scaler.fit_transform(X[float_columns])
+ # Fit the scaler to the data and transform it
+ #scaled_data = scaler.fit_transform(X)
+
+ # Convert the result back to a DataFrame
+ #scaled_df = pd.DataFrame(scaled_data, columns=X.columns)
+
+
+ return X

  def make_regression_from_terms(self, fixed=None, rdm=None, rdm_cor_fit=None, distribution=None, dispersion=None,
  *args, **kwargs):
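With the scaler swapped to `MinMaxScaler`, the array branch now maps features into [0, 1] directly, which makes the old `- df_tf_scaled.min()` shift redundant, and the DataFrame branch scales numeric columns in place instead of raising. Note the exclusion list contains what appears to be a malformed string literal, `'offset, "EXPOSE'`; a sketch of the apparent intent, assuming `offset` and `EXPOSE` were meant as separate column names:

    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler

    def minmax_scale_numeric(df: pd.DataFrame) -> pd.DataFrame:
        keep_as_is = ['const', 'offset', 'EXPOSE', 'Constant', 'constant']
        numeric = df.select_dtypes(include=['float64', 'float32', 'int']).columns.difference(keep_as_is)
        df = df.copy()                  # avoid mutating the caller's frame
        df[numeric] = MinMaxScaler().fit_transform(df[numeric])
        return df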
@@ -5964,8 +6245,9 @@ class ObjectiveFunction(object):
  df_test[:, :, idx], model_nature.get('transformations')[idx] = self.transformer(
  t, idx, df_test[:, :, idx])
  if np.max(df_tf[:, :, idx]) >= 77000:
+ #TODO need to normalise the data

- raise Exception('should not be possible')
+ print('should not be possible')

  self.define_selfs_fixed_rdm_cor(model_nature)
  indices = self.get_named_indices(self.fixed_fit)
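This change relaxes a hard failure into a warning when a transformed column exceeds the 77000 magnitude threshold, with a TODO to normalise instead. A sketch of a guard that reports which columns need rescaling, assuming the same 3-D (observations x panels x features) layout:

    import numpy as np

    def flag_extreme_features(df_tf, limit=77000):
        # df_tf has shape (observations, panels, features)
        too_big = [k for k in range(df_tf.shape[2])
                   if np.max(np.abs(df_tf[:, :, k])) >= limit]
        if too_big:
            print(f'features {too_big} exceed {limit}; consider normalising')
        return too_big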
@@ -6022,7 +6304,7 @@ class ObjectiveFunction(object):
  model_nature['XH'] = XH
  X_test = None
  if np.isin(X, [np.inf, -np.inf, None, np.nan]).any(): # type ignore
- raise Exception('there is some kind of error')
+ raise Exception('there is some kind of error in X')

  # numpy data setup fpr estimation
  indices2 = self.get_named_indices(self.rdm_fit)
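The guard itself only gains a clearer message, but note that `np.isin` relies on equality, and `np.nan != np.nan`, so NaNs in a float array slip through this check. A stricter sketch using `np.isfinite`, which catches NaN and both infinities in one pass:

    import numpy as np

    def assert_finite(X, name='X'):
        X = np.asarray(X, dtype=float)   # None entries become nan here
        if not np.isfinite(X).all():
            raise Exception(f'there is some kind of error in {name}')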
@@ -6105,24 +6387,24 @@ class ObjectiveFunction(object):
  if self.is_quanitifiable_num(obj_1[self._obj_1]) and pvalues is not None:
  self.bic = obj_1['bic']
  self.pvalues = pvalues
- if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c", "zi", 'grp', 'xh']):
+ if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c", 'grp', 'xh']):
  # todo: probably delete
  self.naming_for_printing(
- pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'], obj_1['zi_fit'],
+ pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
  obj_1, model_nature)
  else:
  if is_delete == 0:
  # todo: probably delete
  self.naming_for_printing(
  pvalues, 1, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
- obj_1['zi_fit'], obj_1, model_nature)
+ obj_1, model_nature)
  self.coeff_ = betas
  self.stderr = stderr
  self.zvalues = zvalues
  self.log_lik = log_lik
  if self.significant == 0:

- print(self.full_model, 'full model is')
+
  if not self.test_flag:
  alpha, alpha_rdm, alpha_cor_rdm = self.modify(
  self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
@@ -6173,6 +6455,53 @@ class ObjectiveFunction(object):

  return obj_1, model_nature

+ def get_X_tril(self):
+ '''For correlations find the repeating terms'''
+ varnames = self.none_join([self.rdm_grouped_fit, self.rdm_fit, self.rdm_cor_fit])
+ rv_count_all = 0
+ chol_count = 0
+ rv_count = 0
+ corr_indices = []
+ rv_indices = []
+ for ii, var in enumerate(varnames): # TODO: BUGFIXf
+ if var in self.none_handler(self.rdm_cor_fit):
+ is_correlated = True
+ else:
+ is_correlated = False
+
+ rv_count_all += 1
+ if is_correlated:
+ chol_count += 1
+ else:
+ rv_count += 1
+
+ if var in self.none_handler(self.rdm_cor_fit):
+
+ corr_indices.append(rv_count_all - 1) # TODO: what does tis do
+
+ else:
+ rv_indices.append(rv_count_all - 1)
+
+ # for s.d.: gr_w = (Obs prob. minus predicted probability) * obs. var * random draw
+ draws_tril_idx = np.array([corr_indices[j]
+ for i in range(len(self.none_handler(self.rdm_cor_fit)))
+ for j in range(i + 1)]) # varnames pos.
+ X_tril_idx = np.array([corr_indices[i]
+ for i in range(len(self.none_handler(self.rdm_cor_fit)))
+ for j in range(i + 1)])
+ # Find the s.d. for random variables that are not correlated
+ var_uncor = self.none_join([self.rdm_grouped_fit, self.rdm_fit])
+ range_var = [x for x in
+ range(len(self.none_handler(var_uncor)))]
+ range_var = sorted(range_var)
+ draws_tril_idx = np.array(np.concatenate((range_var, draws_tril_idx)))
+ X_tril_idx = np.array(np.concatenate((range_var, X_tril_idx)))
+ draws_tril_idx = draws_tril_idx.astype(int)
+ X_tril_idx = X_tril_idx.astype(int)
+ return X_tril_idx
+
+
+
  def modifyn(self, data):
  select_data = self._characteristics_names
  alpha = np.isin(select_data, [item.split(':')[0] for item in data['fixed_fit']]).astype(int).tolist()
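The new `get_X_tril()` rebuilds, for every free element of the Cholesky factor of the correlated random coefficients, which regressor (`X_tril_idx`) and which draw (`draws_tril_idx`) it belongs to, then prepends the uncorrelated random coefficients, whose standard deviations are diagonal-only; only `X_tril_idx` is returned. The nested comprehensions are equivalent to `numpy.tril_indices`, as this sketch shows for hypothetical positions `corr_positions` of the correlated terms:

    import numpy as np

    corr_positions = np.array([2, 3, 4])    # hypothetical slots of correlated terms
    K = len(corr_positions)
    rows, cols = np.tril_indices(K)         # rows=[0 1 1 2 2 2], cols=[0 0 1 0 1 2]
    X_tril_idx = corr_positions[rows]       # regressor each Cholesky element loads onto
    draws_tril_idx = corr_positions[cols]   # draw each Cholesky element multiplies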
@@ -6380,23 +6709,35 @@ class ObjectiveFunction(object):
  # N, D = draws.shape[0], draws.shape[1]
  N, R, Kr = draws.shape[0], draws.shape[2], draws.shape[1]
  der = dev.np.ones((N, Kr, R), dtype=draws.dtype)
- if len(self.none_handler(self.rdm_cor_fit)) == 0:
- Br_come_one = self.Br.copy()
- # Br_come_one =
- else:

- Br_come_one = self.Br.copy()
  # betas_random = self._transform_rand_betas(betas, betas_std, draws)
  #todo make sure this works for ln and truncated normal
  if any(set(distribution).intersection(['ln_normal', 'tn_normal'])):
- print('check this, intesection shouldn not happen for all')
+
+ #print('check this, intesection shouldn not happen for all')
+
+ if der.shape[1] != draws.shape[1]:
+ print('why')
  Br_come_one = self._transform_rand_betas(betas, betas_std, draws)
+ if der.shape[1] != draws.shape[1]:
+ print('why')
+ #TODO need to get the stuction of the rdms
  for k, dist_k in enumerate(distribution):
  if dist_k == 'ln_normal':
+ if der.shape[1] != draws.shape[1]:
+ print('why')
  der[:, k, :] = Br_come_one[:, k, :]
+ if der.shape[1] != draws.shape[1]:
+ print('why')
  elif dist_k == 'tn_normal':
+ if der.shape[1] != draws.shape[1]:
+ print('why')
  der[:, k, :] = 1 * (Br_come_one[:, k, :] > 0)
+ if der.shape[1] != draws.shape[1]:
+ print('why')

+ if der.shape[1] != draws.shape[1]:
+ print('why')
  return der

  def _copy_size_display_as_ones(self, matrix):
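This rewrite drops the two dead `Br_come_one = self.Br.copy()` branches and interleaves repeated `print('why')` shape probes; the substance is the chain-rule derivative of each transformed random coefficient with respect to its underlying normal draw. A sketch of that core, assuming log-normal coefficients are exp(mean + sd * draw), so the derivative of exp(u) is exp(u) itself, and truncated-normal ones are clipped at zero, so the derivative is an indicator of positivity:

    import numpy as np

    def der_of_transform(Br, distribution):
        # Br: transformed random betas, shape (N, K, R); derivative defaults to 1
        N, K, R = Br.shape
        der = np.ones((N, K, R))
        for k, dist_k in enumerate(distribution):
            if dist_k == 'ln_normal':
                der[:, k, :] = Br[:, k, :]              # d exp(u)/du = exp(u)
            elif dist_k == 'tn_normal':
                der[:, k, :] = 1.0 * (Br[:, k, :] > 0)  # 0 where truncation binds
        return der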