metacountregressor 0.1.93__py3-none-any.whl → 0.1.97__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,19 +30,19 @@ from scipy.special import gammaln
  from sklearn.metrics import mean_absolute_error as MAE
  from sklearn.metrics import mean_squared_error as MSPE
  from statsmodels.tools.numdiff import approx_fprime, approx_hess
- from sklearn.preprocessing import StandardScaler
+ from sklearn.preprocessing import StandardScaler, MinMaxScaler
  from texttable import Texttable
-
+ import time
  try:
  from ._device_cust import device as dev
  from .pareto_file import Pareto, Solution
  from .data_split_helper import DataProcessor
  except ImportError:
- from metacountregressor._device_cust import device as dev
- from metacountregressor.pareto_file import Pareto, Solution
+ from _device_cust import device as dev
+ from pareto_file import Pareto, Solution
  from data_split_helper import DataProcessor

-
+ from scipy import stats
  np.seterr(divide='ignore', invalid='ignore')
  warnings.simplefilter("ignore")

@@ -124,10 +124,11 @@ class ObjectiveFunction(object):

  self.reg_penalty = 0
  self.power_up_ll = False
-
+ self.nb_parma = 1
  self.bic = None
  self.other_bic = False
  self.test_flag = 1
+ self.no_extra_param = 1  # if true, fix dispersion
  if self.other_bic:
  print('change this to false later')

@@ -135,10 +136,10 @@ class ObjectiveFunction(object):
  self.constant_value = 0
  self.negative_binomial_value = 1

- self.verbose_safe = True
+ self.verbose_safe = kwargs.get('verbose', 0)
  self.please_print = kwargs.get('please_print', 0)
  self.group_halton = None
- self.grad_yes = False
+ self.grad_yes = kwargs.get('grad_est', False)
  self.hess_yes = False
  self.group_halton_test = None
  self.panels = None
@@ -151,15 +152,15 @@ class ObjectiveFunction(object):
  self.dist_fit = None

  self.MAE = None
- self.best_obj_1 = 100000000.0
- self._obj_1 = 'bic'
- self._obj_2 = 'MSE'
+ self.best_obj_1 = 1000000.0
+ self._obj_1 = kwargs.get('_obj_1', 'bic')
+ self._obj_2 = kwargs.get('_obj_2', 'MSE')
  self.numerical_hessian_calc = 0 # calculates hessian by statsmodels otherwise scipy
  self.full_model = None
  self.GP_parameter = 0
- self.is_multi = 0
+ self.is_multi = kwargs.get('is_multi', False)
  self.complexity_level = 6
- self._max_iterations_improvement = 100
+ self._max_iterations_improvement = 10000
  self.generated_sln = set()
  self.ave_mae = 0
  # default parameters for hs #TODO unpack into harmony search class
@@ -167,23 +168,32 @@ class ObjectiveFunction(object):
  self._hms = 20
  self._max_time = 60 * 60 * 24
  self._hmcr = .5
- self._par = 0.3
+ self._par = 0.3 # don't think this gets used
  self._mpai = 1
  self._max_imp = 100000
  self._WIC = 1000 # Number of Iterations without Multiobjective Improvement #todo chuck into solution
  self._panels = None
  self.is_multi = True
  self.method_ll = 'Nelder-Mead-BFGS'
+
  self.method_ll = 'L-BFGS-B' # alternatives 'BFGS_2', 'BFGS'
- self.method_ll = 'BFGS_2'
+ self.method_ll = kwargs.get('method', 'BFGS_2')
+
+ #self.method_ll = 'Nelder-Mead-BFGS'
  self.Keep_Fit = 2
  self.MP = 0
  # Nelder-Mead-BFGS

- self._max_characteristics = 26
+ self._max_characteristics = kwargs.get('_max_vars', 26)

  self.beta_dict = dict
+ if 'model_terms' in kwargs:
+ print('change')
+ if kwargs.get('model_terms').get('group') is not None:
+ kwargs['group'] = kwargs.get('model_terms').get('group')

+ if kwargs.get('model_terms').get('panels') is not None:
+ kwargs['panels'] = kwargs.get('model_terms').get('panels')
  acceptable_keys_list = ['_par', '_max_imp', '_hmcr', 'steps',
  'algorithm', '_random_seed', '_max_time',
  'forcedvariables', '_obj_1', '_obj_2', '_par',
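
The new `model_terms` handling above folds a nested specification dict back into the top-level keyword arguments before they are validated. A minimal sketch of that unpacking, using hypothetical column names (not values from the package):

```python
# Sketch of the 'model_terms' unpacking above.
# 'county' and 'site_id' are hypothetical column names.
kwargs = {'model_terms': {'group': 'county', 'panels': 'site_id'}}
for key in ('group', 'panels'):
    if kwargs['model_terms'].get(key) is not None:
        kwargs[key] = kwargs['model_terms'][key]
print(kwargs['group'], kwargs['panels'])  # county site_id
```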
@@ -201,12 +211,17 @@ class ObjectiveFunction(object):
  if 'instance_number' in kwargs:
  self.instance_number = str(kwargs['instance_number'])
  else:
+
+ print('no name set, setting name as 0')
  self.instance_number = str(0) # set an arbitrary instance number

  if not os.path.exists(self.instance_number):
- os.makedirs(self.instance_number)
+ if kwargs.get('make_directory', True):
+ print('Making a directory; to stop files being stored here, set argument make_directory=False')
+ os.makedirs(self.instance_number)

  if not hasattr(self, '_obj_1'):
+ print('_obj_1 required, define as bic, aic, ll')
  raise Exception

  self.pvalue_penalty = float(kwargs.get('pvalue_penalty', 0.5))
@@ -214,6 +229,11 @@ class ObjectiveFunction(object):
  self._maximize = False # do we maximize or minimize?

  x_data = sm.add_constant(x_data)
+ standardize_the_data = 0
+ if standardize_the_data:
+ print('standardizing the data')
+ x_data = self.self_standardize_positive(x_data)
+
  self._input_data(x_data, y_data)


@@ -230,13 +250,17 @@ class ObjectiveFunction(object):
  self.test_percentage = float(kwargs.get('test_percentage', 0))
  self.val_percentage = float(kwargs.get('val_percentage', 0))
  if self.test_percentage == 0:
+ print('test percentage is 0, please enter arg test_percentage as a decimal, e.g. 0.8')
+ print('continuing single objective')
+ time.sleep(2)
  self.is_multi = False

- if 'panels' in kwargs:
- self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)
+ if 'panels' in kwargs and not (kwargs.get('panels') == None):
+ if kwargs.get('group') is not None:
+ self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)

- x_data[kwargs['group']] = x_data[kwargs['group']].astype(
- 'category').cat.codes
+ x_data[kwargs['group']] = x_data[kwargs['group']].astype(
+ 'category').cat.codes
  self.complexity_level = 6
  # create test dataset

@@ -275,26 +299,31 @@ class ObjectiveFunction(object):

  #self.n_obs = N
  self._characteristics_names = list(self._x_data.columns)
- self._max_group_all_means = 1
+ self._max_group_all_means = 2

  exclude_this_test = [4]

- if 'panels' in kwargs:
+ if 'panels' in kwargs and not (kwargs.get('panels') == None):
  self.panels = np.asarray(df_train[kwargs['panels']])
  self.panels_test = np.asarray(df_test[kwargs['panels']])
  self.ids = np.asarray(
  df_train[kwargs['panels']]) if kwargs['panels'] is not None else None
  self.ids_test = np.asarray(
  df_test[kwargs['panels']]) if kwargs['panels'] is not None else None
- groupll = np.asarray(df_train[kwargs['group']].astype(
- 'category').cat.codes)
- group_test = np.asarray(df_test[kwargs['group']].astype(
- 'category').cat.codes)
+ if kwargs.get('group') is not None:
+ groupll = np.asarray(df_train[kwargs['group']].astype(
+ 'category').cat.codes)
+ group_test = np.asarray(df_test[kwargs['group']].astype(
+ 'category').cat.codes)
+ else:
+ groupll = None
  X, Y, panel, group = self._arrange_long_format(
  df_train, y_train, self.ids, self.ids, groupll)
  self.group_halton = group.copy()
  self.group_dummies = pd.get_dummies(group)
  Xnew, Ynew, panel_info = self._balance_panels(X, Y, panel)
+
+ Xnew = pd.DataFrame(Xnew, columns=X.columns)
  self.panel_info = panel_info
  self.N, self.P = panel_info.shape
  Xnew.drop(kwargs['panels'], axis=1, inplace=True)
@@ -302,9 +331,11 @@ class ObjectiveFunction(object):
  K = Xnew.shape[1]
  self._characteristics_names = list(Xnew.columns)
  XX = Xnew.values.reshape(self.N, self.P, K).copy()
+ XX = XX.astype('float')
  self.group_dummies = self.group_dummies.values.reshape(self.N, self.P, -1)
  self.group_halton = self.group_halton.reshape(self.N, self.P)[:, 0]
  YY = Ynew.values.reshape(self.N, self.P, 1).copy()
+ YY = YY.astype('float')
  self._x_data = XX.copy()
  self._y_data = YY.copy()
  X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.panels_test, group_test)
@@ -326,6 +357,7 @@ class ObjectiveFunction(object):
  K = X.shape[1]
  self.columns_names = X.columns
  X = X.values.reshape(self.N_test, self.P_test, K)
+ X = X.astype('float')
  self.group_halton_test = self.group_halton_test.reshape(self.N_test, self.P_test)[:, 0]
  Y = Y.values.reshape(self.N_test, self.P_test, 1)
  Y = Y.astype('float')
@@ -338,6 +370,7 @@ class ObjectiveFunction(object):


  else:
+ print('No panels. Grouped random parameters will not be estimated')
  self.G = None
  self._Gnum = 1
  self._max_group_all_means = 0
@@ -354,7 +387,9 @@ class ObjectiveFunction(object):
  K = Xnew.shape[1]
  self._characteristics_names = list(Xnew.columns)
  XX = Xnew.values.reshape(self.N, self.P, K).copy()
+ XX = XX.astype('float')
  YY = Ynew.values.reshape(self.N, self.P, 1).copy()
+ YY = YY.astype('float')
  self._x_data = XX.copy()
  self._y_data = YY.copy()

@@ -370,7 +405,9 @@ class ObjectiveFunction(object):
  K = X.shape[1]
  self.columns_names = X.columns
  X = X.values.reshape(self.N_test, self.P_test, K)
+ X = X.astype('float')
  Y = Y.values.reshape(self.N_test, self.P_test, 1)
+ Y = Y.astype('float')
  self._x_data_test = X.copy()
  self.y_data_test = Y.copy()

@@ -385,7 +422,7 @@ class ObjectiveFunction(object):



- self.Ndraws = 200 # todo: change back
+ self.Ndraws = kwargs.get('Ndraws', 200)
  self.draws1 = None
  self.initial_sig = 1 # pass the test of a single model
  self.pvalue_sig_value = .1
@@ -403,17 +440,19 @@ class ObjectiveFunction(object):
  print('Setup Complete...')
  else:
  print('No Panels Supplied')
+ print('Setup Complete...')
  self._characteristics_names = list(self._x_data.columns)
  # define the variables
  # self._transformations = ["no", "sqrt", "log", "exp", "fact", "arcsinh", 2, 3]
  self._transformations = ["no", "sqrt", "log", "arcsinh"]
  self._transformations = kwargs.get('_transformation', ["no", "sqrt", "log", 'arcsinh'])
-
+ self._transformations = kwargs.get('_transformation', ["no", "log", "sqrt", "arcsinh"])
  # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']

- self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'lm_normal', 'tn_normal'])
+ self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])

  if self.G is not None:
+ #TODO need to handle this for groups
  self._distribution = ["trad| " + item for item in self._distribution
  ] + ["grpd| " + item for item in self._distribution]

@@ -425,18 +464,32 @@ class ObjectiveFunction(object):

  self.significant = 0
  # define the states of our explanatory variables
+
+
  self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test,
- kwargs.get('must_include', []))
+ kwargs.get('must_include', []), extra = kwargs.get('decisions', None))
+
+
+
  self._discrete_values = self._discrete_values + \
- [[x for x in self._distribution]] * self._characteristics
+ self.define_distributions_analyst(extra=kwargs.get('decisions', None))

  if 'model_types' in kwargs:
  model_types = kwargs['model_types']
  else:
- model_types = [[0, 1]] # add 2 for Generalized Poisson

+
+ model_types = [[0, 1]] # add 2 for Generalized Poisson
+ #model_types = [[0]]
+ #TODO change back and fix NB
+ model_t_dict = {'Poisson':0,
+ "NB":1}
+ # Retrieve the keys (model names) corresponding to the values in model_types
+ model_keys = [key for key, value in model_t_dict.items() if value in model_types[0]]
+ # Print the formatted result
+ print(f'The types of models to consider: {", ".join(model_keys)}')
  self._discrete_values = self._discrete_values + self.define_poissible_transforms(
- self._transformations) + model_types
+ self._transformations, kwargs.get('decisions',None)) + model_types

  self._model_type_codes = ['p', 'nb',
  'gp', "pl", ["nb-theta", 'nb-dis']]
@@ -452,8 +505,9 @@ class ObjectiveFunction(object):
  self._max_hurdle = 4

  #Manually fit from analyst specification
- manual_fit = kwargs.get('Manual_Fit')
+ manual_fit = kwargs.get('Manual_Fit', None)
  if manual_fit is not None:
+ print('fitting manual specification')
  self.process_manual_fit(manual_fit)

  self.solution_analyst = None
@@ -488,6 +542,7 @@ class ObjectiveFunction(object):
  if self.is_multi:
  self._offsets_test = self._x_data_test[:, :, val_od]
  self._x_data_test = self.remove_offset(self._x_data_test, val_od)
+ #print(self._offsets)
  else:
  self.initialize_empty_offsets()

@@ -760,6 +815,8 @@ class ObjectiveFunction(object):
  if dispersion == 0:
  return None, None
  elif dispersion == 2 or dispersion == 1:
+ if self.no_extra_param:
+ return self.nb_parma, None
  return betas[-1], None

  elif dispersion == 3:
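
With `no_extra_param` set, the dispersion parameter is no longer read off the tail of the coefficient vector; the fixed `nb_parma` value is returned instead. A toy illustration of that branch (values assumed, not taken from the package):

```python
# Toy illustration of the fixed-dispersion branch above.
no_extra_param, nb_parma = 1, 1.0
betas = [0.2, -0.1, 0.7]  # the last slot would otherwise hold the estimated dispersion
dispersion = 1            # 1 = negative binomial
if dispersion in (1, 2):
    disp = nb_parma if no_extra_param else betas[-1]
print(disp)  # 1.0 -> dispersion held fixed rather than taken from betas
```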
@@ -787,14 +844,65 @@ class ObjectiveFunction(object):
  par = np.nan_to_num(par)
  return par

- def define_alphas(self, complexity_level=4, exclude=[], include=[]):
+ def rename_distro(self, distro):
+ # Mapping dictionary
+ mapping = {
+ 'normal': ['normal', 'n', 'Normal'],
+ 'triangular': ['triangular', 't', 'Triangular'],
+ 'uniform': ['uniform', 'u', 'Uniform'],
+ 'ln_normal': ['ln_normal', 'ln_n', 'Ln_Normal'],
+ 'tn_normal': ['tn_normal', 'tn_n', 'trunc_normal']
+ }
+
+ # Reverse the mapping: every alias points back to its canonical name
+ reversed_mapping = {value: key for key, values in mapping.items() for value in values}
+
+ # Use the reversed mapping to find the corresponding key
+ new_distro = [reversed_mapping.get(i, i) for i in distro]
+ return new_distro
+
+ def define_distributions_analyst(self, extra=None):
+
+ if extra is not None:
+ set_alpha = []
+ for col in self._characteristics_names:
+ if col in extra[('Column')].values:
+ matched_index = extra[('Column')].index[extra[('Column')] == col].tolist()
+ distro = ast.literal_eval(extra.iloc[matched_index, 7].values.tolist()[0])
+ distro = self.rename_distro(distro)
+ set_alpha = set_alpha + [distro]
+ elif col == 'const':
+ set_alpha = set_alpha + [['normal']]
+ return set_alpha
+ return [[x for x in self._distribution]] * self._characteristics
+
+
+
+
+ def define_alphas(self, complexity_level=4, exclude=[], include=[], extra=None):
  'complexity level'
  '''
  2 is feature selection,
- 3 is random paramaters
- 4 is correlated random paramaters
+ 3 is random parameters
+ 4 is correlated random parameters
+
+ extra is the specification defined by the Meta App
  '''
  set_alpha = []
+ if extra is not None:
+ for col in self._characteristics_names:
+ if col == 'const' or col == 'Constant' or col == 'constant': # no random parameters for const
+ set_alpha = set_alpha + [[1]]
+ elif col == 'Offset':
+ set_alpha = set_alpha + [[1]]
+
+ elif col in extra[('Column')].values:
+ matched_index = extra[('Column')].index[extra[('Column')] == col].tolist()
+ check = list(itertools.chain(*extra.iloc[matched_index, 1:7].values))
+ set_alpha = set_alpha + [[x for x in range(len(check)) if check[x] == True]]
+ return set_alpha


  for col in self._characteristics_names:
  if col == 'const' or col == 'Constant' or col == 'constant': # no random parameters for const
  set_alpha = set_alpha + [[1]]
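
`rename_distro` canonicalises analyst-supplied distribution names by inverting its alias table. The inversion trick on its own, with a subset of the mapping above:

```python
# Alias normalisation as in rename_distro, shown on a subset of the mapping.
mapping = {
    'normal': ['normal', 'n', 'Normal'],
    'triangular': ['triangular', 't', 'Triangular'],
}
reversed_mapping = {alias: canon for canon, aliases in mapping.items() for alias in aliases}
print([reversed_mapping.get(d, d) for d in ['n', 'Triangular', 'uniform']])
# ['normal', 'triangular', 'uniform'] -- unknown names pass through unchanged
```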
@@ -845,10 +953,12 @@ class ObjectiveFunction(object):
  return ([self._model_type_codes[dispersion]])

  def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
- r'''
+ '''
  setup for naming of the model summary
  '''
+ if self.no_extra_param and dispersion == 1:

+ betas = np.append(betas, self.nb_parma)

  self.name_deleter = []
  group_rpm = None
@@ -969,13 +1079,15 @@ class ObjectiveFunction(object):
  [''] * (len(names) - len(self.transform_id_names))
  self.coeff_names = names

+ '''
  if betas is not None:
  try:
  if len(betas) != len(names):
- print('no draws is', no_draws)
- print('fix_theano')
+ print('standard_model', no_draws)
+
  except Exception as e:
  print(e)
+ '''



@@ -1000,7 +1112,8 @@ class ObjectiveFunction(object):
  if not isinstance(self.pvalues, np.ndarray):
  raise Exception

-
+ if 'nb' in self.coeff_names and self.no_extra_param:
+ self.pvalues = np.append(self.pvalues, 0)

  if self.please_print or save_state:

@@ -1016,17 +1129,22 @@ class ObjectiveFunction(object):

  if solution is not None:
  print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")
-
+
  self.pvalues = [self.round_with_padding(
  x, 2) for x in self.pvalues]
  signif_list = self.pvalue_asterix_add(self.pvalues)
  if model == 1:

- self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
- if self.coeff_[-1] < 0.25:
+ #self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
+ if self.no_extra_param:
+ self.coeff_ = np.append(self.coeff_, self.nb_parma)
+ self.stderr = np.append(self.stderr, 0.00001)
+ self.zvalues = np.append(self.zvalues, 50)
+
+ elif self.coeff_[-1] < 0.25:
  print(self.coeff_[-1], 'Warning Check Dispersion')
  print(np.exp(self.coeff_[-1]))
- self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
+ #self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom

  self.coeff_ = [self.round_with_padding(x, 2) for x in self.coeff_]

@@ -1238,7 +1356,7 @@ class ObjectiveFunction(object):
  with open(filename, 'w') as file:
  file.write(content)

- def define_poissible_transforms(self, transforms) -> list:
+ def define_poissible_transforms(self, transforms, extra=None) -> list:
  transform_set = []
  if not isinstance(self._x_data, pd.DataFrame):
  x_data = self._x_data.reshape(self.N * self.P, -1).copy()
@@ -1249,6 +1367,7 @@ class ObjectiveFunction(object):

  if 'AADT' in self._characteristics_names[col]:
  new_transform = [['log']]
+ #new_transform = [['no']]
  transform_set = transform_set + new_transform

  elif all(x_data[col] <= 5):
@@ -1288,6 +1407,18 @@ class ObjectiveFunction(object):

  return transform_set

+ def poisson_mean_get_dispersion(self, betas, X, y):
+ eVy = self._loglik_gradient(betas, X, y, None, X, None, None, False, False, dispersion=0,
+ return_EV=True,
+ zi_list=None, draws_grouped=None, Xgroup=None)
+
+ ab = ((y - eVy)**2 - eVy)/eVy
+ bb = eVy - 1
+ disp = sm.OLS(ab.ravel(), bb.ravel()).fit()
+ gamma = disp.params[0]
+ #print(f'dispersion is {gamma}')
+ return gamma
+
  def validation(self, betas, y, X, Xr=None, dispersion=0, rdm_cor_fit=None, zi_list=None, exog_infl=None,
  model_nature=None, halton=1, testing=1, validation=0):
  'validation if mu needs to be calculated'
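
The new `poisson_mean_get_dispersion` helper backs a moment-based (Cameron-Trivedi style) estimate of the NB dispersion out of fitted Poisson means. A self-contained sketch on simulated data; note it mirrors the method's `eVy - 1` regressor, whereas the textbook auxiliary regression uses the fitted mean itself:

```python
# Hedged sketch of the auxiliary OLS dispersion estimate; the data here is simulated.
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(1)
mu = np.exp(1.0 + rng.normal(scale=0.3, size=2000))  # stand-in for fitted Poisson means
alpha_true = 0.5
y = rng.negative_binomial(1.0 / alpha_true, 1.0 / (1.0 + alpha_true * mu))

ab = ((y - mu) ** 2 - mu) / mu  # same left-hand side as the method above
bb = mu - 1                     # same regressor choice as the method above
alpha_hat = sm.OLS(ab, bb).fit().params[0]
print(round(alpha_hat, 2))      # rough estimate of alpha_true
```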
@@ -1321,7 +1452,7 @@ class ObjectiveFunction(object):
  XG = model_nature.get('XGtest')[:total_percent, :, :]
  else:
  XG = model_nature.get('XGtest')[total_percent:, :, :]
- print('chekc this is doing it wright')
+
  else:
  if 'XG' in model_nature:
  XG = model_nature.get('XG')
@@ -1443,7 +1574,7 @@ class ObjectiveFunction(object):
  5: heterogeneity_in_means


- a: how to transofrm the original data
+ a: how to transform the original data
  b: grab dispersion '''

  # todo: better way
@@ -1791,7 +1922,10 @@ class ObjectiveFunction(object):
  elif dispersion == 4:
  return 2
  else:
- return 1
+ if self.no_extra_param:
+ return 0
+ else:
+ return 1

  def get_pvalue_info_alt(self, pvalues, names, sig_value=0.05, dispersion=0, is_halton=1, delete=0,
  return_violated_terms=0):
@@ -1806,6 +1940,7 @@ class ObjectiveFunction(object):

  else:
  slice_this_amount = self.num_dispersion_params(dispersion)
+ slice_this_amount = 1 #TODO handle this
  if pvalues[-1] > sig_value:
  vio_counts += 1
  subpvalues = pvalues[:-slice_this_amount].copy()
@@ -2230,7 +2365,7 @@ class ObjectiveFunction(object):
  sorted(my_dict, key=lambda x: x[0]['pval_percentage'])

  def get_fitness(self, vector, multi=False, verbose=False, max_routine=3):
- obj_1 = 10.0 ** 8
+ obj_1 = 10.0 ** 4
  obj_best = None
  sub_slns = list()

@@ -2241,7 +2376,7 @@ class ObjectiveFunction(object):
  try:
  self.repair(vector)
  except Exception as e:
- print('prob here')
+ print('problem repairing here')
  print(vector)
  print(e)
  layout = vector.copy()
@@ -2322,7 +2457,7 @@ class ObjectiveFunction(object):


  if not self.is_quanitifiable_num(obj_1[self._obj_1]):
- obj_1[self._obj_1] = 10 ** 9
+ obj_1[self._obj_1] = 10 ** 5
  else:
  if obj_1[self._obj_1] <= 0:
  obj_1[self._obj_1] = 10 ** 9
@@ -2488,7 +2623,7 @@ class ObjectiveFunction(object):
  random.seed(seed)

  def set_random_seed(self):
- print('Imbdedding Seed', self._random_seed)
+ print('Imbedding Seed', self._random_seed)
  np.random.seed(self._random_seed)

  random.seed(self._random_seed)
@@ -2522,7 +2657,7 @@ class ObjectiveFunction(object):
  self._hmcr = (
  self._hmcr_min + ((self._hmcr_max - self._hmcr_min) / self._max_imp) * iteration)

- # end def
+

  def update_par(self, iteration, is_sin=False):
  """
@@ -2742,10 +2877,6 @@ class ObjectiveFunction(object):
  '''
  #return score

-
-
-
-
  try:
  if alpha is None:
  alpha = np.exp(params[-1])
@@ -2886,7 +3017,7 @@ class ObjectiveFunction(object):
  argument = prob.mean(axis=1)
  # if less than 0 penalise
  if np.min(argument) < 0:
- print('what the fuck')
+ print('Error with args...')
  if np.min(argument) < limit:
  # add a penalty for too small argument of log
  log_lik += -np.sum(np.minimum(0.0, argument - limit)) / limit
@@ -3377,6 +3508,7 @@ class ObjectiveFunction(object):
  else:
  corr_pairs = list(itertools.combinations(corr_indices, 2))

+
  for ii, corr_pair in enumerate(corr_pairs):
  # lower cholesky matrix
  chol_mat_temp[tuple(reversed(corr_pair))] = chol[chol_count]
@@ -3404,7 +3536,7 @@ class ObjectiveFunction(object):
  a = 0
  b = 0
  stuff = []
- # todo get order
+ # TODO get order
  for j, i in enumerate(list_sizes):
  br_mean = betas_hetro[a:i + a]
  a += i
@@ -3431,7 +3563,32 @@ class ObjectiveFunction(object):
  br_mean = betas_m
  br_sd = betas_sd # Last Kr positions
  # Compute: betas = mean + sd*draws
- betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
+ if len(br_sd) != draws.shape[1]:
+ #get the same size as the mean
+ #if hasattr(self.Br):
+ # betas_random = self.Br.copy()
+ #else:
+ betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
+ '''
+ c = self.get_num_params()[3:5]
+
+ cor = []
+ for i in range(c[0]):
+ cor.append(i)
+
+ vall = []
+ for i, val in enumerate(reversed(br_sd)):
+ vall.append()
+
+ remaining = draws.shape[1] - len(betas_sd)
+ '''
+
+ else:
+
+
+ betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
+
+
  betas_random = self._apply_distribution(betas_random)

  return betas_random
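
The replacement block above guards the `mean + sd * draws` construction against a length mismatch between `br_sd` and the draw dimension, but the broadcast itself is unchanged. With assumed shapes:

```python
# Shape sketch of betas = mean + sd * draws (N obs, K random params, R draws assumed).
import numpy as np

N, K, R = 4, 3, 5
br_mean = np.array([0.5, -0.2, 1.0])
br_sd = np.array([0.1, 0.3, 0.2])
draws = np.random.default_rng(0).standard_normal((N, K, R))
betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
print(betas_random.shape)  # (4, 3, 5)
```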
@@ -3450,21 +3607,38 @@ class ObjectiveFunction(object):
  # if gamma <= 0.01: #min defined value for stable nb
  # gamma = 0.01

+ #g = stats.gamma.rvs(gamma, scale = lam/gamma, size = 1.0 / gamma * lam ** Q )

+ #gg = stats.poisson.rvs(g)

+

+
  endog = y
  mu = lam
+ '''
+ mu = lam*np.exp(gamma) #TODO check that this does not need to be multiplied
  alpha = np.exp(gamma)
- #size = 1.0 / alpha * mu ** Q
- alpha_size = alpha * mu ** Q
- # prob = size/(size+mu)
- prob = alpha / (alpha + mu)
- # prob = 1/(1+mu*alpha)
+
+ '''
+ alpha = gamma
+ size = 1.0 / alpha * mu ** Q
+
+ prob = size/(size+mu)
+
+

  '''test'''


+ '''
+ size = 1 / np.exp(gamma) * mu ** 0
+ prob = size / (size + mu)
+ coeff = (gammaln(size + y) - gammaln(y + 1) -
+ gammaln(size))
+ llf = coeff + size * np.log(prob) + y * np.log(1 - prob)
+ '''
+
  try:
  # print(np.shape(y),np.shape(size), np.shape(prob))
  #gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
@@ -3476,22 +3650,28 @@ class ObjectiveFunction(object):
  #start_time = time.time()
  #for _ in range(10000):

- #gg = self.negbinom_pmf(alpha_size, prob, y)
+
  #end_time = time.time()
  #print("Custom function time:", end_time - start_time)
  #start_time = time.time()
  #for _ in range(10000):
+ '''
  gg = np.exp(
  gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
  y + alpha) * np.log(mu + alpha))
  gg[np.isnan(gg)] = 1
+ '''
+ gg_alt = nbinom.pmf(y, 1/alpha, prob)
+ #gg_alt_2 = (gammaln(size + y) - gammaln(y + 1) -
+ #gammaln(size)) + size * np.log(prob) + y * np.log(1 - prob)
+ #print('check these')
  #gg = nbinom.pmf(y ,alpha, prob)
  #end_time = time.time()
  #print("Custom function time:", end_time - start_time)

  except Exception as e:
- print(e)
- return gg
+ print("Neg Binom error.")
+ return gg_alt
@@ -3638,8 +3818,8 @@ class ObjectiveFunction(object):

  if dispersion == 1 or dispersion == 4: # nb
  # if model_nature is not None and 'dispersion_penalty' in model_nature:
-
-
+ #b_gam = 1/np.exp(b_gam)
+ #print(b_gam)
  if b_gam <= 0:
  #penalty += 100
  #penalty += abs(b_gam)
  #b_gam = 1

  # if b_gam < 0.03:
- penalty += min(1, np.abs(b_gam))
+ penalty += min(1, np.abs(b_gam), 0)

- b_gam = 0.001
+ #b_gam = 0.001
  #

  #if b_gam >= 10:
@@ -3681,8 +3861,15 @@ class ObjectiveFunction(object):
  def eXB_calc(self, params_main, Xd, offset, dispersion, b_gam=None):

  # print('this was 0')
- eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
+ if dispersion:
+ eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
+
+ #eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :]) + dispersion
+ #print('check if this holds size')
+ else:
+ eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
  eta = np.array(eta)
+
  # eta = np.float64(eta)
  # eta = np.dot(Xd, params_main)+offset[:,:,0]
  # eta2 = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
@@ -3701,7 +3888,7 @@ class ObjectiveFunction(object):

  else:
  # eVd = self.my_lindley(np.exp(np.clip(eta, None, EXP_UPPER_LIMIT)), 1.29)
-
+ eta = eta.astype('float')
  eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
  return eVd

@@ -3817,7 +4004,7 @@ class ObjectiveFunction(object):


  # proba_d = self.dnegbimonli(y, eVd, b_gam )
- # print('fuck if this actually works')
+

  elif dispersion == 2:

@@ -3838,7 +4025,7 @@ class ObjectiveFunction(object):
  # proba_r = self.poisson_lognormal_pmf(y, eVd, sig)
  proba_r = np.array(store)
  proba_r = np.atleast_2d(proba_r).T
- print(1)
+

  else:
  raise Exception('not implemented other modeling forms')
@@ -3855,6 +4042,8 @@ class ObjectiveFunction(object):
  if dispersion == 0 or dispersion == 3:
  return 0
  else:
+
+
  return 1

  def _prob_product_across_panels(self, pch, panel_info):
@@ -3910,7 +4099,7 @@ class ObjectiveFunction(object):
  if y[i] == 0:
  gr_e[i] = 0

- if self.is_dispersion(dispersion):
+ if self.is_dispersion(dispersion) and not self.no_extra_param:
  gr_d = np.zeros((N, 1))
  if dispersion == 1:
  # trying alt
@@ -4014,12 +4203,13 @@ class ObjectiveFunction(object):
  br, draws_, brstd, dis_fit_long) # (N,K,R)
  dprod_r = dev.np.einsum("njk,njr -> nkr", Xdr,
  einsum_model_form, dtype=np.float64) # (N,K,R)
- der_prod_r = dprod_r * der * proba_n[:, None, :] # (N,K,R)
- der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :] # I think this is the case check
- der_prod_r = dprod_r[:, X_tril_idx, :] * der * proba_n[:, None, :] # or this one
- print('which one of these')
+ #der_prod_r = dprod_r * der * proba_n[:, None, :] # (N,K,R)
+ #der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :] # I think this is the case check
+
+ der_prod_r = dprod_r * der * proba_n[:, None, :] # or this one
+
  der_t = self._compute_derivatives(
- br, draws_[:, draws_tril_idx, :], brstd, self.dist_fit) # (N,K,R)
+ br[draws_tril_idx], draws_[:, draws_tril_idx, :], brstd, np.array(self.dist_fit)[draws_tril_idx]) # (N,K,R)
  # er_t = self._compute_derivatives(br, draws_, brstd[:, draws_tril_idx,: ], self.dist_fit, draws_tril_idx)
  der_prod_r_t = dprod_r[:, draws_tril_idx, :] * \
  der_t * proba_n[:, None, :] # (N,K,R)
@@ -4080,14 +4270,18 @@ class ObjectiveFunction(object):
  grad_n = self._concat_gradients(
  (gr_f, gr_u, gr_s, gr_e)) / Rlik # (N,K)
  else:
- grad_n = self._concat_gradients(
- (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik # (N,K)
- grad_n = np.nan_to_num(grad_n, nan=0, posinf=10000, neginf=-10000)
- grad_n = np.clip(grad_n, -1000, 1000)
+ if self.no_extra_param:
+ grad_n = self._concat_gradients(
+ (gr_f, gr_u, gr_s, gr_h, gr_hs)) / Rlik # (N,K)
+ else:
+ grad_n = self._concat_gradients(
+ (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik # (N,K)
+ grad_n = np.nan_to_num(grad_n, nan=0, posinf=1000, neginf=-1000)
+ grad_n = np.clip(grad_n, -100, 100)
  n = np.shape(grad_n)[0]
  # subtract out mean gradient value
- # grad_n_sub = grad_n-(np.sum(grad_n, axis=0)/n)
- # grad_n = grad_n_sub
+ grad_n_sub = grad_n-(np.sum(grad_n, axis=0)/n)
+ grad_n = grad_n_sub
  grad = grad_n.sum(axis=0)
  return grad, grad_n
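
The gradient path now clips per-observation gradients more tightly and enables the previously commented-out mean subtraction. The same post-processing on toy numbers (note that centring forces the summed gradient toward zero, which interacts directly with the `grad = grad_n.sum(axis=0)` line above):

```python
# The clip-and-centre steps above, run on toy per-observation gradients.
import numpy as np

grad_n = np.array([[4.0, -2.0], [2.0, 6.0], [1e6, 0.0]])
grad_n = np.nan_to_num(grad_n, nan=0, posinf=1000, neginf=-1000)
grad_n = np.clip(grad_n, -100, 100)
grad_n = grad_n - grad_n.sum(axis=0) / grad_n.shape[0]  # subtract the mean gradient
print(grad_n.sum(axis=0))  # each column now sums to ~0
```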
@@ -4238,7 +4432,7 @@ class ObjectiveFunction(object):
  return proba_r.sum(axis=1), np.squeeze(proba_r)

  def _penalty_betas(self, betas, dispersion, penalty, penalty_ap=100.0):
- penalty_val = 0.05
+ penalty_val = 0.1
  penalty_val_max = 130

  # print('change_later')
@@ -4254,8 +4448,8 @@ class ObjectiveFunction(object):
  if abs(i) > penalty_val_max:
  penalty += abs(i)

- # if abs(i) < penalty_val:
- # penalty += 5
+ #if abs(i) < penalty_val:
+ # penalty += 5

  # penalty = 0
  return penalty
@@ -4362,8 +4556,7 @@ class ObjectiveFunction(object):
  index += 1

  brstd = br_std
- print(brstd)
- print(brstd)
+


  def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
@@ -4395,7 +4588,7 @@ class ObjectiveFunction(object):
  penalty = self._penalty_betas(
  betas, dispersion, penalty, float(len(y) / 10.0))
  self.n_obs = len(y) # feeds into gradient
- if draws is None and draws_grouped is None and (
+ if draws is None and draws_grouped is None and (model_nature is None or
  'draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
  #TODO do i shuffle the draws
  if type(Xd) == dict:
@@ -4448,7 +4641,7 @@ class ObjectiveFunction(object):
  penalty = self.regularise_l2(betas)

  if not np.isreal(loglik):
- loglik = - 1000000000.0
+ loglik = - 10000000.0
  output = (-loglik + penalty,)
  if return_gradient:
@@ -4542,7 +4735,9 @@ class ObjectiveFunction(object):
  Kf = 0
  else:
  if n_coeff != len(betas):
- raise Exception
+ raise Exception(
+
+ )
  Bf = betas[0:Kf] # Fixed betas


@@ -4644,7 +4839,8 @@ class ObjectiveFunction(object):
  eVd = self.lam_transform(eVd, dispersion, betas[-1])

  if self.is_dispersion(dispersion):
- penalty, betas[-1] = self._penalty_dispersion(
+ if not self.no_extra_param:
+ penalty, betas[-1] = self._penalty_dispersion(
  dispersion, betas[-1], eVd, y, penalty, model_nature)

  '''
@@ -4688,7 +4884,7 @@ class ObjectiveFunction(object):
  proba.append(dev.to_cpu(proba_))

  lik = np.stack(proba).sum(axis=0) / R # (N, )
- lik = np.clip(lik, min_comp_val, 10000)
+ lik = np.clip(lik, min_comp_val, max_comp_val)
  # lik = np.nan_to_num(lik, )
  loglik = np.log(lik)
  llf_main = loglik
@@ -5018,7 +5214,7 @@ class ObjectiveFunction(object):
  H = self.numerical_hessian(lambda x: self._loglik_gradient(x, *argbs), result.x, eps=1e-7 * self.n_obs)
  result['Hessian'] = H
  result['hess_inv'] = np.linalg.pinv(H)
- print('to do, only if hessian is fhfhfhf')
+
  standard_errors = np.sqrt(np.diag(np.linalg.pinv(H)))
  return result
  # return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS', options= {'gtol':1e-7*self.N}*self.Ndraws)
@@ -5289,7 +5485,7 @@ class ObjectiveFunction(object):
  return a

  def fitRegression(self, mod,
- dispersion=0, maxiter=2000, batch_size=None, num_hess=False):
+ dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):

  """
  Fits a Poisson regression given data and outcomes if dispersion is not declared
@@ -5306,7 +5502,7 @@ class ObjectiveFunction(object):


  sol = Solution()
- log_ll = 10.0 ** 9
+
  tol = {'ftol': 1e-8, 'gtol': 1e-6}
  is_delete = 0
  dispersion = mod.get('dispersion')
@@ -5335,6 +5531,8 @@ class ObjectiveFunction(object):
  _g, pg, kg = 0, 0, 0

  dispersion_param_num = self.is_dispersion(dispersion)
+ if self.no_extra_param:
+ dispersion_param_num = 0

  #paramNum = self.get_param_num(dispersion)
  self.no_random_paramaters = 0
@@ -5389,17 +5587,26 @@ class ObjectiveFunction(object):
  else:
  bb[0] = self.constant_value
  if dispersion == 1:
- bb[-1] = self.negative_binomial_value
+ if not self.no_extra_param:
+ bb[-1] = self.negative_binomial_value
  bounds = None

+
+
  # initial_beta = minimize(self._loglik_gradient, bb, args =(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method = 'nelder-mead', options={'gtol': 1e-7*len(XX)})
  hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
- initial_beta = self._minimize(self._loglik_gradient, bb,
+
+ if self.no_extra_param:
+ dispersion_poisson = 0
+ initial_beta = self._minimize(self._loglik_gradient, bb,
  args=(XX, y, None, None, None, None, calc_gradient, hess_est,
- dispersion, 0, False, 0, None, None, None, None, None,
+ dispersion_poisson, 0, False, 0, None, None, None, None, None,
  mod),
  method=method2, tol=1e-5, options={'gtol': tol['gtol']},
  bounds=bounds)
+ if dispersion:
+ nb_parma = self.poisson_mean_get_dispersion(initial_beta.x, XX, y)
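
Together with `poisson_mean_get_dispersion`, the new flow is: fit a Poisson model first, back a dispersion estimate out of its fitted means, then hold that value fixed for the NB fit. The same three stages sketched with statsmodels on simulated data (this is not the package's own API):

```python
# Three-stage Poisson -> dispersion -> NB workflow, sketched with statsmodels.
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(2)
X = sm.add_constant(rng.normal(size=(1000, 2)))
mu = np.exp(X @ np.array([0.5, 0.3, -0.2]))
alpha = 0.6
y = rng.negative_binomial(1.0 / alpha, 1.0 / (1.0 + alpha * mu))

poisson_fit = sm.GLM(y, X, family=sm.families.Poisson()).fit()  # stage 1: Poisson fit
mu_hat = poisson_fit.mu
aux = ((y - mu_hat) ** 2 - mu_hat) / mu_hat
alpha_hat = sm.OLS(aux, mu_hat).fit().params[0]                 # stage 2: dispersion
nb_fit = sm.GLM(y, X, family=sm.families.NegativeBinomial(alpha=alpha_hat)).fit()  # stage 3
print(round(alpha_hat, 2))
```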
+

@@ -5499,7 +5706,7 @@ class ObjectiveFunction(object):


  b = [b[i] if i > len(self.none_handler(self.fixed_fit)) + len(
  self.none_handler(self.rdm_fit)) + len(
- self.none_handler(self.rdm_cor_fit)) else b[i] / 1.3 for i in range(len(b))]
+ self.none_handler(self.rdm_cor_fit)) else b[i] / 1 for i in range(len(b))]
  else:
  b = bb

@@ -5509,9 +5716,10 @@ class ObjectiveFunction(object):

  else:
  b = np.insert(b, -1, np.random.uniform(0.05, 0.1))
  if dispersion == 1:
- b[-1] = np.abs(b[-1])
- if b[-1] > 10:
- b[-1] = 5
+ if not self.no_extra_param:
+ b[-1] = np.abs(b[-1])
+ if b[-1] > 10:
+ b[-1] = 5
  elif dispersion == 2:
  b[-1] = .5
  if method == 'L-BFGS-B' or method2 == 'L-BFGS-B':
@@ -5637,13 +5845,30 @@ class ObjectiveFunction(object):

  if draws is None and draws_hetro is not None:
  print('hold')
- betas_est = self._minimize(self._loglik_gradient, b, args=(
- X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
- self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
- method=method2, tol=tol['ftol'],
- options={'gtol': tol['gtol']}, bounds=bounds,
- hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+ #self.grad_yes = True
+ #self.hess_yes = True

+ if self.no_extra_param:
+ dispersion_poisson = 0
+ betas_est = self._minimize(self._loglik_gradient, b, args=(
+ X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion_poisson, 0, False, 0,
+ self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
+ method=method2, tol=tol['ftol'],
+ options={'gtol': tol['gtol']}, bounds=bounds,
+ hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+ if dispersion:
+ initial_fit_beta = betas_est.x
+ parmas = np.append(initial_fit_beta, nb_parma)
+ self.nb_parma = nb_parma
+ #print(f'neg binomial, {self.nb_parma}')
+ betas_est = self._minimize(self._loglik_gradient, initial_fit_beta, args=(
+ X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
+ self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
+ method=method2, tol=tol['ftol'],
+ options={'gtol': tol['gtol']}, bounds=bounds,
+ hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+
+ #print('refit with estimation of NB')
  # self.numerical_hessian_calc = True
  if self.numerical_hessian_calc:
  try:
@@ -5942,7 +6167,7 @@ class ObjectiveFunction(object):
  return delim + self._model_type_codes[dispersion]

  def self_standardize_positive(self, X):
- scaler = StandardScaler()
+ scaler = MinMaxScaler()
  if type(X) == list:
  return X

@@ -5952,12 +6177,26 @@ class ObjectiveFunction(object):
  # Reshaping to 2D - combining the last two dimensions
  df_tf_reshaped = X.reshape(original_shape[0], -1)
  df_tf_scaled = scaler.fit_transform(df_tf_reshaped)
- df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
+ #df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
  # Reshape back to original 3D shape if necessary
  df_tf = df_tf_scaled.reshape(original_shape)
  return df_tf
  else:
- raise X
+ # Initialize the MinMaxScaler
+ scaler = MinMaxScaler()
+ float_columns = X.select_dtypes(include=['float64', 'float32', 'int']).columns.difference(['const', 'offset', 'EXPOSE', 'Constant', 'constant'])
+ non_numeric_columns = X.select_dtypes(exclude=['float64', 'float32', 'int']).columns
+
+ # Fit the scaler to the float columns and transform them
+ X[float_columns] = scaler.fit_transform(X[float_columns])
+ # Fit the scaler to the data and transform it
+ #scaled_data = scaler.fit_transform(X)
+
+ # Convert the result back to a DataFrame
+ #scaled_df = pd.DataFrame(scaled_data, columns=X.columns)
+
+
+ return X

  def make_regression_from_terms(self, fixed=None, rdm=None, rdm_cor_fit=None, distribution=None, dispersion=None,
  *args, **kwargs):
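
`self_standardize_positive` now min-max scales only the numeric columns of a DataFrame, leaving the constant (and any non-numeric columns) alone. The column-selection pattern on a toy frame ('AADT' and 'lanes' are hypothetical column names):

```python
# Column-wise MinMax scaling as above, on a toy DataFrame.
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

X = pd.DataFrame({'const': [1.0, 1.0, 1.0],
                  'AADT': [1200.0, 5400.0, 87000.0],
                  'lanes': [2.0, 4.0, 6.0]})
float_columns = X.select_dtypes(include=['float64', 'float32', 'int']).columns.difference(['const'])
X[float_columns] = MinMaxScaler().fit_transform(X[float_columns])
print(X.round(2))  # 'const' untouched; 'AADT' and 'lanes' mapped into [0, 1]
```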
@@ -6012,8 +6251,9 @@ class ObjectiveFunction(object):
  df_test[:, :, idx], model_nature.get('transformations')[idx] = self.transformer(
  t, idx, df_test[:, :, idx])
  if np.max(df_tf[:, :, idx]) >= 77000:
+ #TODO need to normalise the data

- raise Exception('should not be possible')
+ print('should not be possible')

  self.define_selfs_fixed_rdm_cor(model_nature)
  indices = self.get_named_indices(self.fixed_fit)
@@ -6070,7 +6310,7 @@ class ObjectiveFunction(object):
  model_nature['XH'] = XH
  X_test = None
  if np.isin(X, [np.inf, -np.inf, None, np.nan]).any(): # type ignore
- raise Exception('there is some kind of error')
+ raise Exception('there is some kind of error in X')

  # numpy data setup for estimation
  indices2 = self.get_named_indices(self.rdm_fit)
@@ -6170,7 +6410,7 @@ class ObjectiveFunction(object):
  self.log_lik = log_lik
  if self.significant == 0:

- print(self.full_model, 'full model is')
+
  if not self.test_flag:
  alpha, alpha_rdm, alpha_cor_rdm = self.modify(
  self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
@@ -6221,6 +6461,53 @@ class ObjectiveFunction(object):

  return obj_1, model_nature

+ def get_X_tril(self):
+ '''For correlations find the repeating terms'''
+ varnames = self.none_join([self.rdm_grouped_fit, self.rdm_fit, self.rdm_cor_fit])
+ rv_count_all = 0
+ chol_count = 0
+ rv_count = 0
+ corr_indices = []
+ rv_indices = []
+ for ii, var in enumerate(varnames): # TODO: BUGFIX
+ if var in self.none_handler(self.rdm_cor_fit):
+ is_correlated = True
+ else:
+ is_correlated = False
+
+ rv_count_all += 1
+ if is_correlated:
+ chol_count += 1
+ else:
+ rv_count += 1
+
+ if var in self.none_handler(self.rdm_cor_fit):
+
+ corr_indices.append(rv_count_all - 1) # TODO: what does this do
+
+ else:
+ rv_indices.append(rv_count_all - 1)
+
+ # for s.d.: gr_w = (Obs prob. minus predicted probability) * obs. var * random draw
+ draws_tril_idx = np.array([corr_indices[j]
+ for i in range(len(self.none_handler(self.rdm_cor_fit)))
+ for j in range(i + 1)]) # varnames pos.
+ X_tril_idx = np.array([corr_indices[i]
+ for i in range(len(self.none_handler(self.rdm_cor_fit)))
+ for j in range(i + 1)])
+ # Find the s.d. for random variables that are not correlated
+ var_uncor = self.none_join([self.rdm_grouped_fit, self.rdm_fit])
+ range_var = [x for x in
+ range(len(self.none_handler(var_uncor)))]
+ range_var = sorted(range_var)
+ draws_tril_idx = np.array(np.concatenate((range_var, draws_tril_idx)))
+ X_tril_idx = np.array(np.concatenate((range_var, X_tril_idx)))
+ draws_tril_idx = draws_tril_idx.astype(int)
+ X_tril_idx = X_tril_idx.astype(int)
+ return X_tril_idx
+
+
+
  def modifyn(self, data):
  select_data = self._characteristics_names
  alpha = np.isin(select_data, [item.split(':')[0] for item in data['fixed_fit']]).astype(int).tolist()
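
`get_X_tril` rebuilds the row/column index vectors that map lower-triangular Cholesky entries of the correlated random parameters back onto design columns. The index pattern for three correlated terms:

```python
# Lower-triangular index pattern built by get_X_tril, for 3 correlated terms.
import numpy as np

corr_indices = [0, 1, 2]
draws_tril_idx = np.array([corr_indices[j] for i in range(3) for j in range(i + 1)])
X_tril_idx = np.array([corr_indices[i] for i in range(3) for j in range(i + 1)])
print(draws_tril_idx)  # [0 0 1 0 1 2] -> draw (column) index of each Cholesky entry
print(X_tril_idx)      # [0 1 1 2 2 2] -> variable (row) index of each Cholesky entry
```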
@@ -6428,23 +6715,35 @@ class ObjectiveFunction(object):
  # N, D = draws.shape[0], draws.shape[1]
  N, R, Kr = draws.shape[0], draws.shape[2], draws.shape[1]
  der = dev.np.ones((N, Kr, R), dtype=draws.dtype)
- if len(self.none_handler(self.rdm_cor_fit)) == 0:
- Br_come_one = self.Br.copy()
- # Br_come_one =
- else:

- Br_come_one = self.Br.copy()
  # betas_random = self._transform_rand_betas(betas, betas_std, draws)
  #todo make sure this works for ln and truncated normal
  if any(set(distribution).intersection(['ln_normal', 'tn_normal'])):
- print('check this, intesection shouldn not happen for all')
+
+ #print('check this, intersection should not happen for all')
+
+ if der.shape[1] != draws.shape[1]:
+ print('why')
  Br_come_one = self._transform_rand_betas(betas, betas_std, draws)
+ if der.shape[1] != draws.shape[1]:
+ print('why')
+ #TODO need to get the structure of the rdms
  for k, dist_k in enumerate(distribution):
  if dist_k == 'ln_normal':
+ if der.shape[1] != draws.shape[1]:
+ print('why')
  der[:, k, :] = Br_come_one[:, k, :]
+ if der.shape[1] != draws.shape[1]:
+ print('why')
  elif dist_k == 'tn_normal':
+ if der.shape[1] != draws.shape[1]:
+ print('why')
  der[:, k, :] = 1 * (Br_come_one[:, k, :] > 0)
+ if der.shape[1] != draws.shape[1]:
+ print('why')

+ if der.shape[1] != draws.shape[1]:
+ print('why')
  return der

  def _copy_size_display_as_ones(self, matrix):