metacountregressor 0.1.108__py3-none-any.whl → 0.1.116__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -30,9 +30,9 @@ from scipy.special import gammaln
  from sklearn.metrics import mean_absolute_error as MAE
  from sklearn.metrics import mean_squared_error as MSPE
  from statsmodels.tools.numdiff import approx_fprime, approx_hess
- from sklearn.preprocessing import StandardScaler
+ from sklearn.preprocessing import StandardScaler, MinMaxScaler
  from texttable import Texttable
-
+ import time
  try:
  from ._device_cust import device as dev
  from .pareto_file import Pareto, Solution
@@ -42,7 +42,7 @@ except ImportError:
  from pareto_file import Pareto, Solution
  from data_split_helper import DataProcessor

-
+ from scipy import stats
  np.seterr(divide='ignore', invalid='ignore')
  warnings.simplefilter("ignore")

@@ -124,10 +124,11 @@ class ObjectiveFunction(object):

  self.reg_penalty = 0
  self.power_up_ll = False
-
+ self.nb_parma = 1
  self.bic = None
  self.other_bic = False
  self.test_flag = 1
+ self.no_extra_param =1 #if true, fix dispersion. w
  if self.other_bic:
  print('change this to false latter ')

@@ -135,10 +136,10 @@ class ObjectiveFunction(object):
  self.constant_value = 0
  self.negative_binomial_value = 1

- self.verbose_safe = True
+ self.verbose_safe = kwargs.get('verbose', 0)
  self.please_print = kwargs.get('please_print', 0)
  self.group_halton = None
- self.grad_yes = False
+ self.grad_yes = kwargs.get('grad_est', False)
  self.hess_yes = False
  self.group_halton_test = None
  self.panels = None
@@ -151,15 +152,15 @@ class ObjectiveFunction(object):
  self.dist_fit = None

  self.MAE = None
- self.best_obj_1 = 100000000.0
- self._obj_1 = 'bic'
- self._obj_2 = 'MSE'
+ self.best_obj_1 = 1000000.0
+ self._obj_1 = kwargs.get('_obj_1', 'bic')
+ self._obj_2 = kwargs.get('_obj_2', 'MSE')
  self.numerical_hessian_calc = 0 # calculates hessian by statsmodels otherwise scipy
  self.full_model = None
  self.GP_parameter = 0
- self.is_multi = 0
+ self.is_multi = kwargs.get('is_multi', False)
  self.complexity_level = 6
- self._max_iterations_improvement = 100
+ self._max_iterations_improvement = 10000
  self.generated_sln = set()
  self.ave_mae = 0
  # defalt paramaters for hs #TODO unpack into harmony search class
@@ -167,23 +168,32 @@ class ObjectiveFunction(object):
  self._hms = 20
  self._max_time = 60 * 60 * 24
  self._hmcr = .5
- self._par = 0.3
+ self._par = 0.3 #dont think this gets useted
  self._mpai = 1
  self._max_imp = 100000
  self._WIC = 1000 # Number of Iterations without Multiobjective Improvement #tod chuck into solution
  self._panels = None
  self.is_multi = True
  self.method_ll = 'Nelder-Mead-BFGS'
+
  self.method_ll = 'L-BFGS-B' # alternatives 'BFGS_2', 'BFGS
- self.method_ll = 'BFGS_2'
+ self.method_ll = kwargs.get('method', 'BFGS_2')
+
+ #self.method_ll = 'Nelder-Mead-BFGS'
  self.Keep_Fit = 2
  self.MP = 0
  # Nelder-Mead-BFGS

- self._max_characteristics = 26
+ self._max_characteristics = kwargs.get('_max_vars', 26)

  self.beta_dict = dict
+ if 'model_terms' in kwargs:
+ print('change')
+ if kwargs.get('model_terms').get('group') is not None:
+ kwargs['group'] = kwargs.get('model_terms').get('group')

+ if kwargs.get('model_terms').get('panels') is not None:
+ kwargs['panels'] = kwargs.get('model_terms').get('panels')
  acceptable_keys_list = ['_par', '_max_imp', '_hmcr', 'steps',
  'algorithm', '_random_seed', '_max_time',
  'forcedvariables', '_obj_1', '_obj_2', '_par',
@@ -201,12 +211,17 @@ class ObjectiveFunction(object):
  if 'instance_number' in kwargs:
  self.instance_number = str(kwargs['instance_number'])
  else:
+
+ print('no name set, setting name as 0')
  self.instance_number = str(0) # set an arbitrary instance number

  if not os.path.exists(self.instance_number):
- os.makedirs(self.instance_number)
+ if kwargs.get('make_directory', True):
+ print('Making a Directory, if you want to stop from storing the files to this directory set argumet: make_directory:False')
+ os.makedirs(self.instance_number)

  if not hasattr(self, '_obj_1'):
+ print('_obj_1 required, define as bic, aic, ll')
  raise Exception

  self.pvalue_penalty = float(kwargs.get('pvalue_penalty', 0.5))
@@ -214,6 +229,11 @@ class ObjectiveFunction(object):
  self._maximize = False # do we maximize or minimize?

  x_data = sm.add_constant(x_data)
+ standardize_the_data = 0
+ if standardize_the_data:
+ print('we are standardize the data')
+ x_data = self.self_standardize_positive(x_data)
+
  self._input_data(x_data, y_data)


@@ -230,13 +250,17 @@ class ObjectiveFunction(object):
  self.test_percentage = float(kwargs.get('test_percentage', 0))
  self.val_percentage = float(kwargs.get('val_percentage', 0))
  if self.test_percentage == 0:
+ print('test percentage is 0, please enter arg test_percentage as decimal, eg 0.8')
+ print('continuing single objective')
+ time.sleep(2)
  self.is_multi = False

- if 'panels' in kwargs and not np.isnan(kwargs.get('panels')):
- self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)
+ if 'panels' in kwargs and not (kwargs.get('panels') == None):
+ if kwargs.get('group') is not None:
+ self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)

- x_data[kwargs['group']] = x_data[kwargs['group']].astype(
- 'category').cat.codes
+ x_data[kwargs['group']] = x_data[kwargs['group']].astype(
+ 'category').cat.codes
  self.complexity_level = 6
  # create test dataset

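The hunk above now guards the group handling: the `group` column is re-encoded with pandas category codes only when a grouping variable was actually supplied. A minimal sketch of the underlying idiom with hypothetical data (the public `cat.categories` accessor is used here in place of the private `cat._parent.dtype.categories` call in the source):

    import pandas as pd

    df = pd.DataFrame({'group': ['north', 'south', 'north', 'west']})
    grp = df['group'].astype('category')
    print(list(grp.cat.codes))        # [0, 1, 0, 2] -- stable integer ids per label
    print(list(grp.cat.categories))   # ['north', 'south', 'west'] -- id -> label lookup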
@@ -275,26 +299,31 @@ class ObjectiveFunction(object):

  #self.n_obs = N
  self._characteristics_names = list(self._x_data.columns)
- self._max_group_all_means = 1
+ self._max_group_all_means = 2

  exclude_this_test = [4]

- if 'panels' in kwargs and not np.isnan(kwargs.get('panels')):
+ if 'panels' in kwargs and not (kwargs.get('panels') == None):
  self.panels = np.asarray(df_train[kwargs['panels']])
  self.panels_test = np.asarray(df_test[kwargs['panels']])
  self.ids = np.asarray(
  df_train[kwargs['panels']]) if kwargs['panels'] is not None else None
  self.ids_test = np.asarray(
  df_test[kwargs['panels']]) if kwargs['panels'] is not None else None
- groupll = np.asarray(df_train[kwargs['group']].astype(
- 'category').cat.codes)
- group_test = np.asarray(df_test[kwargs['group']].astype(
- 'category').cat.codes)
+ if kwargs.get('group') is not None:
+ groupll = np.asarray(df_train[kwargs['group']].astype(
+ 'category').cat.codes)
+ group_test = np.asarray(df_test[kwargs['group']].astype(
+ 'category').cat.codes)
+ else:
+ groupll = None
  X, Y, panel, group = self._arrange_long_format(
  df_train, y_train, self.ids, self.ids, groupll)
  self.group_halton = group.copy()
  self.group_dummies = pd.get_dummies(group)
  Xnew, Ynew, panel_info = self._balance_panels(X, Y, panel)
+
+ Xnew = pd.DataFrame(Xnew, columns=X.columns)
  self.panel_info = panel_info
  self.N, self.P = panel_info.shape
  Xnew.drop(kwargs['panels'], axis=1, inplace=True)
@@ -302,9 +331,11 @@
  K = Xnew.shape[1]
  self._characteristics_names = list(Xnew.columns)
  XX = Xnew.values.reshape(self.N, self.P, K).copy()
+ XX = XX.astype('float')
  self.group_dummies = self.group_dummies.values.reshape(self.N, self.P, -1)
  self.group_halton = self.group_halton.reshape(self.N, self.P)[:, 0]
  YY = Ynew.values.reshape(self.N, self.P, 1).copy()
+ YY = YY.astype('float')
  self._x_data = XX.copy()
  self._y_data = YY.copy()
  X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.panels_test, group_test)
@@ -326,6 +357,7 @@
  K = X.shape[1]
  self.columns_names = X.columns
  X = X.values.reshape(self.N_test, self.P_test, K)
+ X = X.astype('float')
  self.group_halton_test = self.group_halton_test.reshape(self.N_test, self.P_test)[:, 0]
  Y = Y.values.reshape(self.N_test, self.P_test, 1)
  Y = Y.astype('float')
@@ -338,6 +370,7 @@


  else:
+ print('No Panels. Grouped Random Paramaters Will not be estimated')
  self.G = None
  self._Gnum = 1
  self._max_group_all_means = 0
@@ -354,7 +387,9 @@
  K = Xnew.shape[1]
  self._characteristics_names = list(Xnew.columns)
  XX = Xnew.values.reshape(self.N, self.P, K).copy()
+ XX = XX.astype('float')
  YY = Ynew.values.reshape(self.N, self.P, 1).copy()
+ YY = YY.astype('float')
  self._x_data = XX.copy()
  self._y_data = YY.copy()

@@ -370,7 +405,9 @@
  K = X.shape[1]
  self.columns_names = X.columns
  X = X.values.reshape(self.N_test, self.P_test, K)
+ X = X.astype('float')
  Y = Y.values.reshape(self.N_test, self.P_test, 1)
+ Y = Y.astype('float')
  self._x_data_test = X.copy()
  self.y_data_test = Y.copy()

@@ -385,7 +422,7 @@



- self.Ndraws = 200 # todo: change back
+ self.Ndraws = kwargs.get('Ndraws', 200)
  self.draws1 = None
  self.initial_sig = 1 # pass the test of a single model
  self.pvalue_sig_value = .1
@@ -403,12 +440,13 @@
  print('Setup Complete...')
  else:
  print('No Panels Supplied')
+ print('Setup Complete...')
  self._characteristics_names = list(self._x_data.columns)
  # define the variables
  # self._transformations = ["no", "sqrt", "log", "exp", "fact", "arcsinh", 2, 3]
  self._transformations = ["no", "sqrt", "log", "arcsinh"]
  self._transformations = kwargs.get('_transformation', ["no", "sqrt", "log", 'arcsinh'])
-
+ self._transformations = kwargs.get('_transformation', ["no", "log", "sqrt", "arcsinh"])
  # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']

  self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])
@@ -439,8 +477,17 @@
  if 'model_types' in kwargs:
  model_types = kwargs['model_types']
  else:
- model_types = [[0, 1]] # add 2 for Generalized Poisson

+
+ model_types = [[0, 1]] # add 2 for Generalized Poisson
+ #model_types = [[0]]
+ #TODO change back and fix NB
+ model_t_dict = {'Poisson':0,
+ "NB":1}
+ # Retrieve the keys (model names) corresponding to the values in model_types
+ model_keys = [key for key, value in model_t_dict.items() if value in model_types[0]]
+ # Print the formatted result
+ print(f'The type of models possible will consider: {", ".join(model_keys)}')
  self._discrete_values = self._discrete_values + self.define_poissible_transforms(
  self._transformations, kwargs.get('decisions',None)) + model_types

@@ -458,8 +505,9 @@
  self._max_hurdle = 4

  #Manually fit from analyst specification
- manual_fit = kwargs.get('Manual_Fit')
+ manual_fit = kwargs.get('Manual_Fit', None)
  if manual_fit is not None:
+ print('fitting manual')
  self.process_manual_fit(manual_fit)

  self.solution_analyst = None
@@ -494,6 +542,7 @@
  if self.is_multi:
  self._offsets_test = self._x_data_test[:, :, val_od]
  self._x_data_test = self.remove_offset(self._x_data_test, val_od)
+ #print(self._offsets)
  else:
  self.initialize_empty_offsets()

@@ -766,6 +815,8 @@
  if dispersion == 0:
  return None, None
  elif dispersion == 2 or dispersion == 1:
+ if self.no_extra_param:
+ return self.nb_parma, None
  return betas[-1], None

  elif dispersion == 3:
@@ -796,15 +847,18 @@
  def rename_distro(self, distro):
  # Mapping dictionary
  mapping = {
- 'Normal': 'normal',
- 'Triangular': 'triangular',
- 'Uniform': 'uniform',
- 'Log-Normal': 'ln_normal',
- 'Trunc-Normal': 'tn_normal'
+ 'normal': ['normal', 'n', 'Normal'],
+ 'triangular': ['triangular', 't', 'Triangular'],
+ 'uniform': ['uniform', 'u', 'Uniform'],
+ 'ln_normal': ['ln_normal', 'ln_n', 'Ln_Normal'],
+ 'tn_normal': ['tn_normal', 'tn_n', 'trunc_normal']
  }

  # Use list comprehension with the mapping
- new_distro = [mapping.get(i, i) for i in distro]
+ reversed_mapping = {value: key for key, values in mapping.items() for value in values}
+
+ # Use the reversed mapping to find the corresponding key
+ new_distro = [reversed_mapping.get(i, i) for i in distro]
  return new_distro

  def define_distributions_analyst(self, extra = None):
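The rewritten `rename_distro` inverts a one-to-many alias table into a flat alias-to-canonical lookup. A short sketch of that pattern under the same assumptions (unknown names fall through unchanged via `dict.get`):

    mapping = {
        'normal': ['normal', 'n', 'Normal'],
        'uniform': ['uniform', 'u', 'Uniform'],
    }
    # Flip canonical -> aliases into alias -> canonical.
    reversed_mapping = {alias: canon for canon, aliases in mapping.items() for alias in aliases}
    print([reversed_mapping.get(d, d) for d in ['n', 'Uniform', 'triangular']])
    # ['normal', 'uniform', 'triangular']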
@@ -817,6 +871,8 @@
  distro = ast.literal_eval(extra.iloc[matched_index, 7].values.tolist()[0])
  distro = self.rename_distro(distro)
  set_alpha = set_alpha+[distro]
+ elif col == 'const':
+ set_alpha = set_alpha +[['normal']]
  return set_alpha
  return [[x for x in self._distribution]] * self._characteristics

@@ -897,10 +953,12 @@
  return ([self._model_type_codes[dispersion]])

  def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
- r'''
+ '''
  setup for naming of the model summary
  '''
+ if self.no_extra_param and dispersion ==1:

+ betas = np.append(betas, self.nb_parma)

  self.name_deleter = []
  group_rpm = None
@@ -1021,13 +1079,15 @@
  [''] * (len(names) - len(self.transform_id_names))
  self.coeff_names = names

+ '''
  if betas is not None:
  try:
  if len(betas) != len(names):
- print('no draws is', no_draws)
- print('fix_theano')
+ print('standard_model', no_draws)
+
  except Exception as e:
  print(e)
+ '''


@@ -1052,7 +1112,8 @@
  if not isinstance(self.pvalues, np.ndarray):
  raise Exception

-
+ if 'nb' in self.coeff_names and self.no_extra_param:
+ self.pvalues = np.append(self.pvalues,0)

  if self.please_print or save_state:

@@ -1068,17 +1129,22 @@

  if solution is not None:
  print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")
-
+
  self.pvalues = [self.round_with_padding(
  x, 2) for x in self.pvalues]
  signif_list = self.pvalue_asterix_add(self.pvalues)
  if model == 1:

- self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
- if self.coeff_[-1] < 0.25:
+ #self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
+ if self.no_extra_param:
+ self.coeff_ = np.append(self.coeff_, self.nb_parma)
+ self.stderr = np.append(self.stderr, 0.00001)
+ self.zvalues = np.append(self.zvalues, 50)
+
+ elif self.coeff_[-1] < 0.25:
  print(self.coeff_[-1], 'Warning Check Dispersion')
  print(np.exp(self.coeff_[-1]))
- self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
+ #self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom

  self.coeff_ = [self.round_with_padding(x, 2) for x in self.coeff_]

@@ -1301,6 +1367,7 @@

  if 'AADT' in self._characteristics_names[col]:
  new_transform = [['log']]
+ #new_transform = [['no']]
  transform_set = transform_set + new_transform

  elif all(x_data[col] <= 5):
@@ -1340,6 +1407,18 @@

  return transform_set

+ def poisson_mean_get_dispersion(self, betas, X, y):
+ eVy = self._loglik_gradient(betas, X, y, None, X, None, None, False, False, dispersion=0,
+ return_EV=True,
+ zi_list=None, draws_grouped=None, Xgroup=None)
+
+ ab = ((y - eVy)**2 - eVy)/eVy
+ bb = eVy -1
+ disp = sm.OLS(ab.ravel(), bb.ravel()).fit()
+ gamma = disp.params[0]
+ #print(f'dispersion is {gamma}')
+ return gamma
+
  def validation(self, betas, y, X, Xr=None, dispersion=0, rdm_cor_fit=None, zi_list=None, exog_infl=None,
  model_nature=None, halton=1, testing=1, validation=0):
  'validation if mu needs to be calculated'
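The new `poisson_mean_get_dispersion` recovers an overdispersion value by regressing a moment condition on the fitted Poisson means, in the spirit of the Cameron-Trivedi auxiliary regression for NB2 (the canonical form regresses ((y - mu)^2 - y)/mu on mu; the source uses a close variant with mu - 1 as the regressor). A self-contained sketch of the canonical version on synthetic data:

    import numpy as np
    import statsmodels.api as sm

    rng = np.random.default_rng(0)
    mu = np.exp(rng.normal(1.0, 0.3, size=2000))     # stand-in for fitted Poisson means
    alpha_true = 0.5                                  # Var(y) = mu + alpha * mu**2
    y = rng.negative_binomial(1 / alpha_true, 1 / (1 + alpha_true * mu))

    aux = ((y - mu) ** 2 - y) / mu                    # moment condition: E[aux] = alpha * mu
    alpha_hat = sm.OLS(aux, mu).fit().params[0]       # slope through the origin
    print(round(alpha_hat, 2))                        # close to 0.5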
@@ -1373,7 +1452,7 @@
  XG = model_nature.get('XGtest')[:total_percent, :, :]
  else:
  XG = model_nature.get('XGtest')[total_percent:, :, :]
- print('chekc this is doing it wright')
+
  else:
  if 'XG' in model_nature:
  XG = model_nature.get('XG')
@@ -1495,7 +1574,7 @@
  5: herogeneity_in _means


- a: how to transofrm the original data
+ a: how to transform the original data
  b: grab dispersion '''

  # todo: better way
@@ -1843,7 +1922,10 @@
  elif dispersion == 4:
  return 2
  else:
- return 1
+ if self.no_extra_param:
+ return 0
+ else:
+ return 1

  def get_pvalue_info_alt(self, pvalues, names, sig_value=0.05, dispersion=0, is_halton=1, delete=0,
  return_violated_terms=0):
@@ -1858,6 +1940,7 @@

  else:
  slice_this_amount = self.num_dispersion_params(dispersion)
+ slice_this_amount = 1 #TODO handle this
  if pvalues[-1] > sig_value:
  vio_counts += 1
  subpvalues = pvalues[:-slice_this_amount].copy()
@@ -2282,7 +2365,7 @@ class ObjectiveFunction(object):
  sorted(my_dict, key=lambda x: x[0]['pval_percentage'])

  def get_fitness(self, vector, multi=False, verbose=False, max_routine=3):
- obj_1 = 10.0 ** 8
+ obj_1 = 10.0 ** 4
  obj_best = None
  sub_slns = list()

@@ -2290,12 +2373,14 @@
  vector) # just added to grab the fixed fit TODO: Clean up
  dispersion = model_nature.get('dispersion')
  self.define_selfs_fixed_rdm_cor(model_nature)
+ print('before', vector)
  try:
  self.repair(vector)
  except Exception as e:
- print('prob here')
+ print('problem repairing here')
  print(vector)
  print(e)
+ print('after', vector)
  layout = vector.copy()
  trial_run = 0
  max_trial = 0
@@ -2374,10 +2459,10 @@


  if not self.is_quanitifiable_num(obj_1[self._obj_1]):
- obj_1[self._obj_1] = 10 ** 9
+ obj_1[self._obj_1] = 10 ** 5
  else:
  if obj_1[self._obj_1] <= 0:
- obj_1[self._obj_1] = 10 ** 9
+ obj_1[self._obj_1] = 10 ** 5

  if multi:

@@ -2408,10 +2493,10 @@

  self.reset_sln()
  if not self.is_quanitifiable_num(obj_1[self._obj_1]):
- obj_1[self._obj_1] = 10 ** 9
+ obj_1[self._obj_1] = 10 ** 5
  else:
  if obj_1[self._obj_1] == 0:
- obj_1[self._obj_1] = 10 ** 9
+ obj_1[self._obj_1] = 10 **5
  if verbose:
  print('The best solution iteratively is of objective value:', obj_1)

@@ -2574,7 +2659,7 @@
  self._hmcr = (
  self._hmcr_min + ((self._hmcr_max - self._hmcr_min) / self._max_imp) * iteration)

- # end def
+

  def update_par(self, iteration, is_sin=False):
  """
@@ -2794,10 +2879,6 @@
  '''
  #return score

-
-
-
-
  try:
  if alpha is None:
  alpha = np.exp(params[-1])
@@ -2938,7 +3019,7 @@
  argument = prob.mean(axis=1)
  # if less than 0 penalise
  if np.min(argument) < 0:
- print('what the fuck')
+ print('Error with args..')
  if np.min(argument) < limit:
  # add a penalty for too small argument of log
  log_lik += -np.sum(np.minimum(0.0, argument - limit)) / limit
@@ -2954,6 +3035,39 @@
  # print('log_lik poisson', log_lik)
  return -log_lik

+ def extract_parameters(self, betas, Kf, Kr, Kchol_a, Krb_a):
+ """
+ Extracts parameters from the `betas` array based on the given sizes.
+
+ Parameters:
+ betas (numpy.ndarray): The array of betas.
+ Kf (int): Size of Bf (first Kf elements of betas).
+ Kr (int): Size of Br.
+ Kchol_a (int): Part of the size for brstd.
+ Krb_a (int): Part of the size for brstd.
+
+ Returns:
+ tuple: A tuple containing:
+ - Bf (numpy.ndarray): The first Kf elements of betas.
+ - Br (numpy.ndarray): The next Kr elements of betas after Bf.
+ - brstd (numpy.ndarray): The next Kchol_a + Krb_a elements of betas after Br.
+ - remaining_betas (numpy.ndarray): Any remaining elements in betas after brstd.
+ """
+ # Step 1: Extract Bf
+ Bf = betas[:Kf] # First Kf elements
+
+ # Step 2: Extract Br
+ Br = betas[Kf:Kf + Kr] # Next Kr elements after Bf
+
+ # Step 3: Extract brstd
+ brstd_size = Kchol_a + Krb_a # Total size of brstd
+ brstd = betas[Kf + Kr:Kf + Kr + brstd_size] # Next brstd_size elements after Br
+
+ # Step 4: Extract remaining betas
+ remaining_betas = betas[Kf + Kr + brstd_size:] # Remaining elements in betas
+
+ return Bf, Br, brstd, remaining_betas
+
  def convert_nbinom_params(self, mu, theta):
  """
  Convert mean/dispersion parameterization of a negative binomial to the ones scipy supports
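`convert_nbinom_params` maps the model's mean/dispersion pair onto scipy's `(n, p)` parameterization. Assuming the usual NB2 convention Var(y) = mu + alpha*mu**2, the conversion is n = 1/alpha and p = n/(n + mu); a quick check:

    from scipy.stats import nbinom

    mu, alpha = 4.0, 0.5
    n = 1.0 / alpha
    p = n / (n + mu)
    dist = nbinom(n, p)
    print(dist.mean(), dist.var())   # 4.0 and 4.0 + 0.5 * 4.0**2 = 12.0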
@@ -3429,6 +3543,7 @@
  else:
  corr_pairs = list(itertools.combinations(corr_indices, 2))

+
  for ii, corr_pair in enumerate(corr_pairs):
  # lower cholesky matrix
  chol_mat_temp[tuple(reversed(corr_pair))] = chol[chol_count]
@@ -3456,7 +3571,7 @@
  a = 0
  b = 0
  stuff = []
- # todo get order
+ # TODO get order
  for j, i in enumerate(list_sizes):
  br_mean = betas_hetro[a:i + a]
  a += i
@@ -3483,7 +3598,33 @@
  br_mean = betas_m
  br_sd = betas_sd # Last Kr positions
  # Compute: betas = mean + sd*draws
- betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
+ if len(br_sd) != draws.shape[1]:
+ #get the same size as the mean
+ #if hasattr(self.Br):
+ # betas_random = self.Br.copy()
+ #else:
+ idx = self.get_X_draw_tril()
+ betas_random = br_mean[None, :, None] + draws[:,idx, :] * br_sd[None, :, None]
+ '''
+ c = self.get_num_params()[3:5]
+
+ cor = []
+ for i in range(c[0]):
+ cor.append(i)
+
+ vall =[]
+ for i, val in enumerate(reversed(br_sd)):
+ vall.append()
+
+ remaining = draws.shape[1] - len(betas_sd)
+ '''
+
+ else:
+
+
+ betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
+
+
  betas_random = self._apply_distribution(betas_random)

  return betas_random
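The branch added above still reduces to the same vectorised kernel: with means `br_mean` of shape (K,), spreads `br_sd` of shape (K,) and draws of shape (N, K, R), the random coefficients come from one broadcast expression. A minimal sketch:

    import numpy as np

    N, K, R = 3, 2, 5
    br_mean = np.array([1.0, -0.5])                   # random-parameter means
    br_sd = np.array([0.2, 0.1])                      # random-parameter spreads
    draws = np.random.default_rng(1).standard_normal((N, K, R))

    betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
    print(betas_random.shape)                         # (3, 2, 5)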
@@ -3502,21 +3643,38 @@
  # if gamma <= 0.01: #min defined value for stable nb
  # gamma = 0.01

+ #g = stats.gamma.rvs(gamma, scale = lam/gamma, size = 1.0 / gamma * lam ** Q )

+ #gg = stats.poisson.rvs(g)

+

+
  endog = y
  mu = lam
+ ''''
+ mu = lam*np.exp(gamma) #TODO check that this does not need to be multiplied
  alpha = np.exp(gamma)
- #size = 1.0 / alpha * mu ** Q
- alpha_size = alpha * mu ** Q
- # prob = size/(size+mu)
- prob = alpha / (alpha + mu)
- # prob = 1/(1+mu*alpha)
+
+ '''
+ alpha = gamma
+ size = 1.0 / alpha * mu ** Q
+
+ prob = size/(size+mu)
+
+

  '''test'''


+ '''
+ size = 1 / np.exp(gamma) * mu ** 0
+ prob = size / (size + mu)
+ coeff = (gammaln(size + y) - gammaln(y + 1) -
+ gammaln(size))
+ llf = coeff + size * np.log(prob) + y * np.log(1 - prob)
+ '''
+
  try:
  # print(np.shape(y),np.shape(size), np.shape(prob))
  #gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
@@ -3528,22 +3686,28 @@
  #start_time = time.time()
  #for _ in range(10000):

- #gg = self.negbinom_pmf(alpha_size, prob, y)
+
  #end_time = time.time()
  #print("Custom functieon time:", end_time - start_time)
  #start_time = time.time()
  #for _ in range(10000):
+ '''
  gg = np.exp(
  gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
  y + alpha) * np.log(mu + alpha))
  gg[np.isnan(gg)] = 1
+ '''
+ gg_alt = nbinom.pmf(y ,1/alpha, prob)
+ #gg_alt_2 = (gammaln(size + y) - gammaln(y + 1) -
+ #gammaln(size)) + size * np.log(prob) + y * np.log(1 - prob)
+ #print('check theses')
  #gg = nbinom.pmf(y ,alpha, prob)
  #end_time = time.time()
  #print("Custom functieon time:", end_time - start_time)

  except Exception as e:
- print(e)
- return gg
+ print("Neg Binom error.")
+ return gg_alt

  def lindley_pmf(self, x, r, theta, k=50):
  """
@@ -3690,8 +3854,8 @@

  if dispersion == 1 or dispersion == 4: # nb
  # if model_nature is not None and 'dispersion_penalty' in model_nature:
-
-
+ #b_gam = 1/np.exp(b_gam)
+ #print(b_gam)
  if b_gam <= 0:
  #penalty += 100
  #penalty += abs(b_gam)
@@ -3699,9 +3863,9 @@
  #b_gam = 1

  # if b_gam < 0.03:
- penalty += min(1, np.abs(b_gam))
+ penalty += min(1, np.abs(b_gam), 0)

- b_gam = 0.001
+ #b_gam = 0.001
  #

  #if b_gam >= 10:
@@ -3733,8 +3897,15 @@
  def eXB_calc(self, params_main, Xd, offset, dispersion, b_gam=None):

  # print('this was 0')
- eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
+ if dispersion:
+ eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
+
+ #eta= np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])+dispersion
+ #print('check if this holds size')
+ else:
+ eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
  eta = np.array(eta)
+
  # eta = np.float64(eta)
  # eta = np.dot(Xd, params_main)+offset[:,:,0]
  # eta2 = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
@@ -3753,7 +3924,7 @@

  else:
  # eVd = self.my_lindley(np.exp(np.clip(eta, None, EXP_UPPER_LIMIT)), 1.29)
-
+ eta = eta.astype('float')
  eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
  return eVd

@@ -3869,7 +4040,7 @@


  # proba_d = self.dnegbimonli(y, eVd, b_gam )
- # print('fuck if this actually works')
+

  elif dispersion == 2:

@@ -3890,7 +4061,7 @@
  # proba_r = self.poisson_lognormal_pmf(y, eVd, sig)
  proba_r = np.array(store)
  proba_r = np.atleast_2d(proba_r).T
- print(1)
+

  else:
  raise Exception('not implemented other modeling forms')
@@ -3907,6 +4078,8 @@
  if dispersion == 0 or dispersion == 3:
  return 0
  else:
+
+
  return 1

  def _prob_product_across_panels(self, pch, panel_info):
@@ -3962,7 +4135,7 @@
  if y[i] == 0:
  gr_e[i] = 0

- if self.is_dispersion(dispersion):
+ if self.is_dispersion(dispersion) and not self.no_extra_param:
  gr_d = np.zeros((N, 1))
  if dispersion == 1:
  # trying alt
@@ -4066,12 +4239,13 @@
  br, draws_, brstd, dis_fit_long) # (N,K,R)
  dprod_r = dev.np.einsum("njk,njr -> nkr", Xdr,
  einsum_model_form, dtype=np.float64) # (N,K,R)
- der_prod_r = dprod_r * der * proba_n[:, None, :] # (N,K,R)
- der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :] # I think this is the case check
- der_prod_r = dprod_r[:, X_tril_idx, :] * der * proba_n[:, None, :] # or this one
- print('which one of these')
+ #der_prod_r = dprod_r * der * proba_n[:, None, :] # (N,K,R)
+ #der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :] # I think this is the case check
+
+ der_prod_r = dprod_r * der * proba_n[:, None, :] # or this one
+
  der_t = self._compute_derivatives(
- br, draws_[:, draws_tril_idx, :], brstd, self.dist_fit) # (N,K,R)
+ br[draws_tril_idx], draws_[:, draws_tril_idx, :], brstd, np.array(self.dist_fit)[draws_tril_idx]) # (N,K,R)
  # er_t = self._compute_derivatives(br, draws_, brstd[:, draws_tril_idx,: ], self.dist_fit, draws_tril_idx)
  der_prod_r_t = dprod_r[:, draws_tril_idx, :] * \
  der_t * proba_n[:, None, :] # (N,K,R)
@@ -4132,14 +4306,18 @@
  grad_n = self._concat_gradients(
  (gr_f, gr_u, gr_s, gr_e)) / Rlik # (N,K)
  else:
- grad_n = self._concat_gradients(
- (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik # (N,K)
- grad_n = np.nan_to_num(grad_n, nan=0, posinf=10000, neginf=-10000)
- grad_n = np.clip(grad_n, -1000, 1000)
+ if self.no_extra_param:
+ grad_n = self._concat_gradients(
+ (gr_f, gr_u, gr_s, gr_h, gr_hs)) / Rlik # (N,K)
+ else:
+ grad_n = self._concat_gradients(
+ (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik # (N,K)
+ grad_n = np.nan_to_num(grad_n, nan=0, posinf=1000, neginf=-1000)
+ grad_n = np.clip(grad_n, -100, 100)
  n = np.shape(grad_n)[0]
  # subtract out mean gradient value
- # grad_n_sub = grad_n-(np.sum(grad_n, axis=0)/n)
- # grad_n = grad_n_sub
+ grad_n_sub = grad_n-(np.sum(grad_n, axis=0)/n)
+ grad_n = grad_n_sub
  grad = grad_n.sum(axis=0)
  return grad, grad_n

@@ -4290,7 +4468,7 @@
  return proba_r.sum(axis=1), np.squeeze(proba_r)

  def _penalty_betas(self, betas, dispersion, penalty, penalty_ap=100.0):
- penalty_val = 0.05
+ penalty_val = 0.1
  penalty_val_max = 130

  # print('change_later')
@@ -4306,8 +4484,8 @@
  if abs(i) > penalty_val_max:
  penalty += abs(i)

- # if abs(i) < penalty_val:
- # penalty += 5
+ #if abs(i) < penalty_val:
+ # penalty += 5

  # penalty = 0
  return penalty
@@ -4414,8 +4592,7 @@
  index += 1

  brstd = br_std
- print(brstd)
- print(brstd)
+

  def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
@@ -4447,7 +4624,7 @@
  penalty = self._penalty_betas(
  betas, dispersion, penalty, float(len(y) / 10.0))
  self.n_obs = len(y) # feeds into gradient
- if draws is None and draws_grouped is None and (
+ if draws is None and draws_grouped is None and (model_nature is None or
  'draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
  #TODO do i shuffle the draws
  if type(Xd) == dict:
@@ -4500,7 +4677,7 @@
  penalty = self.regularise_l2(betas)

  if not np.isreal(loglik):
- loglik = - 1000000000.0
+ loglik = - 10000000.0

  output = (-loglik + penalty,)
  if return_gradient:
@@ -4581,10 +4758,10 @@
  n_coeff = self.get_param_num(dispersion)
  Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
  if Kchol_a != Kchol:
- print('hold')
+ print('hold qhy')

  if Kr_b != Kr_b_a:
- print('hold')
+ print('hold qhy')


@@ -4594,17 +4771,41 @@
  Kf = 0
  else:
  if n_coeff != len(betas):
- raise Exception
- Bf = betas[0:Kf] # Fixed betas
-
+ raise Exception(

+ )
+ Bf = betas[0:Kf] # Fixed betas


+ Bf_new, Br_new, Br_std_new, Br_rema = self.extract_parameters(betas, Kf, Kr, Kchol_a, Kr_b_a)
+ if Bf_new != Bf:
+ print('check this')

  Vdf = dev.np.einsum('njk,k -> nj', Xdf, Bf, dtype=np.float64) # (N, P)
  br = betas[Kf:Kf + Kr]
+ if br != Br_new:
+ print('why')
+
+
+ #i have an array of betas, Kf represents the first kf of the betas array
+ # now return Bf where size of bf = kf
+
+ # size of br needs to be Kr
+ #Kr
+ #now extract from betas, after all the Bf
+ # cakk
+
+ #the next array is brstd
+
+ # size of brstd needs to be
+ # Kchol_a + Krb_a
+ #its grabbing from the
+
+

  brstd = betas[Kf + Kr:Kf + Kr + Kr_b + Kchol]
+ if brstd != Br_std_new:
+ print('okay')
  # initialises size matrix
  proba = [] # Temp batching storage

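The cross-checks above compare the manual slices against `extract_parameters`. A sketch of the slicing contract with toy numbers (note that `if Bf_new != Bf:` on NumPy arrays yields an element-wise result and raises for arrays longer than one element; `np.array_equal` is the safe comparison):

    import numpy as np

    betas = np.arange(10.0)                           # pretend estimate vector
    Kf, Kr, Kchol_a, Krb_a = 3, 2, 3, 1

    Bf = betas[:Kf]                                   # [0. 1. 2.]    fixed betas
    Br = betas[Kf:Kf + Kr]                            # [3. 4.]       random means
    brstd = betas[Kf + Kr:Kf + Kr + Kchol_a + Krb_a]  # [5. 6. 7. 8.] spreads/Cholesky
    remaining = betas[Kf + Kr + Kchol_a + Krb_a:]     # [9.]          e.g. dispersion
    print(np.array_equal(Bf, betas[:Kf]))             # True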
@@ -4618,6 +4819,8 @@
  if len(self.none_handler(self.rdm_cor_fit)) == 0:
  # Br = self._transform_rand_betas(br, np.abs(
  # brstd), draws_) # Get random coefficients, old method
+ #TODO
+ print('tril the draws')
  Br = self._transform_rand_betas(br,
  brstd, draws_) # Get random coefficients
  self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
@@ -4696,7 +4899,8 @@
  eVd = self.lam_transform(eVd, dispersion, betas[-1])

  if self.is_dispersion(dispersion):
- penalty, betas[-1] = self._penalty_dispersion(
+ if not self.no_extra_param:
+ penalty, betas[-1] = self._penalty_dispersion(
  dispersion, betas[-1], eVd, y, penalty, model_nature)

  '''
@@ -4740,7 +4944,7 @@
  proba.append(dev.to_cpu(proba_))

  lik = np.stack(proba).sum(axis=0) / R # (N, )
- lik = np.clip(lik, min_comp_val, 10000)
+ lik = np.clip(lik, min_comp_val, max_comp_val)
  # lik = np.nan_to_num(lik, )
  loglik = np.log(lik)
  llf_main = loglik
@@ -5070,7 +5274,7 @@
  H = self.numerical_hessian(lambda x: self._loglik_gradient(x, *argbs), result.x, eps=1e-7 * self.n_obs)
  result['Hessian'] = H
  result['hess_inv'] = np.linalg.pinv(H)
- print('to do, only if hessian is fhfhfhf')
+
  standard_errors = np.sqrt(np.diag(np.linalg.pinv(H)))
  return result
  # return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS', options= {'gtol':1e-7*self.N}*self.Ndraws)
@@ -5341,7 +5545,7 @@
  return a

  def fitRegression(self, mod,
- dispersion=0, maxiter=2000, batch_size=None, num_hess=False):
+ dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):

  """
  Fits a poisson regression given data and outcomes if dispersion is not declared
@@ -5358,7 +5562,7 @@


  sol = Solution()
- log_ll = 10.0 ** 9
+
  tol = {'ftol': 1e-8, 'gtol': 1e-6}
  is_delete = 0
  dispersion = mod.get('dispersion')
@@ -5387,6 +5591,8 @@
  _g, pg, kg = 0, 0, 0

  dispersion_param_num = self.is_dispersion(dispersion)
+ if self.no_extra_param:
+ dispersion_param_num =0

  #paramNum = self.get_param_num(dispersion)
  self.no_random_paramaters = 0
@@ -5441,17 +5647,26 @@
  else:
  bb[0] = self.constant_value
  if dispersion == 1:
- bb[-1] = self.negative_binomial_value
+ if not self.no_extra_param:
+ bb[-1] = self.negative_binomial_value
  bounds = None

+
+
  # intial_beta = minimize(self._loglik_gradient, bb, args =(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method = 'nelder-mead', options={'gtol': 1e-7*len(XX)})
  hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
- initial_beta = self._minimize(self._loglik_gradient, bb,
+
+ if self.no_extra_param:
+ dispersion_poisson = 0
+ initial_beta = self._minimize(self._loglik_gradient, bb,
  args=(XX, y, None, None, None, None, calc_gradient, hess_est,
- dispersion, 0, False, 0, None, None, None, None, None,
+ dispersion_poisson, 0, False, 0, None, None, None, None, None,
  mod),
  method=method2, tol=1e-5, options={'gtol': tol['gtol']},
  bounds=bounds)
+ if dispersion:
+ nb_parma = self.poisson_mean_get_dispersion(initial_beta.x, XX, y)
+



@@ -5551,7 +5766,7 @@

  b = [b[i] if i > len(self.none_handler(self.fixed_fit)) + len(
  self.none_handler(self.rdm_fit)) + len(
- self.none_handler(self.rdm_cor_fit)) else b[i] / 1.3 for i in range(len(b))]
+ self.none_handler(self.rdm_cor_fit)) else b[i] / 1 for i in range(len(b))]
  else:
  b = bb

@@ -5561,9 +5776,10 @@
  else:
  b = np.insert(b, -1, np.random.uniform(0.05, 0.1))
  if dispersion == 1:
- b[-1] = np.abs(b[-1])
- if b[-1] > 10:
- b[-1] = 5
+ if not self.no_extra_param:
+ b[-1] = np.abs(b[-1])
+ if b[-1] > 10:
+ b[-1] = 5
  elif dispersion == 2:
  b[-1] = .5
  if method == 'L-BFGS-B' or method2 == 'L-BFGS-B':
@@ -5689,13 +5905,35 @@

  if draws is None and draws_hetro is not None:
  print('hold')
- betas_est = self._minimize(self._loglik_gradient, b, args=(
- X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
- self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
- method=method2, tol=tol['ftol'],
- options={'gtol': tol['gtol']}, bounds=bounds,
- hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
-
+ #self.grad_yes = True
+ #self.hess_yes = True
+
+ if self.no_extra_param:
+ dispersion_poisson = 0
+ print('b :', len(b))
+ print(self.get_param_num())
+ baby = self.get_param_num()
+ if len(b) != baby:
+ print('modify')
+ betas_est = self._minimize(self._loglik_gradient, b, args=(
+ X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion_poisson, 0, False, 0,
+ self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
+ method=method2, tol=tol['ftol'],
+ options={'gtol': tol['gtol']}, bounds=bounds,
+ hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+ if dispersion:
+ initial_fit_beta = betas_est.x
+ parmas = np.append(initial_fit_beta, nb_parma)
+ self.nb_parma = nb_parma
+ #print(f'neg binomi,{self.nb_parma}')
+ betas_est = self._minimize(self._loglik_gradient, initial_fit_beta, args=(
+ X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
+ self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
+ method=method2, tol=tol['ftol'],
+ options={'gtol': tol['gtol']}, bounds=bounds,
+ hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
+
+ #print('refit with estimation of NB')
  # self.numerical_hessian_calc = True
  if self.numerical_hessian_calc:
  try:
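The block above implements a two-stage strategy when `no_extra_param` is set: fit the Poisson model first, back out a dispersion value from its fitted means, then refit the negative binomial with that dispersion held fixed. A hedged sketch of the same idea using the statsmodels GLM API and synthetic data (not the package's own optimizer):

    import numpy as np
    import statsmodels.api as sm

    rng = np.random.default_rng(3)
    X = sm.add_constant(rng.normal(size=(1000, 2)))
    mu = np.exp(X @ np.array([0.5, 0.3, -0.2]))
    y = rng.negative_binomial(2.0, 2.0 / (2.0 + mu))          # true alpha = 0.5

    # Stage 1: Poisson fit, then a moment-based dispersion estimate.
    mu_hat = sm.GLM(y, X, family=sm.families.Poisson()).fit().mu
    alpha_hat = sm.OLS(((y - mu_hat) ** 2 - y) / mu_hat, mu_hat).fit().params[0]

    # Stage 2: negative binomial refit with the dispersion held fixed.
    nb_fit = sm.GLM(y, X, family=sm.families.NegativeBinomial(alpha=alpha_hat)).fit()
    print(round(alpha_hat, 2), nb_fit.params.round(2))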
@@ -5994,7 +6232,7 @@
  return delim + self._model_type_codes[dispersion]

  def self_standardize_positive(self, X):
- scaler = StandardScaler()
+ scaler = MinMaxScaler()
  if type(X) == list:
  return X

@@ -6004,12 +6242,26 @@
  # Reshaping to 2D - combining the last two dimensions
  df_tf_reshaped = X.reshape(original_shape[0], -1)
  df_tf_scaled = scaler.fit_transform(df_tf_reshaped)
- df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
+ #df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
  # Reshape back to original 3D shape if necessary
  df_tf = df_tf_scaled.reshape(original_shape)
  return df_tf
  else:
- raise X
+ # Initialize the MinMaxScaler
+ scaler = MinMaxScaler()
+ float_columns = X.select_dtypes(include=['float64', 'float32', 'int']).columns.difference(['const', 'offset, "EXPOSE', 'Constant', 'constant'])
+ non_numeric_columns = X.select_dtypes(exclude=['float64', 'float32', 'int']).columns
+
+ # Fit the scaler to the float columns and transform them
+ X[float_columns] = scaler.fit_transform(X[float_columns])
+ # Fit the scaler to the data and transform it
+ #scaled_data = scaler.fit_transform(X)
+
+ # Convert the result back to a DataFrame
+ #scaled_df = pd.DataFrame(scaled_data, columns=X.columns)
+
+
+ return X

  def make_regression_from_terms(self, fixed=None, rdm=None, rdm_cor_fit=None, distribution=None, dispersion=None,
  *args, **kwargs):
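The reworked `self_standardize_positive` MinMax-scales only the numeric columns of a DataFrame and skips constant/offset columns so the intercept survives untouched. A minimal sketch with hypothetical column names:

    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler

    X = pd.DataFrame({'const': 1.0,
                      'AADT': [1200.0, 5400.0, 800.0],
                      'length': [0.4, 1.1, 2.3]})
    float_cols = X.select_dtypes(include=['float64', 'int']).columns.difference(['const'])
    X[float_cols] = MinMaxScaler().fit_transform(X[float_cols])
    print(X)   # 'const' stays 1.0; 'AADT' and 'length' now span [0, 1]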
@@ -6064,8 +6316,9 @@
  df_test[:, :, idx], model_nature.get('transformations')[idx] = self.transformer(
  t, idx, df_test[:, :, idx])
  if np.max(df_tf[:, :, idx]) >= 77000:
+ #TODO need to normalise the data

- raise Exception('should not be possible')
+ print('should not be possible')

  self.define_selfs_fixed_rdm_cor(model_nature)
  indices = self.get_named_indices(self.fixed_fit)
@@ -6122,7 +6375,7 @@
  model_nature['XH'] = XH
  X_test = None
  if np.isin(X, [np.inf, -np.inf, None, np.nan]).any(): # type ignore
- raise Exception('there is some kind of error')
+ raise Exception('there is some kind of error in X')

  # numpy data setup fpr estimation
  indices2 = self.get_named_indices(self.rdm_fit)
@@ -6222,7 +6475,7 @@
  self.log_lik = log_lik
  if self.significant == 0:

- print(self.full_model, 'full model is')
+
  if not self.test_flag:
  alpha, alpha_rdm, alpha_cor_rdm = self.modify(
  self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
@@ -6273,6 +6526,93 @@

  return obj_1, model_nature

+ def get_X_tril(self):
+ '''For correlations find the repeating terms'''
+ varnames = self.none_join([self.rdm_grouped_fit, self.rdm_fit, self.rdm_cor_fit])
+ rv_count_all = 0
+ chol_count = 0
+ rv_count = 0
+ corr_indices = []
+ rv_indices = []
+ for ii, var in enumerate(varnames): # TODO: BUGFIXf
+ if var in self.none_handler(self.rdm_cor_fit):
+ is_correlated = True
+ else:
+ is_correlated = False
+
+ rv_count_all += 1
+ if is_correlated:
+ chol_count += 1
+ else:
+ rv_count += 1
+
+ if var in self.none_handler(self.rdm_cor_fit):
+
+ corr_indices.append(rv_count_all - 1) # TODO: what does tis do
+
+ else:
+ rv_indices.append(rv_count_all - 1)
+
+
+ X_tril_idx = np.array([corr_indices[i]
+ for i in range(len(self.none_handler(self.rdm_cor_fit)))
+ for j in range(i + 1)])
+ # Find the s.d. for random variables that are not correlated
+ var_uncor = self.none_join([self.rdm_grouped_fit, self.rdm_fit])
+ range_var = [x for x in
+ range(len(self.none_handler(var_uncor)))]
+ range_var = sorted(range_var)
+
+ X_tril_idx = np.array(np.concatenate((range_var, X_tril_idx)))
+
+ X_tril_idx = X_tril_idx.astype(int)
+ return X_tril_idx
+
+ def get_X_draw_tril(self):
+ '''For correlations find the repeating terms'''
+ varnames = self.none_join([self.rdm_grouped_fit, self.rdm_fit, self.rdm_cor_fit])
+ rv_count_all = 0
+ chol_count = 0
+ rv_count = 0
+ corr_indices = []
+ rv_indices = []
+ for ii, var in enumerate(varnames): # TODO: BUGFIXf
+ if var in self.none_handler(self.rdm_cor_fit):
+ is_correlated = True
+ else:
+ is_correlated = False
+
+ rv_count_all += 1
+ if is_correlated:
+ chol_count += 1
+ else:
+ rv_count += 1
+
+ if var in self.none_handler(self.rdm_cor_fit):
+
+ corr_indices.append(rv_count_all - 1) # TODO: what does tis do
+
+ else:
+ rv_indices.append(rv_count_all - 1)
+
+ # for s.d.: gr_w = (Obs prob. minus predicted probability) * obs. var * random draw
+ draws_tril_idx = np.array([corr_indices[j]
+ for i in range(len(self.none_handler(self.rdm_cor_fit)))
+ for j in range(i + 1)]) # varnames pos.
+
+ # Find the s.d. for random variables that are not correlated
+ var_uncor = self.none_join([self.rdm_grouped_fit, self.rdm_fit])
+ range_var = [x for x in
+ range(len(self.none_handler(var_uncor)))]
+ range_var = sorted(range_var)
+ draws_tril_idx = np.array(np.concatenate((range_var, draws_tril_idx)))
+
+ draws_tril_idx = draws_tril_idx.astype(int)
+
+ return draws_tril_idx
+
+
+
  def modifyn(self, data):
  select_data = self._characteristics_names
  alpha = np.isin(select_data, [item.split(':')[0] for item in data['fixed_fit']]).astype(int).tolist()
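`get_X_tril` and `get_X_draw_tril` build index vectors that walk the lower-triangular Cholesky entries of the correlated random parameters while uncorrelated parameters keep their own positions in front. A compact sketch of the expansion (hypothetical variable names; ordering assumes uncorrelated parameters precede correlated ones, as in `none_join([rdm_grouped_fit, rdm_fit, rdm_cor_fit])`):

    import numpy as np

    uncorrelated = ['speed']              # hypothetical uncorrelated random parameters
    correlated = ['aadt', 'length']       # hypothetical correlated random parameters
    offset = len(uncorrelated)
    corr_indices = [offset + i for i in range(len(correlated))]   # [1, 2]

    # Row-wise walk of the lower triangle: row i contributes i + 1 entries.
    draws_tril_idx = np.concatenate((
        np.arange(offset),
        [corr_indices[j] for i in range(len(correlated)) for j in range(i + 1)],
    )).astype(int)
    print(draws_tril_idx)                 # [0 1 1 2]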
@@ -6480,23 +6820,35 @@
  # N, D = draws.shape[0], draws.shape[1]
  N, R, Kr = draws.shape[0], draws.shape[2], draws.shape[1]
  der = dev.np.ones((N, Kr, R), dtype=draws.dtype)
- if len(self.none_handler(self.rdm_cor_fit)) == 0:
- Br_come_one = self.Br.copy()
- # Br_come_one =
- else:

- Br_come_one = self.Br.copy()
  # betas_random = self._transform_rand_betas(betas, betas_std, draws)
  #todo make sure this works for ln and truncated normal
  if any(set(distribution).intersection(['ln_normal', 'tn_normal'])):
- print('check this, intesection shouldn not happen for all')
+
+ #print('check this, intesection shouldn not happen for all')
+
+ if der.shape[1] != draws.shape[1]:
+ print('why')
  Br_come_one = self._transform_rand_betas(betas, betas_std, draws)
+ if der.shape[1] != draws.shape[1]:
+ print('why')
+ #TODO need to get the stuction of the rdms
  for k, dist_k in enumerate(distribution):
  if dist_k == 'ln_normal':
+ if der.shape[1] != draws.shape[1]:
+ print('why')
  der[:, k, :] = Br_come_one[:, k, :]
+ if der.shape[1] != draws.shape[1]:
+ print('why')
  elif dist_k == 'tn_normal':
+ if der.shape[1] != draws.shape[1]:
+ print('why')
  der[:, k, :] = 1 * (Br_come_one[:, k, :] > 0)
+ if der.shape[1] != draws.shape[1]:
+ print('why')

+ if der.shape[1] != draws.shape[1]:
+ print('why')
  return der

  def _copy_size_display_as_ones(self, matrix):