metacountregressor 0.1.73__py3-none-any.whl → 0.1.83__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,12 +9,10 @@ import math
9
9
  import os
10
10
  import random
11
11
  import sys
12
- import time
13
12
  import warnings
14
13
  from collections import Counter
15
14
  from functools import wraps
16
15
 
17
- from tempfile import TemporaryFile
18
16
  import traceback
19
17
  import latextable
20
18
  import numpy as np
@@ -32,18 +30,25 @@ from scipy.special import gammaln
32
30
  from sklearn.metrics import mean_absolute_error as MAE
33
31
  from sklearn.metrics import mean_squared_error as MSPE
34
32
  from statsmodels.tools.numdiff import approx_fprime, approx_hess
35
- from sklearn.preprocessing import StandardScaler
33
+ from sklearn.preprocessing import StandardScaler, MinMaxScaler
36
34
  from texttable import Texttable
37
-
38
- from ._device_cust import device as dev
39
- from .pareto_file import Pareto, Solution
40
-
35
+ import time
36
+ try:
37
+ from ._device_cust import device as dev
38
+ from .pareto_file import Pareto, Solution
39
+ from .data_split_helper import DataProcessor
40
+ except ImportError:
41
+ from _device_cust import device as dev
42
+ from pareto_file import Pareto, Solution
43
+ from data_split_helper import DataProcessor
44
+
45
+ from scipy import stats
41
46
  np.seterr(divide='ignore', invalid='ignore')
42
47
  warnings.simplefilter("ignore")
43
48
 
44
- # defube the computation boundary limits
45
- min_comp_val = 1e-200
46
- max_comp_val = 1e+300
49
+ # define the computation boundary limits
50
+ min_comp_val = 1e-160
51
+ max_comp_val = 1e+200
47
52
  log_lik_min = -1e+200
48
53
  log_lik_max = 1e+200
49
54
 
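Note on the new import block above: the try/except pattern lets the module load its siblings both when it is imported as part of the installed package (relative imports) and when the file is run directly as a script (absolute imports). A minimal sketch of the pattern, using a hypothetical sibling module `helpers`:

    # Package-or-script import fallback (sketch; `helpers` is a made-up sibling module).
    try:
        from .helpers import device      # works when imported as part of the package
    except ImportError:
        from helpers import device       # works when run as a standalone script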
@@ -117,24 +122,24 @@ class ObjectiveFunction(object):
117
122
 
118
123
  def __init__(self, x_data, y_data, **kwargs):
119
124
 
120
- self.reg_penalty = .5
125
+ self.reg_penalty = 0
121
126
  self.power_up_ll = False
127
+ self.nb_parma = 1
122
128
  self.bic = None
123
129
  self.other_bic = False
130
+ self.test_flag = 1
131
+ self.no_extra_param = 1  # if true, fix dispersion
124
132
  if self.other_bic:
125
133
  print('change this to false later')
126
- offset = None
127
134
 
128
- # initi
129
- self.constant_value = -5.5
130
- self.negative_binomial_value = 0.05
135
+ # initialize values
136
+ self.constant_value = 0
137
+ self.negative_binomial_value = 1
131
138
 
132
- self.verbose_safe = True
133
- self.zi_force = None # Analst want a zi model and formally declares the zi components below
134
- self.zi_force_names = None # delare the zi components
139
+ self.verbose_safe = kwargs.get('verbose', 0)
135
140
  self.please_print = kwargs.get('please_print', 0)
136
141
  self.group_halton = None
137
- self.grad_yes = False
142
+ self.grad_yes = kwargs.get('grad_est', False)
138
143
  self.hess_yes = False
139
144
  self.group_halton_test = None
140
145
  self.panels = None
@@ -145,41 +150,50 @@ class ObjectiveFunction(object):
145
150
  self.rdm_fit = None
146
151
  self.rdm_cor_fit = None
147
152
  self.dist_fit = None
148
- self.zi_fit = None
153
+
149
154
  self.MAE = None
150
- self.best_obj_1 = 100000000.0
151
- self._obj_1 = 'bic'
152
- self._obj_2 = 'MSE'
155
+ self.best_obj_1 = 1000000.0
156
+ self._obj_1 = kwargs.get('_obj_1', 'bic')
157
+ self._obj_2 = kwargs.get('_obj_2', 'MSE')
153
158
  self.numerical_hessian_calc = 0 # calculates hessian by statsmodels otherwise scipy
154
159
  self.full_model = None
155
160
  self.GP_parameter = 0
156
- self.is_multi = 0
161
+ self.is_multi = kwargs.get('is_multi', False)
157
162
  self.complexity_level = 6
158
- self._max_iterations_improvement = 100
163
+ self._max_iterations_improvement = 10000
159
164
  self.generated_sln = set()
160
165
  self.ave_mae = 0
161
- # defualt paraamaters for hs
166
+ # default parameters for hs  # TODO: unpack into harmony search class
162
167
  self.algorithm = 'hs'  # 'sa' and 'de' also available
163
168
  self._hms = 20
164
169
  self._max_time = 60 * 60 * 24
165
170
  self._hmcr = .5
166
- self._par = 0.3
171
+ self._par = 0.3  # don't think this gets used
167
172
  self._mpai = 1
168
173
  self._max_imp = 100000
169
- self._WIC = 1000 # Number of ITerations without Multiobjective Improvement
174
+ self._WIC = 1000  # Number of Iterations without Multiobjective Improvement  # TODO: chuck into solution
170
175
  self._panels = None
171
176
  self.is_multi = True
172
177
  self.method_ll = 'Nelder-Mead-BFGS'
178
+
173
179
  self.method_ll = 'L-BFGS-B'  # alternatives: 'BFGS_2', 'BFGS'
174
- self.method_ll = 'BFGS_2'
180
+ self.method_ll = kwargs.get('method', 'BFGS_2')
181
+
182
+ #self.method_ll = 'Nelder-Mead-BFGS'
175
183
  self.Keep_Fit = 2
176
184
  self.MP = 0
177
185
  # Nelder-Mead-BFGS
178
186
 
179
- self._max_characteristics = 26
187
+ self._max_characteristics = kwargs.get('_max_vars', 26)
180
188
 
181
189
  self.beta_dict = dict
190
+ if 'model_terms' in kwargs:
191
+ print('change')
192
+ if kwargs.get('model_terms').get('group') is not None:
193
+ kwargs['group'] = kwargs.get('model_terms').get('group')
182
194
 
195
+ if kwargs.get('model_terms').get('panels') is not None:
196
+ kwargs['panels'] = kwargs.get('model_terms').get('panels')
183
197
  acceptable_keys_list = ['_par', '_max_imp', '_hmcr', 'steps',
184
198
  'algorithm', '_random_seed', '_max_time',
185
199
  'forcedvariables', '_obj_1', '_obj_2', '_par',
@@ -190,11 +204,6 @@ class ObjectiveFunction(object):
190
204
  if k in acceptable_keys_list:
191
205
  self.__setattr__(k, self.tryeval(kwargs[k]))
192
206
 
193
- if self.zi_force_names is not None:
194
- self.zi_force = True
195
- if 'const' not in self.zi_force_names:
196
- self.zi_force_names = ['const'] + self.zi_force_names
197
- print('did this work?')
198
207
 
199
208
  if 'complexity_level' in kwargs:
200
209
  self.complexity_level = kwargs['complexity_level']
@@ -202,35 +211,52 @@ class ObjectiveFunction(object):
202
211
  if 'instance_number' in kwargs:
203
212
  self.instance_number = str(kwargs['instance_number'])
204
213
  else:
214
+
215
+ print('no name set, setting name as 0')
205
216
  self.instance_number = str(0) # set an arbitrary instance number
206
217
 
207
218
  if not os.path.exists(self.instance_number):
208
- os.makedirs(self.instance_number)
219
+ if kwargs.get('make_directory', True):
220
+ print('Making a directory; to stop files being stored to this directory, set the argument make_directory=False')
221
+ os.makedirs(self.instance_number)
209
222
 
210
223
  if not hasattr(self, '_obj_1'):
224
+ print('_obj_1 required, define as bic, aic, ll')
211
225
  raise Exception
212
226
 
213
227
  self.pvalue_penalty = float(kwargs.get('pvalue_penalty', 0.5))
214
-
215
228
  self.pvalue_exceed = 0
216
229
  self._maximize = False # do we maximize or minimize?
217
- # self._random_forest_identify_transformations(x_data, y_data)
218
- # data_names = self._random_forest_preprocess(x_data, y_data)
230
+
219
231
  x_data = sm.add_constant(x_data)
232
+ standardize_the_data = 0
233
+ if standardize_the_data:
234
+ print('we are standardizing the data')
235
+ x_data = self.self_standardize_positive(x_data)
236
+
220
237
  self._input_data(x_data, y_data)
238
+
239
+
221
240
  if y_data.ndim == 1:
222
241
  y_data = pd.DataFrame(y_data)
223
242
 
224
- # split the data for testing
243
+ '''
244
+ #TODO ADD THIS IN LATER
245
+ splitter = DataProcessor(x_data, y_data, kwargs)
246
+ self.copy_class_attributes(splitter) #inherit the self objects
247
+ '''
248
+
225
249
  if self._obj_1 == 'MAE' or self._obj_2 in ["MAE", 'RMSE', 'MAE', 'MSE', 'RMSE_IN', 'RMSE_TEST']:
226
250
  self.test_percentage = float(kwargs.get('test_percentage', 0))
227
251
  self.val_percentage = float(kwargs.get('val_percentage', 0))
228
252
  if self.test_percentage == 0:
253
+ print('test percentage is 0, please enter arg test_percentage as a decimal, e.g. 0.8')
254
+ print('continuing single objective')
255
+ time.sleep(2)
229
256
  self.is_multi = False
230
257
 
231
- if 'panels' in kwargs:
232
- self.group_names = np.asarray(x_data[kwargs['group']].astype(
233
- 'category').cat._parent.dtype.categories)
258
+ if 'panels' in kwargs and not (kwargs.get('panels') == None):
259
+ self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)
234
260
 
235
261
  x_data[kwargs['group']] = x_data[kwargs['group']].astype(
236
262
  'category').cat.codes
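The group/panel handling above relies on pandas categorical codes to turn arbitrary group labels into contiguous integers while keeping the original names available. A tiny sketch of what that conversion does:

    import pandas as pd

    s = pd.Series(['b', 'a', 'b', 'c'])
    cats = s.astype('category')
    print(list(cats.cat.categories))   # ['a', 'b', 'c'], the stored names
    print(list(cats.cat.codes))        # [1, 0, 1, 2], integer codes used downstream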
@@ -243,59 +269,40 @@ class ObjectiveFunction(object):
243
269
 
244
270
  N = len(np.unique(x_data[kwargs['panels']].values))
245
271
  id_unique = np.unique(x_data[kwargs['panels']].values)
246
-
247
272
  except KeyError:
248
273
  N = len(np.unique(x_data[kwargs['panels']]))
274
+ id_unique = np.unique(x_data[kwargs['panels']].values)
249
275
 
250
276
  training_size = int((1 - self.test_percentage - self.val_percentage) * N)
251
277
  ids = np.random.choice(N, training_size, replace=False)
252
278
  ids = id_unique[ids]
253
279
  train_idx = [ii for ii, id_val in enumerate(x_data[kwargs['panels']]) if id_val in ids]
254
280
  test_idx = [ii for ii, id_val in enumerate(x_data[kwargs['panels']]) if id_val not in ids]
255
-
256
281
  df_train = x_data.loc[train_idx, :]
257
282
  df_test = x_data.loc[test_idx, :]
258
283
  y_train = y_data.loc[train_idx, :]
259
284
  y_test = y_data.loc[test_idx, :]
260
-
261
285
  else:
262
286
  N = len(x_data)
263
287
  training_size = int((1 - self.test_percentage - self.val_percentage) * N)
264
288
  ids = np.random.choice(N, training_size, replace=False)
265
289
  id_unique = np.array([i for i in range(N)])
266
290
  ids = id_unique[ids]
267
-
268
291
  train_idx = [ii for ii in range(len(id_unique)) if id_unique[ii] in ids]
269
292
  test_idx = [ii for ii in range(len(id_unique)) if id_unique[ii] not in ids]
293
+ df_train = x_data.loc[train_idx, :]
294
+ df_test = x_data.loc[test_idx, :]
295
+ y_train = y_data.loc[train_idx, :]
296
+ y_test = y_data.loc[test_idx, :]
270
297
 
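Both the panel branch above and the plain branch implement the same split strategy: draw IDs without replacement, then assign every row whose ID was drawn to the training set, so grouped observations are never separated. A standalone sketch of the grouped split on hypothetical data:

    import numpy as np
    import pandas as pd

    rng = np.random.default_rng(42)
    df = pd.DataFrame({'panel': [1, 1, 2, 2, 3, 3], 'x': range(6)})

    test_percentage = 1 / 3
    ids = df['panel'].unique()
    train_ids = rng.choice(ids, size=int((1 - test_percentage) * len(ids)), replace=False)

    train_mask = df['panel'].isin(train_ids)   # whole panels stay together
    df_train, df_test = df[train_mask], df[~train_mask]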
271
- try: # @IgnoreException
272
- df_train = x_data.loc[train_idx, :]
273
- df_test = x_data.loc[test_idx, :]
274
- y_train = y_data.loc[train_idx, :]
275
- y_test = y_data.loc[test_idx, :]
276
- except:
277
- # Convert all values to their real parts
278
- df_real = x_data.select_dtypes(include=[np.number]).apply(np.real)
279
-
280
- # Replace the original DataFrame's numerical columns with real-valued ones
281
- x_data[df_real.columns] = df_real
282
-
283
- df_train = x_data.iloc[train_idx, :]
284
- df_test = x_data.iloc[test_idx, :]
285
- y_train = y_data.iloc[train_idx, :]
286
- y_test = y_data.iloc[test_idx, :]
287
298
 
288
- self.n_obs = N
299
+ #self.n_obs = N
289
300
  self._characteristics_names = list(self._x_data.columns)
290
- if self.zi_force:
291
- self.alpha_hurdle = np.isin(self._characteristics_names,
292
- [item.split(':')[0] for item in self.zi_force_names]).astype(int).tolist()
293
-
294
- self._max_group_all_means = 1
301
+ self._max_group_all_means = 2
295
302
 
296
303
  exclude_this_test = [4]
297
-
298
- if 'panels' in kwargs:
304
+
305
+ if 'panels' in kwargs and not (kwargs.get('panels') == None):
299
306
  self.panels = np.asarray(df_train[kwargs['panels']])
300
307
  self.panels_test = np.asarray(df_test[kwargs['panels']])
301
308
  self.ids = np.asarray(
@@ -309,9 +316,10 @@ class ObjectiveFunction(object):
309
316
  X, Y, panel, group = self._arrange_long_format(
310
317
  df_train, y_train, self.ids, self.ids, groupll)
311
318
  self.group_halton = group.copy()
312
- Y = Y.astype('float')
313
319
  self.group_dummies = pd.get_dummies(group)
314
320
  Xnew, Ynew, panel_info = self._balance_panels(X, Y, panel)
321
+
322
+ Xnew = pd.DataFrame(Xnew, columns=X.columns)
315
323
  self.panel_info = panel_info
316
324
  self.N, self.P = panel_info.shape
317
325
  Xnew.drop(kwargs['panels'], axis=1, inplace=True)
@@ -319,12 +327,13 @@ class ObjectiveFunction(object):
319
327
  K = Xnew.shape[1]
320
328
  self._characteristics_names = list(Xnew.columns)
321
329
  XX = Xnew.values.reshape(self.N, self.P, K).copy()
330
+ XX = XX.astype('float')
322
331
  self.group_dummies = self.group_dummies.values.reshape(self.N, self.P, -1)
323
332
  self.group_halton = self.group_halton.reshape(self.N, self.P)[:, 0]
324
333
  YY = Ynew.values.reshape(self.N, self.P, 1).copy()
334
+ YY = YY.astype('float')
325
335
  self._x_data = XX.copy()
326
336
  self._y_data = YY.copy()
327
- # Xalt, Yalt, group_info = self._balance_panels(X, Y, group)
328
337
  X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.panels_test, group_test)
329
338
  if np.max(group) > 50:
330
339
  exclude_this_test = [4]
@@ -344,6 +353,7 @@ class ObjectiveFunction(object):
344
353
  K = X.shape[1]
345
354
  self.columns_names = X.columns
346
355
  X = X.values.reshape(self.N_test, self.P_test, K)
356
+ X = X.astype('float')
347
357
  self.group_halton_test = self.group_halton_test.reshape(self.N_test, self.P_test)[:, 0]
348
358
  Y = Y.values.reshape(self.N_test, self.P_test, 1)
349
359
  Y = Y.astype('float')
@@ -353,9 +363,10 @@ class ObjectiveFunction(object):
353
363
 
354
364
  self._samples, self._panels, self._characteristics = self._x_data.shape
355
365
 
356
- # i dont think i need this X, Y, group_info = self._balance_panels(XX, YY, group)
366
+
357
367
 
358
368
  else:
369
+ print('No panels. Grouped random parameters will not be estimated')
359
370
  self.G = None
360
371
  self._Gnum = 1
361
372
  self._max_group_all_means = 0
@@ -372,82 +383,48 @@ class ObjectiveFunction(object):
372
383
  K = Xnew.shape[1]
373
384
  self._characteristics_names = list(Xnew.columns)
374
385
  XX = Xnew.values.reshape(self.N, self.P, K).copy()
375
- # self.group_dummies = self.group_dummies.values.reshape(self.N, self.P, -1)
376
- # self.group_halton = self.group_halton.reshape(self.N, self.P)[:, 0]
386
+ XX = XX.astype('float')
377
387
  YY = Ynew.values.reshape(self.N, self.P, 1).copy()
388
+ YY = YY.astype('float')
378
389
  self._x_data = XX.copy()
379
390
  self._y_data = YY.copy()
380
- # Xalt, Yalt, group_info = self._balance_panels(X, Y, group)
391
+
381
392
  if self.is_multi:
382
393
  X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.ids_test, None)
383
394
  if np.max(group) > 50:
384
395
  exclude_this_test = [4]
385
396
  else:
386
397
  exclude_this_test = []
387
- # self.group_halton_test = group.copy()
388
398
  X, Y, panel_info = self._balance_panels(X, Y, panel)
389
-
399
+
390
400
  self.N_test, self.P_test = panel_info.shape
391
-
392
- # self.group_dummies_test = pd.get_dummies(group)
393
- # self.group_dummies_test = self.group_dummies_test.values.reshape(self.N_test, self.P_test, -1)
394
401
  K = X.shape[1]
395
402
  self.columns_names = X.columns
396
403
  X = X.values.reshape(self.N_test, self.P_test, K)
397
- # self.group_halton_test = self.group_halton_test.reshape(self.N_test, self.P_test)[:, 0]
404
+ X = X.astype('float')
398
405
  Y = Y.values.reshape(self.N_test, self.P_test, 1)
406
+ Y = Y.astype('float')
399
407
  self._x_data_test = X.copy()
400
408
  self.y_data_test = Y.copy()
401
-
409
+
402
410
  self._samples, self._panels, self._characteristics = self._x_data.shape
403
411
 
404
- # draws and pvalue
405
-
406
- if 'Offset' in self._characteristics_names:
407
- offset = True
408
- self.have_offset = offset
409
- if self.have_offset is not None:
410
- try:
411
- # offset for training data
412
- # define offset
413
- val_od = self.get_named_indices(['Offset'])
414
- self._offsets = self._x_data[:, :, val_od]
415
-
416
- # drop the offset from the data
417
- self._x_data = np.delete(self._x_data, val_od, axis=2)
418
- self._characteristics_names = [x for x in self._characteristics_names if not 'Offset' in x]
419
- self._characteristics = len(self._characteristics_names)
420
- # self._x_data.drop(columns=['Offset'], inplace=True)
421
-
422
- # offset for testing data
423
- if self.is_multi:
424
- # define offset
425
- self._offsets_test = self._x_data_test[:, :, val_od]
426
- # self._offsets_test = self._x_data_test['Offset'].to_numpy()
427
- # self._offsets_test = np.reshape(
428
- # self._offsets_test, (-1, 1))
429
- # drop the offset from the data
430
- self._x_data_test = np.delete(self._x_data_test, val_od, axis=2)
431
- # self._x_data_test.drop(columns=['Offset'], inplace=True)
432
- except:
433
- # if no offset, set as 0
434
- self._offsets = np.zeros((self.N, self.P, 1))
435
- if self.is_multi:
436
- self._offsets_test = np.zeros((self.N_test, self.P_test, 1))
437
- else:
438
- self._offsets = np.zeros((self.N, self.P, 1))
439
- if self.is_multi:
440
- self._offsets_test = np.zeros((self.N_test, self.P_test, 1))
441
412
 
413
+ #Define the offset into the data
414
+ self.process_offset()
442
415
  if self.is_multi:
443
416
  self.pareto_printer = Pareto(self._obj_1, self._obj_2, True)
444
-
445
417
  self._pareto_population = list()
446
- self.Ndraws = 200 # todo: change back
418
+
419
+
420
+
421
+ self.Ndraws = kwargs.get('Ndraws', 200)
447
422
  self.draws1 = None
448
423
  self.initial_sig = 1 # pass the test of a single model
449
424
  self.pvalue_sig_value = .1
450
425
  self.observations = self._x_data.shape[0]
426
+ self.minimize_scaler = 1/self.observations # scale the minimization function to the observations
427
+
451
428
  self.batch_size = None
452
429
  # open the file in the write mode
453
430
  self.grab_transforms = 0
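The new `minimize_scaler = 1/self.observations` turns the raw objective into a per-observation average, which keeps gradient magnitudes and optimizer stopping tolerances comparable across sample sizes. A sketch of the idea, with a hypothetical `neg_log_lik`:

    # Scaling by 1/N leaves the argmin unchanged but gives the optimizer a
    # better-conditioned objective whether N is 100 or 100,000.
    def scaled_objective(betas, neg_log_lik, n_obs):
        return neg_log_lik(betas) / n_obs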
@@ -459,17 +436,19 @@ class ObjectiveFunction(object):
459
436
  print('Setup Complete...')
460
437
  else:
461
438
  print('No Panels Supplied')
439
+ print('Setup Complete...')
462
440
  self._characteristics_names = list(self._x_data.columns)
463
441
  # define the variables
464
442
  # self._transformations = ["no", "sqrt", "log", "exp", "fact", "arcsinh", 2, 3]
465
443
  self._transformations = ["no", "sqrt", "log", "arcsinh"]
466
444
  self._transformations = kwargs.get('_transformation', ["no", "sqrt", "log", 'arcsinh'])
467
-
445
+ self._transformations = kwargs.get('_transformation', ["no", "log", "sqrt", "arcsinh"])
468
446
  # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']
469
447
 
470
- self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'lm_normal', 'tn_normal'])
448
+ self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])
471
449
 
472
450
  if self.G is not None:
451
+ #TODO need to handle this for groups
473
452
  self._distribution = ["trad| " + item for item in self._distribution
474
453
  ] + ["grpd| " + item for item in self._distribution]
475
454
 
@@ -480,20 +459,33 @@ class ObjectiveFunction(object):
480
459
  self.coeff_ = None
481
460
 
482
461
  self.significant = 0
483
- # define the states of our explanaotory variables
462
+ # define the states of our explanatory variables
463
+
484
464
 
485
465
  self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test,
486
- kwargs.get('must_include', []))
466
+ kwargs.get('must_include', []), extra = kwargs.get('decisions', None))
467
+
468
+
469
+
487
470
  self._discrete_values = self._discrete_values + \
488
- [[x for x in self._distribution]] * self._characteristics
471
+ self.define_distributions_analyst(extra=kwargs.get('decisions', None))
489
472
 
490
473
  if 'model_types' in kwargs:
491
474
  model_types = kwargs['model_types']
492
475
  else:
493
- model_types = [[0, 1]] # add 2 for Generalized Poisson
494
476
 
477
+
478
+ model_types = [[0, 1]] # add 2 for Generalized Poisson
479
+ #model_types = [[0]]
480
+ #TODO change back and fix NB
481
+ model_t_dict = {'Poisson':0,
482
+ "NB":1}
483
+ # Retrieve the keys (model names) corresponding to the values in model_types
484
+ model_keys = [key for key, value in model_t_dict.items() if value in model_types[0]]
485
+ # Print the formatted result
486
+ print(f'The type of models possible will consider: {", ".join(model_keys)}')
495
487
  self._discrete_values = self._discrete_values + self.define_poissible_transforms(
496
- self._transformations) + model_types
488
+ self._transformations, kwargs.get('decisions',None)) + model_types
497
489
 
498
490
  self._model_type_codes = ['p', 'nb',
499
491
  'gp', "pl", ["nb-theta", 'nb-dis']]
@@ -506,21 +498,85 @@ class ObjectiveFunction(object):
506
498
  self.endog = None
507
499
  # solution parameters
508
500
  self._min_characteristics = 1
509
-
510
501
  self._max_hurdle = 4
511
502
 
512
- if "Manual_Fit" in kwargs and kwargs['Manual_Fit'] is not None:
503
+ #Manually fit from analyst specification
504
+ manual_fit = kwargs.get('Manual_Fit')
505
+ if manual_fit is not None:
506
+ print('Fitting the manual specification')
507
+ self.process_manual_fit(manual_fit)
508
+
509
+ self.solution_analyst = None
513
510
 
514
- self.initial_sig = 1 # pass the test of a single model
515
- self.pvalue_sig_value = 1
516
- # embed the solution to how you want it
517
- self.set_defined_seed(42)
518
- a = self.modify_initial_fit(kwargs['Manual_Fit'])
519
- self.makeRegression(a)
520
511
 
521
512
 
522
- find_constant = 0
523
- hard_code = 0
513
+
514
+ def over_ride_self(self, **kwargs):
515
+ """
516
+ Dynamically sets attributes on the instance based on the provided keyword arguments.
517
+ """
518
+ for key, value in kwargs.items():
519
+ setattr(self, key, value)
520
+ print(f"Updated attributes: {kwargs}")
521
+
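`over_ride_self` is a thin convenience wrapper around `setattr`; a hypothetical call would look like:

    # obj.over_ride_self(test_flag=0, Ndraws=500)
    # afterwards obj.test_flag == 0 and obj.Ndraws == 500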
522
+ def remove_offset(self, data, indices):
523
+ """ Remove offset data from the dataset """
524
+ new_data = np.delete(data, indices, axis=2)
525
+ return new_data
526
+
527
+ def process_offset(self):
528
+ """ Process offset if it exists in the characteristics """
529
+ try:
530
+ if 'Offset' in self._characteristics_names:
531
+ self.have_offset = True
532
+ val_od = self.get_named_indices(['Offset'])
533
+ self._offsets = self._x_data[:, :, val_od]
534
+ self._x_data = self.remove_offset(self._x_data, val_od)
535
+ self._characteristics_names = [x for x in self._characteristics_names if x != 'Offset']
536
+ self._characteristics = len(self._characteristics_names)
537
+
538
+ if self.is_multi:
539
+ self._offsets_test = self._x_data_test[:, :, val_od]
540
+ self._x_data_test = self.remove_offset(self._x_data_test, val_od)
541
+ print(self._offsets)
542
+ else:
543
+ self.initialize_empty_offsets()
544
+
545
+ except Exception as e:
546
+ print(f"An error occurred: {e}") # Better error handling
547
+ self.initialize_empty_offsets()
548
+
549
+ def initialize_empty_offsets(self):
550
+ """ Initialize offsets to zero if none are found or on error """
551
+ self._offsets = np.zeros((self.N, self.P, 1))
552
+ if self.is_multi:
553
+ self._offsets_test = np.zeros((self.N_test, self.P_test, 1))
554
+
555
+
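The offset methods above slice the 'Offset' column out of the (N, P, K) design array and remove it from the regressors in one pass. A standalone sketch of that slice-and-delete step on dummy data:

    import numpy as np

    N, P, K = 4, 2, 3
    names = ['const', 'AADT', 'Offset']
    X = np.ones((N, P, K))

    off_idx = [names.index('Offset')]
    offsets = X[:, :, off_idx]            # shape (N, P, 1), stored separately
    X = np.delete(X, off_idx, axis=2)     # regressors without the offset column
    names = [n for n in names if n != 'Offset']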
556
+ def copy_class_attributes(self, class_object):
557
+ '''
558
+ Loop through the attributes of class_object and copy them onto self.
559
+ '''
560
+
561
+ # Loop through all attributes of the given object and copy them
562
+ for attr in vars(class_object):
563
+ setattr(self, attr, getattr(class_object, attr))
564
+
565
+
566
+ def process_manual_fit(self, manual_fit):
567
+ """Process the manual fit configuration."""
568
+ self.initial_sig = 1 # Example: Initialize some signal
569
+ self.pvalue_sig_value = 1 # Example: Initialize another signal
570
+ self.set_defined_seed(42) # Set a specific seed
571
+
572
+ modified_fit = self.modify_initial_fit(manual_fit) # Modify the initial fit based on manual_fit
573
+ self.makeRegression(modified_fit) # Perform regression with the modified fit
574
+
575
+
576
+ def process_fit_specifications(self, find_constant, hard_code):
577
+ """
578
+ Function for processing testing and finding a suitable initial coefficient (linear intercept)
579
+ """
524
580
  if hard_code:
525
581
  manual_fit_spec = {
526
582
  'fixed_terms': ['Constant', 'US', 'RSMS', 'MCV'],
@@ -559,7 +615,7 @@ class ObjectiveFunction(object):
559
615
  constant_values.append(self.beta_dict['const'][0][1])
560
616
  dispersion_values.append(self.beta_dict.get(self._model_type_codes[i], [[0, 0], [0, 0]])[0][1])
561
617
  except:
562
- print('d')
618
+ print('Error during regression analysis.')
563
619
  i += 1
564
620
 
565
621
  # Add the values of this iteration to the total
@@ -570,7 +626,7 @@ class ObjectiveFunction(object):
570
626
  constant_values_avg = [x / 100 for x in constant_values_total]
571
627
  dispersion_values_avg = [x / 100 for x in dispersion_values_total]
572
628
 
573
- self.solution_analyst = None
629
+ return constant_values_avg, dispersion_values_avg
574
630
 
575
631
 
576
632
  def _balance_panels(self, X, y, panels): # ToDO re
@@ -615,22 +671,7 @@ class ObjectiveFunction(object):
615
671
 
616
672
  return np.nan_to_num(quad(integrand, 0, np.inf)[0], nan=0)
617
673
 
618
- def _random_forest_call_r(self):
619
- import rpy2.rinterface as rinterface
620
- import rpy2.robjects as robjects
621
- import rpy2.robjects as ro
622
- from rpy2.robjects import pandas2ri
623
- r = robjects.r
624
- r['source']('testML.R')
625
- pandas2ri.activate()
626
- RF_function_r = robjects.globalenv['RF_plot']
627
- RF_function_corr_r = robjects.globalenv['RF_plot_corr']
628
- r_df = ro.conversion.py2rpy(self._x_data)
629
- y_dy = ro.conversion.py2rpy(self._y_data)
630
- RF_function_r(r_df, y_dy)
631
-
632
- print('did this work')
633
- RF_function_corr_r(r_df, y_dy)
674
+
634
675
 
635
676
  def print_system_utilization(self):
636
677
  # Get CPU usage
@@ -647,7 +688,8 @@ class ObjectiveFunction(object):
647
688
  mem_free = round(mem_info.available /
648
689
  (1024 * 1024), 2) # Convert to MB
649
690
  print(
650
- f"Current memory usage: {mem_percent}% ({mem_used} MB used / {mem_total} MB total)")
691
+ f"Current memory usage: {mem_percent}% ({mem_used} MB used / {mem_total} MB total / "
692
+ f" mem free {mem_free})")
651
693
 
652
694
  def _arrange_long_format(self, X, y, ids, panels=None, groups=None):
653
695
  '''converts the data to long format'''
@@ -665,59 +707,14 @@ class ObjectiveFunction(object):
665
707
  if group is not None:
666
708
  group = group[sorted_idx]
667
709
 
668
- return X, y, pnl, group
669
-
670
- pandas_sort = 1
671
- if pandas_sort:
672
- if ids is not None:
673
-
674
- pnl = panels if panels is not None else np.ones(len(ids))
675
- df = X
676
-
677
- df['panels'], df['ids'] = pnl, ids
678
- new = 0
679
- if new:
680
- cols = np.zeros(len(ids), dtype={'names': ['panels', 'ids'],
681
- 'formats': ['<f4', '<f4']})
682
- cols['panels'], cols['ids'] = pnl, ids
683
- sorted_idx = np.argsort(cols, order=['panels', 'ids'])
684
- X, y = X[sorted_idx], y[sorted_idx]
685
- if panels is not None:
686
- panels = panels[sorted_idx]
687
- return X, y, panels
688
-
689
- df = pd.concat([X.reset_index(drop=True),
690
- y.reset_index(drop=True)], axis=1)
691
- sorted_df = df.sort_values(
692
- ['panels', 'ids']).reset_index(drop=True)
693
-
694
- X, y, panels = sorted_df.iloc[:, :-
695
- 3], sorted_df.iloc[:, -3:-2], sorted_df.iloc[:, -2]
696
- if panels is not None:
697
- # panels = panels[sorted_idx]
698
- P_i = (
699
- (np.unique(panels, return_counts=True)[1])).astype(int)
700
- P = np.max(P_i)
701
- N = len(P_i)
702
- print(1)
703
- return X, y, panels
704
-
705
- if ids is not None:
706
- X = np.asarray(X)
707
- y = np.asarray(y)
708
- pnl = panels if panels is not None else np.ones(len(ids))
710
+ return X, y.astype('float'), pnl, group
709
711
 
710
- cols = np.zeros(len(ids), dtype={'names': ['panels', 'ids'],
711
- 'formats': ['<f4', '<f4']})
712
- cols['panels'], cols['ids'] = pnl, ids
713
- sorted_idx = np.argsort(cols, order=['panels', 'ids'])
714
- X, y = X[sorted_idx], y[sorted_idx]
715
- if panels is not None:
716
- panels = panels[sorted_idx]
717
-
718
- return X, y, panels
712
+ return X, y.astype('float'), panels
719
713
 
720
714
  def _random_forest_identify_transformations(self, x_data, y_data):
715
+ '''
716
+ use the random forest model to identify the best features
717
+ '''
721
718
  # let's use the pprint module for readability
722
719
  import inspect
723
720
  from pprint import pprint
@@ -814,6 +811,8 @@ class ObjectiveFunction(object):
814
811
  if dispersion == 0:
815
812
  return None, None
816
813
  elif dispersion == 2 or dispersion == 1:
814
+ if self.no_extra_param:
815
+ return self.nb_parma, None
817
816
  return betas[-1], None
818
817
 
819
818
  elif dispersion == 3:
@@ -841,14 +840,65 @@ class ObjectiveFunction(object):
841
840
  par = np.nan_to_num(par)
842
841
  return par
843
842
 
844
- def define_alphas(self, complexity_level=4, exclude=[], include=[]):
843
+ def rename_distro(self, distro):
844
+ # Mapping dictionary
845
+ mapping = {
846
+ 'normal': ['normal', 'n', 'Normal'],
847
+ 'triangular': ['triangular', 't', 'Triangular'],
848
+ 'uniform': ['uniform', 'u', 'Uniform'],
849
+ 'ln_normal': ['ln_normal', 'ln_n', 'Ln_Normal'],
850
+ 'tn_normal': ['tn_normal', 'tn_n', 'trunc_normal']
851
+ }
852
+
853
+ # Use list comprehension with the mapping
854
+ reversed_mapping = {value: key for key, values in mapping.items() for value in values}
855
+
856
+ # Use the reversed mapping to find the corresponding key
857
+ new_distro = [reversed_mapping.get(i, i) for i in distro]
858
+ return new_distro
859
+
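`rename_distro` inverts the alias table once and then maps each incoming label through it, passing unknown labels through unchanged. A small usage sketch of the same dictionary inversion:

    mapping = {'normal': ['normal', 'n', 'Normal'],
               'triangular': ['triangular', 't', 'Triangular']}
    reversed_mapping = {alias: key for key, aliases in mapping.items() for alias in aliases}

    labels = ['n', 'Triangular', 'mystery']
    print([reversed_mapping.get(lbl, lbl) for lbl in labels])
    # -> ['normal', 'triangular', 'mystery']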
860
+ def define_distributions_analyst(self, extra = None):
861
+
862
+ if extra is not None:
863
+ set_alpha = []
864
+ for col in self._characteristics_names:
865
+ if col in extra[('Column')].values:
866
+ matched_index = extra[('Column')].index[extra[('Column')] == col].tolist()
867
+ distro = ast.literal_eval(extra.iloc[matched_index, 7].values.tolist()[0])
868
+ distro = self.rename_distro(distro)
869
+ set_alpha = set_alpha+[distro]
870
+ elif col == 'const':
871
+ set_alpha = set_alpha +[['normal']]
872
+ return set_alpha
873
+ return [[x for x in self._distribution]] * self._characteristics
874
+
875
+
876
+
877
+
878
+ def define_alphas(self, complexity_level=4, exclude=[], include=[], extra = None):
845
879
  'complexity level'
846
880
  '''
847
881
  2 is feature selection,
848
- 3 is random paramaters
849
- 4 is correlated random paramaters
882
+ 3 is random parameters
883
+ 4 is correlated random parameters
884
+
885
+ extra holds the decisions defined by the Meta App
850
886
  '''
851
887
  set_alpha = []
888
+ if extra is not None:
889
+ for col in self._characteristics_names:
890
+ if col == 'const' or col == 'Constant' or col == 'constant': # no random parameters for const
891
+ set_alpha = set_alpha + [[1]]
892
+ elif col == 'Offset':
893
+ set_alpha = set_alpha + [[1]]
894
+
895
+ elif col in extra[('Column')].values:
896
+ matched_index = extra[('Column')].index[extra[('Column')] == col].tolist()
897
+ check = list(itertools.chain(*extra.iloc[matched_index, 1:7].values))
898
+ set_alpha = set_alpha + [[x for x in range(len(check)) if check[x] == True]]
899
+ return set_alpha
900
+
901
+
852
902
  for col in self._characteristics_names:
853
903
  if col == 'const' or col == 'Constant' or col == 'constant': # no random parameters for const
854
904
  set_alpha = set_alpha + [[1]]
@@ -866,7 +916,6 @@ class ObjectiveFunction(object):
866
916
  def pvalue_asterix_add(self, pvalues):
867
917
  pvalue_ast = list()
868
918
  for i in range(len(pvalues)):
869
- signif = ""
870
919
  if float(pvalues[i]) < 0.001:
871
920
  signif = "***"
872
921
  elif float(pvalues[i]) < 0.01:
@@ -899,8 +948,14 @@ class ObjectiveFunction(object):
899
948
 
900
949
  return ([self._model_type_codes[dispersion]])
901
950
 
902
- def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None,
903
- zi_fit=None, obj_1=None, model_nature=None):
951
+ def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
952
+ '''
953
+ Set up the naming for the model summary.
954
+ '''
955
+ if self.no_extra_param and dispersion == 1:
956
+
957
+ betas = np.append(betas, self.nb_parma)
958
+
904
959
  self.name_deleter = []
905
960
  group_rpm = None
906
961
  group_dist = []
@@ -911,8 +966,7 @@ class ObjectiveFunction(object):
911
966
  rdm_fit = self.none_handler(self.rdm_fit)
912
967
  if rdm_cor_fit is None:
913
968
  rdm_cor_fit = self.none_handler(self.rdm_cor_fit)
914
- if zi_fit is None:
915
- zi_fit = self.none_handler(self.zi_fit)
969
+
916
970
  dis_fit = [x for x in self.none_handler(
917
971
  self.dist_fit)] # check if dis fit is name
918
972
 
@@ -977,18 +1031,18 @@ class ObjectiveFunction(object):
977
1031
  br_w_names = [randvars[i] + " (Std. Dev.) " + rand_vars_dis[i]
978
1032
  for i in range(len(randvars))]
979
1033
 
980
- zi_names = [x + ":inflated" for x in self.none_handler(self.zi_force_names)]
1034
+
981
1035
 
982
1036
  names = fixednames + randvars + chol_names + \
983
- br_w_names + chol_part_1 + chol + zi_names + hetro_long + dispersion_name
1037
+ br_w_names + chol_part_1 + chol + hetro_long + dispersion_name
984
1038
  self.name_deleter = fixednames + randvars + chol_names + randvars + [chol_names[i] for i
985
1039
  in range(len(chol_names)) for j in
986
1040
  range(
987
- i + 1)] + zi_names + dispersion_name # TODO does this break
1041
+ i + 1)] + dispersion_name # TODO does this break
988
1042
  name_delete_2 = fixednames + randvars + chol_names + randvars + [chol_names[i] + "/" +
989
1043
  chol_names[j] for i
990
1044
  in range(len(chol_names)) for j in
991
- range(i + 1)] + zi_names + dispersion_name
1045
+ range(i + 1)] + dispersion_name
992
1046
  index_dict = {}
993
1047
  for i, name in enumerate(name_delete_2):
994
1048
  split_names = name.split('/')
@@ -1012,22 +1066,24 @@ class ObjectiveFunction(object):
1012
1066
  randvars = [x for x in self.none_handler(rdm_fit)]
1013
1067
  chol_names = [x for x in self.none_handler(rdm_cor_fit)]
1014
1068
 
1015
- zi_names = [x + ': inflated' for x in self.none_handler(self.zi_force_names)]
1016
1069
 
1017
- names = fixednames + randvars + chol_names + zi_names + big_hetro + dispersion_name
1070
+
1071
+ names = fixednames + randvars + chol_names + big_hetro + dispersion_name
1018
1072
 
1019
1073
  names = np.array(names) # TODO check order
1020
1074
  self.print_transform = self.transform_id_names + \
1021
1075
  [''] * (len(names) - len(self.transform_id_names))
1022
1076
  self.coeff_names = names
1023
1077
 
1078
+ '''
1024
1079
  if betas is not None:
1025
1080
  try:
1026
1081
  if len(betas) != len(names):
1027
- print('no draws is', no_draws)
1028
- print('fix_theano')
1082
+ print('standard_model', no_draws)
1083
+
1029
1084
  except Exception as e:
1030
1085
  print(e)
1086
+ '''
1031
1087
 
1032
1088
 
1033
1089
 
@@ -1052,22 +1108,9 @@ class ObjectiveFunction(object):
1052
1108
  if not isinstance(self.pvalues, np.ndarray):
1053
1109
  raise Exception
1054
1110
 
1055
- for i in range(len(self.coeff_)):
1056
- signif = ""
1057
-
1058
- if float(self.pvalues[i]) < 0.01:
1059
- signif = "***"
1060
- elif float(self.pvalues[i]) < 0.05:
1061
- signif = "**"
1062
- elif float(self.pvalues[i]) < 0.1:
1063
- signif = "*"
1111
+ if 'nb' in self.coeff_names and self.no_extra_param:
1112
+ self.pvalues = np.append(self.pvalues,0)
1064
1113
 
1065
- '''
1066
- print(fmt.format(self.coeff_names[i][:coeff_name_str_length], self.print_transform[i], self.coeff_[i],
1067
- self.stderr[i], self.zvalues[i], self.pvalues[i],
1068
- signif
1069
- ))
1070
- '''
1071
1114
  if self.please_print or save_state:
1072
1115
 
1073
1116
  if self.convergance is not None:
@@ -1082,17 +1125,22 @@ class ObjectiveFunction(object):
1082
1125
 
1083
1126
  if solution is not None:
1084
1127
  print(f"{self._obj_2}: {self.round_with_padding(solution[self._obj_2], 2)}")
1085
-
1128
+
1086
1129
  self.pvalues = [self.round_with_padding(
1087
1130
  x, 2) for x in self.pvalues]
1088
1131
  signif_list = self.pvalue_asterix_add(self.pvalues)
1089
1132
  if model == 1:
1090
1133
 
1091
- self.coeff_[-1] = np.abs(self.coeff_[-1])
1092
- if self.coeff_[-1] < 0.25:
1134
+ #self.coeff_[-1] = 1/np.exp(self.coeff_[-1])
1135
+ if self.no_extra_param:
1136
+ self.coeff_ = np.append(self.coeff_, self.nb_parma)
1137
+ self.stderr = np.append(self.stderr, 0.00001)
1138
+ self.zvalues = np.append(self.zvalues, 50)
1139
+
1140
+ elif self.coeff_[-1] < 0.25:
1093
1141
  print(self.coeff_[-1], 'Warning Check Dispersion')
1094
1142
  print(np.exp(self.coeff_[-1]))
1095
- self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
1143
+ #self.coeff_[-1] = np.exp(self.coeff_[-1]) # min possible value for negbinom
1096
1144
 
1097
1145
  self.coeff_ = [self.round_with_padding(x, 2) for x in self.coeff_]
1098
1146
 
@@ -1175,14 +1223,7 @@ class ObjectiveFunction(object):
1175
1223
  self.save_to_file(latextable.draw_latex(
1176
1224
  table, caption=caption, caption_above=True), file_name)
1177
1225
 
1178
- # print('change this')
1179
- # df = pd.read_csv("artificial_mixed_corr_2023_MOOF.csv")
1180
-
1181
- # updating the column value/data
1182
- # df['Y'] = np.mean(self.lam, axis = (1,2))
1183
1226
 
1184
- # writing into the file
1185
- # df.to_csv("artificial_mixed_corr_2023_MOOF.csv", index=False)
1186
1227
 
1187
1228
  def summary(self, model=None, algorithm=None, transform_list=None, long_print=0, solution=None):
1188
1229
  """
@@ -1311,7 +1352,7 @@ class ObjectiveFunction(object):
1311
1352
  with open(filename, 'w') as file:
1312
1353
  file.write(content)
1313
1354
 
1314
- def define_poissible_transforms(self, transforms) -> list:
1355
+ def define_poissible_transforms(self, transforms, extra= None) -> list:
1315
1356
  transform_set = []
1316
1357
  if not isinstance(self._x_data, pd.DataFrame):
1317
1358
  x_data = self._x_data.reshape(self.N * self.P, -1).copy()
@@ -1322,6 +1363,7 @@ class ObjectiveFunction(object):
1322
1363
 
1323
1364
  if 'AADT' in self._characteristics_names[col]:
1324
1365
  new_transform = [['log']]
1366
+ #new_transform = [['no']]
1325
1367
  transform_set = transform_set + new_transform
1326
1368
 
1327
1369
  elif all(x_data[col] <= 5):
@@ -1361,6 +1403,18 @@ class ObjectiveFunction(object):
1361
1403
 
1362
1404
  return transform_set
1363
1405
 
1406
+ def poisson_mean_get_dispersion(self, betas, X, y):
1407
+ eVy = self._loglik_gradient(betas, X, y, None, X, None, None, False, False, dispersion=0,
1408
+ return_EV=True,
1409
+ zi_list=None, draws_grouped=None, Xgroup=None)
1410
+
1411
+ ab = ((y - eVy)**2 - eVy)/eVy
1412
+ bb = eVy -1
1413
+ disp = sm.OLS(ab.ravel(), bb.ravel()).fit()
1414
+ gamma = disp.params[0]
1415
+ #print(f'dispersion is {gamma}')
1416
+ return gamma
1417
+
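`poisson_mean_get_dispersion` recovers a starting value for the NB dispersion from the Poisson fit via an auxiliary OLS regression, in the spirit of Cameron and Trivedi: it regresses ((y - mu)^2 - mu)/mu on mu - 1 without an intercept. A self-contained sketch of the same calculation on simulated data (assumed setup, not the package's exact pipeline):

    import numpy as np
    import statsmodels.api as sm

    rng = np.random.default_rng(0)
    mu = np.exp(rng.normal(1.0, 0.5, size=500))            # fitted Poisson means
    y = rng.negative_binomial(n=2.0, p=2.0 / (2.0 + mu))   # counts with alpha = 0.5

    ab = ((y - mu) ** 2 - mu) / mu                         # moment condition, as above
    bb = mu - 1                                            # regressor, no intercept
    alpha_hat = sm.OLS(ab, bb).fit().params[0]             # rough dispersion estimate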
1364
1418
  def validation(self, betas, y, X, Xr=None, dispersion=0, rdm_cor_fit=None, zi_list=None, exog_infl=None,
1365
1419
  model_nature=None, halton=1, testing=1, validation=0):
1366
1420
  'validation if mu needs to be calculated'
@@ -1394,7 +1448,7 @@ class ObjectiveFunction(object):
1394
1448
  XG = model_nature.get('XGtest')[:total_percent, :, :]
1395
1449
  else:
1396
1450
  XG = model_nature.get('XGtest')[total_percent:, :, :]
1397
- print('chekc this is doing it wright')
1451
+
1398
1452
  else:
1399
1453
  if 'XG' in model_nature:
1400
1454
  XG = model_nature.get('XG')
@@ -1516,7 +1570,7 @@ class ObjectiveFunction(object):
1516
1570
  5: heterogeneity_in_means
1517
1571
 
1518
1572
 
1519
- a: how to transofrm the original data
1573
+ a: how to transform the original data
1520
1574
  b: grab dispersion '''
1521
1575
 
1522
1576
  # todo: better way
@@ -1540,22 +1594,9 @@ class ObjectiveFunction(object):
1540
1594
  alpha_hetro = [
1541
1595
  0 if x != 5 else 1 for x in vector[:self._characteristics]]
1542
1596
 
1543
- if self.zi_force == True:
1544
1597
 
1545
- return {
1546
- 'alpha': alpha,
1547
- 'alpha_rdm': alpha_rdm,
1548
- 'alpha_cor_rdm': alpha_cor_rdm,
1549
- 'alpha_grouped': alpha_grouped,
1550
- 'alpha_hetro': alpha_hetro,
1551
- 'distributions': distributions,
1552
- 'transformations': transformations,
1553
- 'exog_infl': self.zi_force_names,
1554
- 'dispersion': dispersion
1555
- }
1556
1598
 
1557
- else:
1558
- return {
1599
+ return {
1559
1600
  'alpha': alpha,
1560
1601
  'alpha_rdm': alpha_rdm,
1561
1602
  'alpha_cor_rdm': alpha_cor_rdm,
@@ -1563,7 +1604,6 @@ class ObjectiveFunction(object):
1563
1604
  'alpha_hetro': alpha_hetro,
1564
1605
  'distributions': distributions,
1565
1606
  'transformations': transformations,
1566
-
1567
1607
  'dispersion': dispersion
1568
1608
  }
1569
1609
 
@@ -1599,7 +1639,7 @@ class ObjectiveFunction(object):
1599
1639
 
1600
1640
  def repair(self, vector, reduce_to_this=10000): # todo get the number of parameters
1601
1641
  'Method to repair the model so that the number of parameters is held within the constraint'
1602
- b = 0
1642
+
1603
1643
  new_j = 0
1604
1644
  # extract explanatory vector
1605
1645
  prmVect = vector[:self._characteristics]
@@ -1618,7 +1658,6 @@ class ObjectiveFunction(object):
1618
1658
  int(np.min((5, self.complexity_level - 1)))])
1619
1659
 
1620
1660
  count_3 = prmVect.count(3)
1621
- this_many = count_3 * (count_3 + 1) / 2
1622
1661
 
1623
1662
  vector[:len(prmVect)] = prmVect.copy()
1624
1663
 
@@ -1637,8 +1676,7 @@ class ObjectiveFunction(object):
1637
1676
  # b = sum(prmVect) + self.is_dispersion(vector[-1])
1638
1677
  max_loops = 100 # Maximum number of loops
1639
1678
  counter = 0 # Counter variable to keep track of the number of loops
1640
- if any(isinstance(num, int) and num < 0 for num in vector):
1641
- raise Exception('fhfhfhf')
1679
+
1642
1680
 
1643
1681
  while b > self._max_characteristics and counter < max_loops or b > reduce_to_this:
1644
1682
 
@@ -1686,8 +1724,6 @@ class ObjectiveFunction(object):
1686
1724
  counter += 1
1687
1725
 
1688
1726
  counter = 0
1689
- if any(isinstance(num, int) and num < 0 for num in vector):
1690
- raise Exception('fhfhfhf')
1691
1727
  while b < self._min_characteristics and counter < max_loops:
1692
1728
 
1693
1729
  weights = [1 if x == 0 else 0 for x in only_ints_vals]
@@ -1734,13 +1770,13 @@ class ObjectiveFunction(object):
1734
1770
  cor_l = 0 if self.rdm_cor_fit is None else len(self.rdm_cor_fit)
1735
1771
  Kh = 0 if self.hetro_fit is None else len(self.hetro_fit) + len(set(self.dist_hetro))
1736
1772
 
1737
- zi_terms = 0 if self.zi_fit is None else len(self.zi_fit)
1773
+
1738
1774
  Kchol = int((cor_l *
1739
1775
  (cor_l + 1)) / 2)
1740
1776
  n_coeff = Kf + Kr + cor_l + Kchol + Kr_b + Kh
1741
1777
  if block:
1742
- return [Kf, Kr, cor_l, Kr_b, Kchol, Kh, zi_terms]
1743
- return Kf, Kr, cor_l, Kr_b, Kchol, Kh, zi_terms
1778
+ return [Kf, Kr, cor_l, Kr_b, Kchol, Kh]
1779
+ return Kf, Kr, cor_l, Kr_b, Kchol, Kh
1744
1780
 
1745
1781
  def find_index_of_block(self, lst, value):
1746
1782
  cumulative_sum = 0
@@ -1821,8 +1857,7 @@ class ObjectiveFunction(object):
1821
1857
  self.rdm_fit)):
1822
1858
  raise Exception('pop wrong for id names')
1823
1859
 
1824
- # return 'need to delete all of the dups'
1825
- # self.rdm_cor_fit.pop(self.name_deleter(idx))
1860
+
1826
1861
 
1827
1862
  def get_value_to_delete(self, idx, dispersion):
1828
1863
  block = self.get_num_params(True)
@@ -1858,8 +1893,7 @@ class ObjectiveFunction(object):
1858
1893
  self.dist_fit.pop(cc[b] + len(self.rdm_fit))
1859
1894
  self.transform_id_names.pop(
1860
1895
  cc[b] + len(self.none_handler(self.fixed_fit)) + len(self.none_handler(self.rdm_fit)))
1861
- # return 'need to delete all of the dups'
1862
- # self.rdm_cor_fit.pop(self.name_deleter(idx))
1896
+
1863
1897
 
1864
1898
  def get_param_num(self, dispersion=0):
1865
1899
  a = np.sum(self.get_num_params()) + \
@@ -1884,13 +1918,16 @@ class ObjectiveFunction(object):
1884
1918
  elif dispersion == 4:
1885
1919
  return 2
1886
1920
  else:
1887
- return 1
1921
+ if self.no_extra_param:
1922
+ return 0
1923
+ else:
1924
+ return 1
1888
1925
 
1889
1926
  def get_pvalue_info_alt(self, pvalues, names, sig_value=0.05, dispersion=0, is_halton=1, delete=0,
1890
1927
  return_violated_terms=0):
1891
1928
 
1892
1929
  num_params = len(pvalues)
1893
- Kf, Kr, Kc, Kr_b, Kchol, Kh, zi_b = self.get_num_params()
1930
+ Kf, Kr, Kc, Kr_b, Kchol, Kh = self.get_num_params()
1894
1931
 
1895
1932
  vio_counts = 0
1896
1933
  pvalues = np.array([float(string) for string in pvalues])
@@ -1899,6 +1936,7 @@ class ObjectiveFunction(object):
1899
1936
 
1900
1937
  else:
1901
1938
  slice_this_amount = self.num_dispersion_params(dispersion)
1939
+ slice_this_amount = 1 #TODO handle this
1902
1940
  if pvalues[-1] > sig_value:
1903
1941
  vio_counts += 1
1904
1942
  subpvalues = pvalues[:-slice_this_amount].copy()
@@ -1915,18 +1953,14 @@ class ObjectiveFunction(object):
1915
1953
  subpvalues[i] = 0
1916
1954
 
1917
1955
  sum_k += Kr_b
1918
- if Kchol > 0:
1919
- cc = [i for i
1920
- in range(len(self.rdm_cor_fit)) for j in range(i + 1)]
1921
-
1922
1956
  lower_triangular = subpvalues[sum_k:sum_k + Kchol]
1923
1957
 
1924
- n = Kc # compute matrix size
1958
+
1925
1959
  # initialize matrix with zeros
1926
- matrix_alt = [[0] * n for _ in range(n)]
1960
+ matrix_alt = [[0] * Kc for _ in range(Kc)]
1927
1961
  index = 0
1928
1962
 
1929
- for i in range(n):
1963
+ for i in range(Kc):
1930
1964
  for j in range(i + 1):
1931
1965
  # fill in lower triangular entries
1932
1966
  matrix_alt[i][j] = lower_triangular[index]
@@ -2327,7 +2361,7 @@ class ObjectiveFunction(object):
2327
2361
  sorted(my_dict, key=lambda x: x[0]['pval_percentage'])
2328
2362
 
2329
2363
  def get_fitness(self, vector, multi=False, verbose=False, max_routine=3):
2330
- obj_1 = 10.0 ** 8
2364
+ obj_1 = 10.0 ** 5
2331
2365
  obj_best = None
2332
2366
  sub_slns = list()
2333
2367
 
@@ -2338,7 +2372,7 @@ class ObjectiveFunction(object):
2338
2372
  try:
2339
2373
  self.repair(vector)
2340
2374
  except Exception as e:
2341
- print('prob here')
2375
+ print('problem repairing here')
2342
2376
  print(vector)
2343
2377
  print(e)
2344
2378
  layout = vector.copy()
@@ -2414,17 +2448,9 @@ class ObjectiveFunction(object):
2414
2448
 
2415
2449
  if obj_1 is not None:
2416
2450
  obj_1['layout'] = vector.copy()
2417
- # alpha, alpha_rdm, alpha_cor_rdm = self.modify(
2418
- # obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'])
2419
- # a = self.modifyn(model_mod)
2420
- # vector = self.modify_vector(
2421
- # vector, alpha, alpha_rdm, alpha_cor_rdm)
2422
2451
  sub_vector = vector[:self._characteristics]
2423
2452
  dispersion_parm = vector[-1]
2424
- if dispersion_parm == 0:
2425
- num_parm = sum(sub_vector)
2426
- else:
2427
- num_parm = sum(sub_vector) + 1
2453
+
2428
2454
 
2429
2455
  if not self.is_quanitifiable_num(obj_1[self._obj_1]):
2430
2456
  obj_1[self._obj_1] = 10 ** 9
@@ -2457,7 +2483,7 @@ class ObjectiveFunction(object):
2457
2483
 
2458
2484
  self.Last_Sol = obj_1.copy()
2459
2485
 
2460
- #
2486
+
2461
2487
 
2462
2488
  self.reset_sln()
2463
2489
  if not self.is_quanitifiable_num(obj_1[self._obj_1]):
@@ -2495,7 +2521,7 @@ class ObjectiveFunction(object):
2495
2521
  self.coeff_names = None
2496
2522
  self.draws1 = None
2497
2523
  self.coeff_ = None
2498
- self.zi_fit = None
2524
+
2499
2525
  self.bic = None
2500
2526
  self.log_lik = None
2501
2527
  self.pvalues = None
@@ -2589,13 +2615,13 @@ class ObjectiveFunction(object):
2589
2615
  def set_defined_seed(self, seed):
2590
2616
  print('Benchmarking test with Seed', seed)
2591
2617
  np.random.seed(seed)
2592
- #pd.random.seed(seed)
2618
+
2593
2619
  random.seed(seed)
2594
2620
 
2595
2621
  def set_random_seed(self):
2596
- print('Imbdedding Seed', self._random_seed)
2622
+ print('Embedding Seed', self._random_seed)
2597
2623
  np.random.seed(self._random_seed)
2598
- #pd.random.seed(self._random_seed)
2624
+
2599
2625
  random.seed(self._random_seed)
2600
2626
  return self._random_seed
2601
2627
 
@@ -2627,7 +2653,7 @@ class ObjectiveFunction(object):
2627
2653
  self._hmcr = (
2628
2654
  self._hmcr_min + ((self._hmcr_max - self._hmcr_min) / self._max_imp) * iteration)
2629
2655
 
2630
- # end def
2656
+
2631
2657
 
2632
2658
  def update_par(self, iteration, is_sin=False):
2633
2659
  """
@@ -2720,85 +2746,9 @@ class ObjectiveFunction(object):
2720
2746
  print(e)
2721
2747
  print('f')
2722
2748
 
2723
- def negbinom_gradients(r, p, k, a=None): # TODO: delete if wrong
2724
- """_summary_
2725
2749
 
2726
- Args:
2727
- r (_type_): rate paramaters or dispersion of the nb
2728
- p (_type_): probability
2729
- k (_type_): vector of (non-negative integer) quantiles.
2730
- a (_type_, optional): optional paramater, if none NB model, otherwise NB-Lindley model with Lindley paramater a.
2731
2750
 
2732
- Raises:
2733
- Exception: _description_
2734
- Exception: _description_
2735
- ValueError: _description_
2736
- Exception: _description_
2737
- Exception: _description_
2738
2751
 
2739
- Returns:
2740
- _type_: _description_
2741
- """
2742
- # fine the NegBinom PMF
2743
- import scipy.special as sps
2744
- negbinom_pmf = sps.comb(k + r - 1, k) * p ** r * (1 - p) ** k
2745
-
2746
- # Calculate the gradient of the NegBinom PMF with respect to r and p
2747
- d_negbinom_pmf_dr = sps.comb(
2748
- k + r - 1, k) * (np.log(p) - sps.digamma(r)) * p ** r * (1 - p) ** k
2749
- d_negbinom_pmf_dp = sps.comb(
2750
- k + r - 1, k) * (r / p - (k + r) / (1 - p)) * p ** r * (1 - p) ** k
2751
-
2752
- if a is not None:
2753
- # Define the NegBinom-Lindley PMF
2754
- negbinom_lindley_pmf = sps.comb(a + k - 1, k) * p ** r * (1 - p) ** k
2755
-
2756
- # Calculate the gradient of the NegBinom-Lindley PMF with respect to r, p, and a
2757
- d_negbinom_lindley_pmf_dr = sps.comb(
2758
- a + k - 1, k) * (np.log(p) * p ** r * (1 - p) ** k)
2759
- d_negbinom_lindley_pmf_dp = sps.comb(
2760
- a + k - 1, k) * ((r / p - (k + r) / (1 - p)) * p ** r * (1 - p) ** k)
2761
- d_negbinom_lindley_pmf_da = sps.comb(
2762
- a + k - 1, k) * (-sps.digamma(a) + sps.digamma(a + k)) * p ** r * (1 - p) ** k
2763
-
2764
- return [d_negbinom_pmf_dr, d_negbinom_pmf_dp], [d_negbinom_lindley_pmf_dr, d_negbinom_lindley_pmf_dp,
2765
- d_negbinom_lindley_pmf_da]
2766
- else:
2767
- return [d_negbinom_pmf_dr, d_negbinom_pmf_dp]
2768
-
2769
- def f(self, x, N, sig, mu):
2770
- return norm.pdf(x, 0, 1) * poisson.pmf(N, np.exp(x * sig + mu))
2771
-
2772
- def poilog(self, n, mu, sig):
2773
- from scipy import integrate
2774
- if len(mu) > 1 or len(sig) > 1:
2775
- raise ValueError(
2776
- "vectorization of mu and sig is currently not implemented")
2777
- if any((n[n != 0] / np.trunc(n[n != 0])) != 1):
2778
- raise ValueError("all n must be integers")
2779
- if any(n < 0):
2780
- raise ValueError("one or several values of n are negative")
2781
- if not np.all(np.isfinite(np.concatenate((mu, sig)))):
2782
- raise ValueError("all parameters should be finite")
2783
- if sig <= 0:
2784
- raise ValueError("sig is not larger than 0")
2785
- spos = np.where(n < 8)[0]
2786
- lpos = np.where(n >= 8)[0]
2787
- val = np.empty_like(n)
2788
-
2789
- if spos.size > 0:
2790
- vali = np.empty(spos.size)
2791
- for i in range(spos.size):
2792
- try:
2793
- vali[i] = integrate.quad(
2794
- self.f, -np.inf, np.inf, sig, mu, args=(n[spos[i]],))[0]
2795
- except:
2796
- vali[i] = 1e-300
2797
- valp = self.poilog(n[spos], mu, sig ** 2)[0]
2798
- val[spos] = np.maximum(vali, valp)
2799
- if lpos.size > 0:
2800
- val[lpos] = self.poilog(n[lpos], mu, sig ** 2)[0]
2801
- return val
2802
2752
 
2803
2753
  def negbinom_pmf(self, r, p, k, a=None): # TODO: delete if wrong
2804
2754
  """_summary_
@@ -2828,45 +2778,7 @@ class ObjectiveFunction(object):
2828
2778
  negbinom_lindley_pmf = sc.comb(a + k - 1, k) * p ** r * (1 - p) ** k
2829
2779
  return negbinom_lindley_pmf
2830
2780
 
2831
- def nbl_score(self, y, X, betas, alpha, theta):
2832
- from scipy.special import gammaln, psi
2833
- """
2834
- Calculate the Negative Binomial-lindley model score vector of the log-likelihood.
2835
-
2836
- Parameters:
2837
- -----------
2838
- y : numpy array
2839
- The dependent variable of the model.
2840
- X : numpy array
2841
- The independent variables of the model.
2842
- beta : numpy array
2843
- The coefficients of the model.
2844
- alpha : float
2845
- The dispersion parameter of the Negative Binomial-lindley distribution.
2846
- theta : float
2847
- The theta parameter of the Negative Binomial-lindley distribution.
2848
-
2849
- Returns:
2850
- --------
2851
- score : numpy array
2852
- The score vector of the Negative Binomial-lindley model log-likelihood.
2853
- """
2854
- alpha = betas[-1]
2855
- theta = betas[-2]
2856
- beta = betas[:-2]
2857
- zi = self.my_lindley(y, theta).ravel()
2858
-
2859
- eta = np.dot(X, beta)
2860
- mu = np.exp(eta) * zi
2861
- p = 1 / (1 + mu * theta / alpha)
2862
- q = 1 - p
2863
- score = np.zeros(len(betas))
2864
-
2865
- for i in range(len(y)):
2866
- score += (psi(y[i] + zi[i] * p[i]) - psi(alpha * p[i]) + np.log(zi[i])
2867
- - np.log(1 + zi * mu[i] / alpha)) * X[i, :]
2868
2781
 
2869
- return score
2870
2782
 
2871
2783
  def poisson_lognormal_glm_score(self, betas, Y, X, sigma, tau=1e-6):
2872
2784
  """
@@ -2909,7 +2821,7 @@ class ObjectiveFunction(object):
2909
2821
  grad_n = np.concatenate((grad_n_sub, grad_n_sub1), axis=1)
2910
2822
  return gradient, grad_n
2911
2823
 
2912
- def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False):
2824
+ def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False, alpha = None):
2913
2825
  """
2914
2826
  Negative Binomial model score (gradient) vector of the log-likelihood
2915
2827
  Parameters
@@ -2929,9 +2841,43 @@ class ObjectiveFunction(object):
2929
2841
 
2930
2842
  """
2931
2843
 
2932
- try:
2844
+ # Calculate common terms
2845
+ '''
2846
+ n = len(y)
2847
+ n, p, d = X.shape # n: observations, p: panels (1 in your case), d: explanatory variables
2848
+
2849
+ # Flatten the data since there's only one panel, simplifying the operations
2850
+ X_flat = X.reshape(n * p, d)
2851
+ y_flat = y.flatten()
2852
+ mu_flat = mu.flatten()
2853
+
2854
+ # Prepare score array
2855
+ score = np.zeros(d + 1) # +1 for alpha
2856
+
2857
+ # Compute the gradient for regression coefficients
2858
+ for j in range(d): # Exclude the last parameter (alpha)
2859
+ score[j] = np.dot(X_flat[:, j], (y_flat - mu_flat))
2933
2860
 
2934
- alpha = params[-1]
2861
+ # Compute the gradient for the dispersion parameter
2862
+ if obs_specific:
2863
+ # Adjust the calculation if observation-specific effects are considered
2864
+ sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
2865
+ y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
2866
+ score[-1] = np.sum(sum_terms)
2867
+ else:
2868
+ # Standard calculation
2869
+ sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
2870
+ y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
2871
+ score[-1] = np.sum(sum_terms)
2872
+ return score
2873
+ '''
2874
+ #return score
2875
+
2876
+ try:
2877
+ if alpha is None:
2878
+ alpha = np.exp(params[-1])
2879
+ else:
2880
+ alpha = np.exp(params[-1])
2935
2881
  a1 = 1 / alpha * mu ** Q
2936
2882
  prob = a1 / (a1 + mu)
2937
2883
  exog = X
@@ -2973,7 +2919,8 @@ class ObjectiveFunction(object):
2973
2919
  return np.concatenate((dparams, dalpha),
2974
2920
  axis=1)
2975
2921
  except Exception as e:
2976
- print('in ki nb probkemng')
2922
+ print(e)
2923
+ print('NB score exception problem..')
2977
2924
  exc_type, exc_obj, exc_tb = sys.exc_info()
2978
2925
  fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
2979
2926
  print(exc_type, fname, exc_tb.tb_lineno)
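Because `NB_Score` now exponentiates the final parameter (alpha = exp(params[-1])), the safest way to validate any analytic score is a finite-difference check against the corresponding log-likelihood; `approx_fprime` is already imported from statsmodels at the top of the module. A generic sketch under the NB2 parameterization used here (simulated stand-in data, not the package's pipeline):

    import numpy as np
    from scipy.special import gammaln
    from statsmodels.tools.numdiff import approx_fprime

    def nb2_loglik(params, y, X):
        # beta = params[:-1]; alpha = exp(params[-1]), as in NB_Score
        beta, alpha = params[:-1], np.exp(params[-1])
        mu = np.exp(X @ beta)
        size = 1.0 / alpha
        prob = size / (size + mu)
        return np.sum(gammaln(size + y) - gammaln(y + 1) - gammaln(size)
                      + size * np.log(prob) + y * np.log(1 - prob))

    rng = np.random.default_rng(1)
    X = np.column_stack([np.ones(200), rng.normal(size=200)])
    y = rng.poisson(np.exp(X @ np.array([0.5, 0.3])))
    params = np.array([0.5, 0.3, np.log(0.5)])
    numeric_score = approx_fprime(params, nb2_loglik, args=(y, X))
    # an analytic gradient evaluated at `params` should match numeric_score closely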
@@ -3066,7 +3013,7 @@ class ObjectiveFunction(object):
3066
3013
  argument = prob.mean(axis=1)
3067
3014
  # if less than 0 penalise
3068
3015
  if np.min(argument) < 0:
3069
- print('what the fuck')
3016
+ print('Error with args..')
3070
3017
  if np.min(argument) < limit:
3071
3018
  # add a penalty for too small argument of log
3072
3019
  log_lik += -np.sum(np.minimum(0.0, argument - limit)) / limit
@@ -3557,6 +3504,7 @@ class ObjectiveFunction(object):
3557
3504
  else:
3558
3505
  corr_pairs = list(itertools.combinations(corr_indices, 2))
3559
3506
 
3507
+
3560
3508
  for ii, corr_pair in enumerate(corr_pairs):
3561
3509
  # lower cholesky matrix
3562
3510
  chol_mat_temp[tuple(reversed(corr_pair))] = chol[chol_count]
@@ -3584,7 +3532,7 @@ class ObjectiveFunction(object):
3584
3532
  a = 0
3585
3533
  b = 0
3586
3534
  stuff = []
3587
- # todo get order
3535
+ # TODO get order
3588
3536
  for j, i in enumerate(list_sizes):
3589
3537
  br_mean = betas_hetro[a:i + a]
3590
3538
  a += i
@@ -3611,7 +3559,30 @@ class ObjectiveFunction(object):
3611
3559
  br_mean = betas_m
3612
3560
  br_sd = betas_sd # Last Kr positions
3613
3561
  # Compute: betas = mean + sd*draws
3614
- betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
3562
+ if len(br_sd) != draws.shape[1]:
+ # dimension mismatch: fall back to the previously stored random
+ # coefficients, which already match the size of the mean
+ betas_random = self.Br.copy()
+ else:
+ betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]
3615
3586
  betas_random = self._apply_distribution(betas_random)
3616
3587
 
3617
3588
  return betas_random
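As a hedged illustration of the broadcast above: simulated random coefficients are built as mean plus standard deviation times draw, with shape (N, K, R) for observations, coefficients, and draws (the sizes below are arbitrary):

    import numpy as np

    N, K, R = 4, 2, 100
    draws = np.random.default_rng(1).standard_normal((N, K, R))
    br_mean = np.array([0.5, -1.0])
    br_sd = np.array([0.2, 0.3])
    # broadcast: (1, K, 1) + (N, K, R) * (1, K, 1) -> (N, K, R)
    betas_random = br_mean[None, :, None] + draws * br_sd[None, :, None]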
@@ -3630,28 +3601,71 @@ class ObjectiveFunction(object):
3630
3601
  # if gamma <= 0.01: #min defined value for stable nb
3631
3602
  # gamma = 0.01
3632
3603
 
3604
+ #g = stats.gamma.rvs(gamma, scale=lam / gamma, size=1.0 / gamma * lam ** Q)
+ #gg = stats.poisson.rvs(g)
3633
3611
  endog = y
3634
3612
  mu = lam
3613
+ '''
+ mu = lam * np.exp(gamma)  # TODO check that this does not need to be multiplied
+ alpha = np.exp(gamma)
+ '''
3635
3618
  alpha = gamma
3636
3619
  size = 1.0 / alpha * mu ** Q
3637
- alpha_size = alpha * mu ** Q
3638
- # prob = size/(size+mu)
3639
- prob = alpha / (alpha + mu)
3640
- # prob = 1/(1+mu*alpha)
3620
+
3621
+ prob = size / (size + mu)
3622
+ '''
+ size = 1 / np.exp(gamma) * mu ** 0
+ prob = size / (size + mu)
+ coeff = (gammaln(size + y) - gammaln(y + 1) -
+ gammaln(size))
+ llf = coeff + size * np.log(prob) + y * np.log(1 - prob)
+ '''
3635
+
3641
3636
  try:
3642
3637
  # print(np.shape(y),np.shape(size), np.shape(prob))
3643
- # gg2 = self.negbinom_pmf(alpha_size, prob, y)
3638
+ #gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)
+ '''
3645
3653
  gg = np.exp(
3646
3654
  gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
3647
3655
  y + alpha) * np.log(mu + alpha))
3648
-
3649
- # gg1 = self.negbinom_pmf(alpha_size, prob, y)
3650
- # gg = nbinom.pmf(y ,alpha, prob)
3656
+ gg[np.isnan(gg)] = 1
3657
+ '''
3658
+ gg_alt = nbinom.pmf(y, 1 / alpha, prob)
3659
+ #gg_alt_2 = (gammaln(size + y) - gammaln(y + 1) -
+ #gammaln(size)) + size * np.log(prob) + y * np.log(1 - prob)
3651
3665
 
3652
3666
  except Exception as e:
3653
- print(e)
3654
- return gg
3667
+ print("Neg Binom error.")
3668
+ return gg_alt
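A small sanity check, with illustrative values, that the closed-form expression used above agrees with scipy's nbinom under n = 1/alpha and p = n/(n + mu):

    import numpy as np
    from scipy.stats import nbinom
    from scipy.special import gammaln

    y, mu, alpha = 3, 2.5, 0.4
    n = 1.0 / alpha
    p = n / (n + mu)
    pmf_closed_form = np.exp(
        gammaln(y + n) - gammaln(y + 1) - gammaln(n)
        + n * np.log(p) + y * np.log(1 - p)
    )
    assert np.isclose(nbinom.pmf(y, n, p), pmf_closed_form)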
3655
3669
 
3656
3670
  def lindley_pmf(self, x, r, theta, k=50):
3657
3671
  """
@@ -3718,7 +3732,7 @@ class ObjectiveFunction(object):
3718
3732
 
3719
3733
  endog = y
3720
3734
  mu = lam
3721
- alpha = gamma
3735
+ alpha = np.exp(gamma)
3722
3736
  alpha = alpha * mu ** Q
3723
3737
  size = 1 / alpha * mu ** Q # also r
3724
3738
  # self.rate_param = size
@@ -3798,21 +3812,8 @@ class ObjectiveFunction(object):
3798
3812
 
3799
3813
  if dispersion == 1 or dispersion == 4: # nb
3800
3814
  # if model_nature is not None and 'dispersion_penalty' in model_nature:
3801
- # if b_gam < 0.8*model_nature['dispersion_penalty']:
3802
- # penalty += model_nature['dispersion_penalty'] -b_gam
3803
-
3804
- # if abs(b_gam) < 0.01:
3805
- # penalty += 1/np.abs(b_gam)
3806
-
3807
- if b_gam >= 4.5:
3808
- penalty += b_gam
3809
- b_gam = 4.61
3810
- # b_gam = 7.9
3811
- # penalty += model_nature['dispersion_penalty'] -b_gam
3812
- # penalty += 1/np.max((0.01,abs(b_gam)))
3813
- # b_gam = model_nature['dispersion_penalty']
3814
-
3815
- """
3815
+ #b_gam = 1/np.exp(b_gam)
3816
+ #print(b_gam)
3816
3817
  if b_gam <= 0:
3817
3818
  #penalty += 100
3818
3819
  #penalty += abs(b_gam)
@@ -3820,21 +3821,21 @@ class ObjectiveFunction(object):
3820
3821
  #b_gam = 1
3821
3822
 
3822
3823
  # if b_gam < 0.03:
3823
- penalty += 10
3824
+ penalty += min(1, np.abs(b_gam))  # clamp the penalty contribution to at most 1
3824
3825
 
3825
- b_gam = 0.03
3826
+ #b_gam = 0.001
3826
3827
  #
3827
3828
 
3828
- if b_gam >= 10:
3829
- penalty+= b_gam
3829
+ #if b_gam >= 10:
3830
+ # penalty+= b_gam
3830
3831
 
3831
- if b_gam == 0:
3832
- b_gam = min_comp_val
3832
+ # if b_gam == 0:
3833
+ #b_gam = min_comp_val
3833
3834
  #b_gam = 0.03
3834
3835
 
3835
- b_gam = abs(b_gam)
3836
+ # b_gam = abs(b_gam)
3836
3837
 
3837
- """
3838
+
3838
3839
 
3839
3840
  elif dispersion == 2:
3840
3841
  if b_gam >= 1:
@@ -3854,8 +3855,15 @@ class ObjectiveFunction(object):
3854
3855
  def eXB_calc(self, params_main, Xd, offset, dispersion, b_gam=None):
3855
3856
 
3856
3857
  # print('this was 0')
3857
- eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
3858
+ if dispersion:
+ eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
+ # TODO: consider whether the dispersion term should enter eta here
+ #eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :]) + dispersion
+ else:
+ eta = np.dot(Xd, params_main)[:, :, None] + np.array(offset[:, :, :])
3858
3865
  eta = np.array(eta)
3866
+
3859
3867
  # eta = np.float64(eta)
3860
3868
  # eta = np.dot(Xd, params_main)+offset[:,:,0]
3861
3869
  # eta2 = np.dot(Xd, params_main)[:,:,None]+np.array(offset[:,:,:])
@@ -3874,7 +3882,7 @@ class ObjectiveFunction(object):
3874
3882
 
3875
3883
  else:
3876
3884
  # eVd = self.my_lindley(np.exp(np.clip(eta, None, EXP_UPPER_LIMIT)), 1.29)
3877
-
3885
+ eta = eta.astype('float')
3878
3886
  eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
3879
3887
  return eVd
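Clipping the linear predictor before exponentiating is what keeps the conditional mean finite; a minimal sketch, with EXP_UPPER_LIMIT chosen here purely for illustration:

    import numpy as np

    EXP_UPPER_LIMIT = np.log(np.finfo(np.float64).max) - 10.0
    eta = np.array([1.0, 50.0, 800.0])
    eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))   # no overflow to inf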
3880
3888
 
@@ -3918,195 +3926,7 @@ class ObjectiveFunction(object):
3918
3926
  # np.sum(stats.poisson.logpmf(endog, np.exp(XB)))
3919
3927
  return -np.exp(XB) + y * XB - sc.gammaln(y + 1)
3920
3928
 
3921
- def loglik_zi(params, return_grad=False):
3922
- """
3923
- Loglikelihood for observations of Generic Zero Inflated model.
3924
-
3925
- Parameters
3926
- ----------
3927
- params : array_like
3928
- The parameters of the model.
3929
-
3930
- Returns
3931
- -------
3932
- loglike : ndarray
3933
- The log likelihood for each observation of the model evaluated
3934
- at `params`. See Notes for definition.
3935
-
3936
- Notes
3937
- -----
3938
- .. math:: \\ln L=\\ln(w_{i}+(1-w_{i})*P_{main\\_model})+
3939
- \\ln(1-w_{i})+L_{main\\_model}
3940
- where P - pdf of main model, L - loglike function of main model.
3941
-
3942
- for observations :math:`i=1,...,n`
3943
- """
3944
- params_infl = params[:self.k_inflate]
3945
- params_main = params[self.k_inflate:]
3946
-
3947
- y = self.endog
3948
- w = predict_logit(params_infl, exog_infl)
3949
-
3950
- w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
3951
- llf_main = loglik_obs_poisson(params_main, y)
3952
- dispersion = 0
3953
- b_gam = None
3954
- Xd = exog
3955
- eta = np.tile(np.dot(Xd, params_main), (1, 1)).transpose()
3956
- eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
3957
-
3958
- llf_main_og = self.loglik_obs(y, eVd.ravel(), dispersion, b_gam)
3959
- zero_idx = np.nonzero(y == 0)[0]
3960
- nonzero_idx = np.nonzero(y)[0] # type: ignore
3961
-
3962
- llf = np.zeros_like(y, dtype=np.float64)
3963
- llf[zero_idx] = (np.log(w[zero_idx] +
3964
- (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
3965
- llf[nonzero_idx] = np.log(
3966
- 1 - w[nonzero_idx]) + llf_main[nonzero_idx]
3967
- if return_grad:
3968
- score_main = Xd.T @ (y - eVd.ravel())
3969
- L = np.exp(np.dot(Xd, params_main))
3970
- score_main = (self.endog - L)[:, None] * Xd
3971
-
3972
- dldp = np.zeros(
3973
- (exog.shape[0], len(params_main)), dtype=np.float64)
3974
- dldw = np.zeros_like(exog_infl, dtype=np.float64)
3975
-
3976
- dldp[zero_idx, :] = (score_main[zero_idx].T *
3977
- (1 - (w[zero_idx]) / np.exp(llf[zero_idx]))).T
3978
- dldp[nonzero_idx, :] = score_main[nonzero_idx]
3979
-
3980
- dldw[zero_idx, :] = (exog_infl[zero_idx].T * w[zero_idx] *
3981
- (1 - w[zero_idx]) *
3982
- (1 - np.exp(llf_main[zero_idx])) /
3983
- np.exp(llf[zero_idx])).T
3984
- dldw[nonzero_idx, :] = -(exog_infl[nonzero_idx].T *
3985
- w[nonzero_idx]).T
3986
-
3987
- return llf, np.hstack((dldw, dldp)).sum(axis=0)
3988
-
3989
- else:
3990
-
3991
- return llf
3992
-
3993
- def zipoisson_logpmf(x, mu, w):
3994
- return _lazywhere(x != 0, (x, mu, w),
3995
- (lambda x, mu, w: np.log(1. - w) + x * np.log(mu) -
3996
- sc.gammaln(x + 1.) - mu),
3997
- np.log(w + (1. - w) * np.exp(-mu)))
3998
-
3999
- def zipoisson_pmf(x, mu, w):
4000
- return np.exp(zipoisson_logpmf(x, mu, w))
4001
-
4002
- def loglik_logit(params, endog_y, exog_x): # this is predict I think
4003
- q = 2 * endog_y - 1
4004
- X = exog_x
4005
- return np.sum(np.log(cdf(q * np.dot(X, params))))
4006
-
4007
- def predict_logit(params, exog=None, linear=False):
4008
- if exog is None:
4009
- exog = self.exog
4010
- if not linear:
4011
- return (cdf(np.dot(exog, params)))
4012
- else:
4013
- return (np.dot(exog, params))
4014
-
4015
- def cdf(X):
4016
- """
4017
- The logistic cumulative distribution function
4018
-
4019
- Parameters
4020
- ----------
4021
- X : array_like
4022
- `X` is the linear predictor of the logit model. See notes.
4023
-
4024
- Returns
4025
- -------
4026
- 1/(1 + exp(-X))
4027
-
4028
- Notes
4029
- -----
4030
- In the logit model,
4031
-
4032
- .. math:: \\Lambda\\left(x^{\\prime}\\beta\\right)=
4033
- \\text{Prob}\\left(Y=1|x\\right)=
4034
- \\frac{e^{x^{\\prime}\\beta}}{1+e^{x^{\\prime}\\beta}}
4035
- """
4036
- X = np.asarray(X)
4037
- return 1 / (1 + np.exp(-X))
4038
-
4039
- llobs, grad = loglik_zi(betas, return_grad)
4040
- llf = np.sum(llobs)
4041
- if return_grad:
4042
- return -llf, -grad
4043
- else:
4044
- return -llf
4045
-
4046
- def cdf_logit(self, X):
4047
- """
4048
- The logistic cumulative distribution function
4049
-
4050
- Parameters
4051
- ----------
4052
- X : array_like
4053
- `X` is the linear predictor of the logit model. See notes.
4054
-
4055
- Returns
4056
- -------
4057
- 1/(1 + exp(-X))
4058
-
4059
- Notes
4060
- -----
4061
- In the logit model,
4062
-
4063
- .. math:: \\Lambda\\left(x^{\\prime}\\beta\\right)=
4064
- \\text{Prob}\\left(Y=1|x\\right)=
4065
- \\frac{e^{x^{\\prime}\\beta}}{1+e^{x^{\\prime}\\beta}}
4066
- """
4067
- X = np.asarray(X)
4068
- return 1 / (1 + np.exp(-X))
4069
-
4070
- def predict_logit_part(self, params_infl, exog_infl, linear=False):
4071
-
4072
- if not linear:
4073
- return (self.cdf_logit(np.dot(exog_infl, params_infl)))
4074
- else:
4075
- return (np.dot(exog_infl, params_infl))
4076
-
4077
- def ZeroInflate_W_setup(self, exog_infl, y, params_infl):
4078
-
4079
- w = self.predict_logit_part(params_infl, exog_infl)
4080
-
4081
- w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
4082
-
4083
- llf_main = [1, 2, 3] # TODO ge
4084
- zero_idx = np.nonzero(y == 0)[0]
4085
- nonzero_idx = np.nonzero(y)[0]
4086
-
4087
- llf = np.zeros_like(y, dtype=np.float64)
4088
- llf[zero_idx] = (np.log(w[zero_idx] +
4089
- (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
4090
- llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
4091
-
4092
- return llf
4093
-
4094
- def dPXL(self, x, alpha):
4095
- return ((alpha ** 2) * (x + 3 * alpha + (alpha ** 2) + 3)) / (1 + alpha) ** (4 + x)
4096
-
4097
- # Define the gradient function
4098
3929
 
4099
- def poisson_lindley_gradient(self, params, exog, endog):
4100
- beta = params[-1]
4101
- mu = np.exp(np.dot(exog, params[:-1]))
4102
- q = beta / (1 + beta)
4103
- d_beta = (endog.ravel() + 1) / (mu + endog.ravel() + 1) - q / (1 - q)
4104
- d_beta = self.dpoisl(endog, beta).ravel()
4105
- d_mu = np.dot((endog - mu) * (1 - q) / (mu + endog + 1), exog)
4106
-
4107
- grad_n = np.concatenate((d_mu, np.atleast_2d(d_beta).T), axis=1)
4108
- der = np.sum(grad_n, axis=0)
4109
- return der, grad_n
4110
3930
 
4111
3931
  def dpoisl(self, x, theta, log=False):
4112
3932
  # if theta < 0:
@@ -4175,29 +3995,18 @@ class ObjectiveFunction(object):
4175
3995
  elif dispersion == 1:
4176
3996
 
4177
3997
  proba_r = self._nonlog_nbin(y, eVd, b_gam)
4178
- # print(1)
3998
+
3999
+
4179
4000
  # proba_d = self.dnegbimonli(y, eVd, b_gam )
4180
- # print('fuck if this actually works')
4001
+
4181
4002
 
4182
4003
  elif dispersion == 2:
4183
4004
 
4184
4005
  proba_r = self.general_poisson_pmf(eVd, y, b_gam)
4185
4006
 
4186
- elif dispersion == 3:
4187
- fa, ba = self.get_dispersion_paramaters(betas, dispersion)
4188
- zi = self.my_lindley(y, ba)
4189
- proba_r = poisson.pmf(y, zi * eVd.ravel())
4190
- # proba_r = self.lindl_pmf_chatgpt(y, l_pam)
4191
- # prob_2 = self.dpoisl(y, l_pam)
4192
- # proba_r = self.poisson_lindley_pmf(eVd, l_pam, y)
4193
- # proba_r = self.poisson_lindley_pmf(eVd, l_pam, y)
4194
- # print(1)
4195
- # proba_r = self.dpoisl(y, eVd)
4196
4007
 
4197
- elif dispersion == 4:
4198
- fa, ba = self.get_dispersion_paramaters(betas, dispersion)
4199
- self.zi = self.my_lindley(eVd, ba)
4200
- proba_r = self._nonlog_nbin(y, eVd + self.zi, b_gam)
4008
+
4009
+
4201
4010
  # proba_r = self._nonlog_nbin_lindley(y, eVd, fa, ba)
4202
4011
 
4203
4012
  elif dispersion == 'poisson_lognormal':
@@ -4210,7 +4019,7 @@ class ObjectiveFunction(object):
4210
4019
  # proba_r = self.poisson_lognormal_pmf(y, eVd, sig)
4211
4020
  proba_r = np.array(store)
4212
4021
  proba_r = np.atleast_2d(proba_r).T
4213
- print(1)
4022
+
4214
4023
 
4215
4024
  else:
4216
4025
  raise Exception('not implemented other modeling forms')
@@ -4219,7 +4028,7 @@ class ObjectiveFunction(object):
4219
4028
  proba_p = self._prob_product_across_panels(
4220
4029
  proba_r, self.panel_info)
4221
4030
  proba_r = proba_p
4222
- proba_r = np.clip(proba_r, min_comp_val, None)
4031
+ proba_r = np.clip(proba_r, min_comp_val, max_comp_val)
4223
4032
  loglik = np.log(proba_r)
4224
4033
  return loglik
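The two-sided clip matters because a zero (or overflowed) probability would propagate -inf or inf into the log-likelihood; a sketch with assumed boundary values:

    import numpy as np

    min_comp_val, max_comp_val = 1e-160, 1e+200   # assumed bounds
    proba_r = np.array([0.0, 1e-300, 0.2])
    loglik = np.log(np.clip(proba_r, min_comp_val, max_comp_val))   # finite everywhere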
4225
4034
 
@@ -4227,6 +4036,8 @@ class ObjectiveFunction(object):
4227
4036
  if dispersion == 0 or dispersion == 3:
4228
4037
  return 0
4229
4038
  else:
4039
+
4040
+
4230
4041
  return 1
4231
4042
 
4232
4043
  def _prob_product_across_panels(self, pch, panel_info):
@@ -4267,7 +4078,7 @@ class ObjectiveFunction(object):
4267
4078
  # if (len(betas) -Kf-Kr-self.is_dispersion(dispersion)) != (Kchol + Kr):
4268
4079
 
4269
4080
  # gr_f, gr_u, gr_s = np.zeros((N, Kf)), np.zeros((N, Kr)), np.zeros((N, len(betas) -Kf-Kr-self.is_dispersion(dispersion))) #FIX
4270
- Kf2, Kr, Kc, Kr_b, Kchol, Kh, zi_terms = self.get_num_params()
4081
+ Kf2, Kr, Kc, Kr_b, Kchol, Kh = self.get_num_params()
4271
4082
 
4272
4083
  gr_f, gr_u, gr_s = np.zeros((N, Kf)), np.zeros(
4273
4084
  (N, Kr + Kc)), np.zeros((N, Kchol + Kr_b))
@@ -4282,7 +4093,7 @@ class ObjectiveFunction(object):
4282
4093
  if y[i] == 0:
4283
4094
  gr_e[i] = 0
4284
4095
 
4285
- if self.is_dispersion(dispersion):
4096
+ if self.is_dispersion(dispersion) and not self.no_extra_param:
4286
4097
  gr_d = np.zeros((N, 1))
4287
4098
  if dispersion == 1:
4288
4099
  # trying alt
@@ -4386,12 +4197,13 @@ class ObjectiveFunction(object):
4386
4197
  br, draws_, brstd, dis_fit_long) # (N,K,R)
4387
4198
  dprod_r = dev.np.einsum("njk,njr -> nkr", Xdr,
4388
4199
  einsum_model_form, dtype=np.float64) # (N,K,R)
4389
- der_prod_r = dprod_r * der * proba_n[:, None, :] # (N,K,R)
4390
- der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :] # I think this is the case check
4391
- der_prod_r = dprod_r[:, X_tril_idx, :] * der * proba_n[:, None, :] # or this one
4392
- print('which one of these')
4200
+ #der_prod_r = dprod_r * der * proba_n[:, None, :] # (N,K,R)
4201
+ #der_prod_r = dprod_r * der * proba_n[:, X_tril_idx, :] # I think this is the case check
4202
+
4203
+ der_prod_r = dprod_r * der * proba_n[:, None, :]  # (N, K, R)
4204
+
4393
4205
  der_t = self._compute_derivatives(
4394
- br, draws_[:, draws_tril_idx, :], brstd, self.dist_fit) # (N,K,R)
4206
+ br[draws_tril_idx], draws_[:, draws_tril_idx, :], brstd, np.array(self.dist_fit)[draws_tril_idx]) # (N,K,R)
4395
4207
  # er_t = self._compute_derivatives(br, draws_, brstd[:, draws_tril_idx,: ], self.dist_fit, draws_tril_idx)
4396
4208
  der_prod_r_t = dprod_r[:, draws_tril_idx, :] * \
4397
4209
  der_t * proba_n[:, None, :] # (N,K,R)
@@ -4452,14 +4264,18 @@ class ObjectiveFunction(object):
4452
4264
  grad_n = self._concat_gradients(
4453
4265
  (gr_f, gr_u, gr_s, gr_e)) / Rlik # (N,K)
4454
4266
  else:
4455
- grad_n = self._concat_gradients(
4456
- (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik # (N,K)
4457
- grad_n = np.nan_to_num(grad_n, nan=0, posinf=10000, neginf=-10000)
4458
- grad_n = np.clip(grad_n, -1000, 1000)
4267
+ if self.no_extra_param:
4268
+ grad_n = self._concat_gradients(
4269
+ (gr_f, gr_u, gr_s, gr_h, gr_hs)) / Rlik # (N,K)
4270
+ else:
4271
+ grad_n = self._concat_gradients(
4272
+ (gr_f, gr_u, gr_s, gr_h, gr_hs, gr_d[:, None])) / Rlik # (N,K)
4273
+ grad_n = np.nan_to_num(grad_n, nan=0, posinf=1000, neginf=-1000)
4274
+ grad_n = np.clip(grad_n, -100, 100)
4459
4275
  n = np.shape(grad_n)[0]
4460
4276
  # subtract out mean gradient value
4461
- # grad_n_sub = grad_n-(np.sum(grad_n, axis=0)/n)
4462
- # grad_n = grad_n_sub
4277
+ grad_n_sub = grad_n - (np.sum(grad_n, axis=0) / n)
4278
+ grad_n = grad_n_sub
4463
4279
  grad = grad_n.sum(axis=0)
4464
4280
  return grad, grad_n
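Subtracting the mean gradient, as re-enabled above, centres the per-observation scores; a sketch of why this helps when forming a BHHH-style covariance from the outer product (random data, illustrative only):

    import numpy as np

    grad_n = np.random.default_rng(2).normal(size=(50, 4))   # (N, K) scores
    grad_c = grad_n - grad_n.mean(axis=0)                    # centred scores
    bhhh = grad_c.T @ grad_c                                 # K x K information estimate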
4465
4281
 
@@ -4521,9 +4337,9 @@ class ObjectiveFunction(object):
4521
4337
 
4522
4338
  elif dispersion == 1:
4523
4339
 
4524
- der = -self.NB_Score(betas, y, eVd, Xd, 0, obs_specific)
4340
+ der = self.NB_Score(betas, y, eVd, Xd, 0, obs_specific)
4525
4341
  if both:
4526
- grad_n = -self.NB_Score(betas, y, eVd, Xd, 0, True)
4342
+ grad_n = self.NB_Score(betas, y, eVd, Xd, 0, True)
4527
4343
  return np.nan_to_num(der, nan=200, posinf=200, neginf=-200), np.nan_to_num(grad_n, nan=140, posinf=140,
4528
4344
  neginf=-140)
4529
4345
 
@@ -4610,7 +4426,7 @@ class ObjectiveFunction(object):
4610
4426
  return proba_r.sum(axis=1), np.squeeze(proba_r)
4611
4427
 
4612
4428
  def _penalty_betas(self, betas, dispersion, penalty, penalty_ap=100.0):
4613
- penalty_val = 0.05
4429
+ penalty_val = 0.1
4614
4430
  penalty_val_max = 130
4615
4431
 
4616
4432
  # print('change_later')
@@ -4626,8 +4442,8 @@ class ObjectiveFunction(object):
4626
4442
  if abs(i) > penalty_val_max:
4627
4443
  penalty += abs(i)
4628
4444
 
4629
- # if abs(i) < penalty_val:
4630
- # penalty += 5
4445
+ #if abs(i) < penalty_val:
4446
+ # penalty += 5
4631
4447
 
4632
4448
  # penalty = 0
4633
4449
  return penalty
@@ -4716,8 +4532,10 @@ class ObjectiveFunction(object):
4716
4532
  return self._loglik_gradient(self, betas, *stuff)
4717
4533
 
4718
4534
  def get_br_and_bstd(betas, self):
4719
- Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh, zi_terms_a = self.get_num_params()
4720
- br = betas[Kf:Kf + Kr]
4535
+ Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
4536
+ Kr = Kr_a + Kr_c  # TODO: check if this works
4538
+ br = betas[Kf_a:Kf_a + Kr]
4721
4539
  # Calculate the size of the br matrix
4722
4540
  br_size = int((1 + np.sqrt(1 + 8 * Kr_b_a)) / 2)
4723
4541
 
@@ -4728,12 +4546,11 @@ class ObjectiveFunction(object):
4728
4546
  index = 0
4729
4547
  for i in range(br_size):
4730
4548
  for j in range(i, br_size):
4731
- br_std[j, i] = betas[Kf + Kr + index]
4549
+ br_std[j, i] = betas[Kf_a + Kr + index]
4732
4550
  index += 1
4733
4551
 
4734
4552
  brstd = br_std
4735
- print(brstd)
4736
- print(brstd)
4553
+
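The br_size formula inverts a triangular number so the flat slice of betas can be poured back into a lower-triangular matrix; a standalone sketch (the ordering convention may differ from the method above):

    import numpy as np

    flat = np.array([0.5, 0.1, 0.4, -0.2, 0.3, 0.6])   # K*(K+1)/2 = 6 -> K = 3
    K = int((-1 + np.sqrt(1 + 8 * len(flat))) / 2)
    chol = np.zeros((K, K))
    chol[np.tril_indices(K)] = flat                     # row-major lower triangle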
4737
4554
 
4738
4555
 
4739
4556
  def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
@@ -4765,9 +4582,9 @@ class ObjectiveFunction(object):
4765
4582
  penalty = self._penalty_betas(
4766
4583
  betas, dispersion, penalty, float(len(y) / 10.0))
4767
4584
  self.n_obs = len(y) # feeds into gradient
4768
- if draws is None and draws_grouped is None and (
4585
+ if draws is None and draws_grouped is None and (model_nature is None or
4769
4586
  'draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
4770
-
4587
+ # TODO: do I shuffle the draws?
4771
4588
  if type(Xd) == dict:
4772
4589
  N, Kf, P = 0, 0, 0
4773
4590
  for key in Xd:
@@ -4775,13 +4592,13 @@ class ObjectiveFunction(object):
4775
4592
  P += Xd[key].shape[1]
4776
4593
  Kf += Xd[key].shape[2]
4777
4594
  else:
4778
- self.naming_for_printing(betas, 1, dispersion, zi_fit=zi_list, model_nature=model_nature)
4595
+ self.naming_for_printing(betas, 1, dispersion, model_nature=model_nature)
4779
4596
  N, P, Kf = Xd.shape[0], Xd.shape[1], Xd.shape[2]
4780
4597
  betas = np.array(betas)
4781
4598
  Bf = betas[0:Kf] # Fixed betas
4782
4599
 
4783
4600
  main_disper, lindley_disp = self.get_dispersion_paramaters(
4784
- betas, dispersion)
4601
+ betas, dispersion)  # TODO: fix this up
4785
4602
  if lindley_disp is not None:
4786
4603
  if lindley_disp <= 0:
4787
4604
  penalty += 1
@@ -4805,36 +4622,20 @@ class ObjectiveFunction(object):
4805
4622
  llf_main = self.loglik_obs(
4806
4623
  y, eVd, dispersion, main_disper, lindley_disp, betas)
4807
4624
 
4808
- # llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
4625
+ llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
4809
4626
 
4810
4627
  loglik = llf_main.sum()
4811
- if 'exog_infl' in model_nature:
4812
- params_infl = betas[Kf:Kf + len(model_nature.get('exog_infl'))]
4813
- params_main = Bf
4814
- # ones = np.ones((model_nature.get('exog_inflX').shape[0], model_nature.get('exog_inflX').shape[1], 1))
4815
- # exog_infl = np.concatenate((ones, model_nature.get('exog_inflX')), axis =2 )
4816
- exog_infl = model_nature.get('exog_inflX')
4817
- llf_main = llf_main # TODO test this
4818
- w = self.predict_logit_part(params_infl, exog_infl)
4819
-
4820
- w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
4821
-
4822
- zero_idx = np.nonzero(y == 0)[0]
4823
- nonzero_idx = np.nonzero(y)[0] # FIXME should shape be unravelled
4824
4628
 
4825
- llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1) # TODO test this i added ravel to this code
4826
- llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
4827
- llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
4828
- loglik = llf.sum()
4829
4629
 
4830
4630
  loglik = np.clip(loglik, log_lik_min, log_lik_max)
4831
4631
  if self.power_up_ll:
4832
4632
 
4833
4633
  loglik += 2*loglik
4634
+ print('power_up_ll is active: scaling the log-likelihood')
4834
4635
  penalty = self.regularise_l2(betas)
4835
4636
 
4836
4637
  if not np.isreal(loglik):
4837
- loglik = - 1000000000.0
4638
+ loglik = -10000000.0
4838
4639
 
4839
4640
  output = (-loglik + penalty,)
4840
4641
  if return_gradient:
@@ -4842,16 +4643,21 @@ class ObjectiveFunction(object):
4842
4643
  if return_gradient_n:
4843
4644
  der, grad_n = self.simple_score_grad(
4844
4645
  betas, y, eVd, Xd, dispersion, both=True)
4845
- return (-loglik + penalty, -der, grad_n)
4646
+ #return (-loglik + penalty, -der, grad_n)*self.minimize_scaler
4647
+ scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel(), grad_n))
4648
+ return scaled_tuple
4846
4649
  else:
4847
4650
  der = self.simple_score_grad(
4848
4651
  betas, y, eVd, Xd, dispersion, both=False)
4849
-
4850
- return (-loglik + penalty, -der.ravel())
4652
+ scaled_tuple = tuple(
4653
+ x * self.minimize_scaler for x in (-loglik + penalty, -der.ravel()))
4654
+ return scaled_tuple
4655
+ #return (-loglik + penalty, -der.ravel())*self.minimize_scaler
4851
4656
  else:
4852
- return -loglik + penalty
4657
+
4658
+ return (-loglik + penalty) * self.minimize_scaler
4853
4659
  # Else, we have draws
4854
- self.n_obs = len(y) * self.Ndraws
4660
+ self.n_obs = len(y) * self.Ndraws  # TODO: is this problematic?
4855
4661
  penalty += self._penalty_betas(
4856
4662
  betas, dispersion, penalty, float(len(y) / 10.0))
4857
4663
 
@@ -4860,7 +4666,7 @@ class ObjectiveFunction(object):
4860
4666
  # Kf =0
4861
4667
  betas = np.array(betas)
4862
4668
  betas = dev.to_gpu(betas) # TODO fix potential problem
4863
- self.naming_for_printing(betas, 0, dispersion, zi_fit=zi_list, model_nature=model_nature)
4669
+ self.naming_for_printing(betas, 0, dispersion, model_nature=model_nature)
4864
4670
  y = dev.to_gpu(y)
4865
4671
  if draws is not None and draws_grouped is not None:
4866
4672
  draws = np.concatenate((draws_grouped, draws), axis=1)
@@ -4908,7 +4714,7 @@ class ObjectiveFunction(object):
4908
4714
  # if (Kchol +Kr) != (len(betas) -Kf-Kr -self.is_dispersion(dispersion)):
4909
4715
  # print('I think this is fine')
4910
4716
  n_coeff = self.get_param_num(dispersion)
4911
- Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh, zi_terms_a = self.get_num_params()
4717
+ Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
4912
4718
  if Kchol_a != Kchol:
4913
4719
  print('hold')
4914
4720
 
@@ -4923,7 +4729,9 @@ class ObjectiveFunction(object):
4923
4729
  Kf = 0
4924
4730
  else:
4925
4731
  if n_coeff != len(betas):
4926
- raise Exception
4732
+ raise Exception(
+ 'number of betas does not match the expected parameter count'
+ )
4927
4735
  Bf = betas[0:Kf] # Fixed betas
4928
4736
 
4929
4737
 
@@ -4949,11 +4757,11 @@ class ObjectiveFunction(object):
4949
4757
  # brstd), draws_) # Get random coefficients, old method
4950
4758
  Br = self._transform_rand_betas(br,
4951
4759
  brstd, draws_) # Get random coefficients
4952
- self.naming_for_printing(betas, dispersion=dispersion, zi_fit=zi_list, model_nature=model_nature)
4760
+ self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
4953
4761
  self.Br = Br.copy()
4954
4762
 
4955
4763
  else:
4956
- self.naming_for_printing(betas, dispersion=dispersion, zi_fit=zi_list, model_nature=model_nature)
4764
+ self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
4957
4765
  chol_mat = self._chol_mat(
4958
4766
  len(self.rdm_cor_fit), br, brstd, self.rdm_cor_fit)
4959
4767
  self.chol_mat = chol_mat.copy()
@@ -5025,7 +4833,8 @@ class ObjectiveFunction(object):
5025
4833
  eVd = self.lam_transform(eVd, dispersion, betas[-1])
5026
4834
 
5027
4835
  if self.is_dispersion(dispersion):
5028
- penalty, betas[-1] = self._penalty_dispersion(
4836
+ if not self.no_extra_param:
4837
+ penalty, betas[-1] = self._penalty_dispersion(
5029
4838
  dispersion, betas[-1], eVd, y, penalty, model_nature)
5030
4839
 
5031
4840
  '''
@@ -5069,38 +4878,22 @@ class ObjectiveFunction(object):
5069
4878
  proba.append(dev.to_cpu(proba_))
5070
4879
 
5071
4880
  lik = np.stack(proba).sum(axis=0) / R # (N, )
5072
- lik = np.clip(lik, min_comp_val, 10000)
4881
+ lik = np.clip(lik, min_comp_val, max_comp_val)
5073
4882
  # lik = np.nan_to_num(lik, )
5074
4883
  loglik = np.log(lik)
5075
4884
  llf_main = loglik
5076
- if 'exog_infl' in model_nature:
5077
- params_infl = betas[Kf:Kf + len(model_nature.get('exog_infl'))]
5078
- params_main = Bf
5079
- exog_infl = model_nature.get('exog_inflX')
5080
- llf_main = llf_main.ravel() # TODO test this
5081
- w = self.predict_logit_part(params_infl, exog_infl)
5082
-
5083
- w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
5084
-
5085
- zero_idx = np.nonzero(y == 0)[0]
5086
- nonzero_idx = np.nonzero(y)[0] # FIXME should shape be unravelled
5087
-
5088
- llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1) # TODO test this i added ravel to this code
5089
- llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
5090
- llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
5091
- loglik = llf.sum()
5092
- else:
5093
4885
 
5094
- loglik = loglik.sum()
4886
+
4887
+ loglik = loglik.sum()
5095
4888
 
5096
4889
  loglik = np.clip(loglik, log_lik_min, log_lik_max)
5097
4890
  if self.power_up_ll:
5098
4891
  penalty += self.regularise_l2(betas)
5099
- loglik = 2*loglik
4892
+
5100
4893
  penalty += self.regularise_l2(betas)
5101
4894
  if not return_gradient:
5102
4895
 
5103
- output = (-loglik + penalty,)
4896
+ output = ((-loglik + penalty) * self.minimize_scaler,)
5104
4897
  if verbose > 1:
5105
4898
  print(
5106
4899
  f"Evaluation {self.total_fun_eval} Log-Lik.={-loglik:.2f}")
@@ -5130,19 +4923,24 @@ class ObjectiveFunction(object):
5130
4923
  # Hinv = np.linalg.inv(H)
5131
4924
  # except Exception:
5132
4925
  # Hinv = np.linalg.pinv(H)
5133
- output = (-loglik + penalty, -grad, grad_n)
4926
+ scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad, grad_n))
4927
+ return scaled_tuple
4928
+ #output = (-loglik + penalty, -grad, grad_n)*self.minimize_scaler
5134
4929
 
5135
- return output
4930
+ #return output
5136
4931
  else:
4932
+ scaled_tuple = tuple(x * self.minimize_scaler for x in (-loglik + penalty, -grad))
4933
+ return scaled_tuple
4934
+ #output = (-loglik + penalty, -grad)*self.minimize_scaler
5137
4935
 
5138
- output = (-loglik + penalty, -grad)
5139
-
5140
- return output
4936
+ #return output
5141
4937
  except Exception as e:
5142
4938
  traceback.print_exc()
5143
4939
  print(e)
5144
4940
 
5145
-
4941
+ def minimize_function(self, loglike):
+ r'Scales the log-likelihood to a per-observation value that is better conditioned for minimization.'
+ return loglike / self.n_obs
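Dividing by the number of observations keeps the objective and its gradient on a similar scale regardless of sample size, which helps gradient-based optimisers choose step sizes; a self-contained sketch with a plain Poisson likelihood (not the class's own estimation path):

    import numpy as np
    from scipy.optimize import minimize

    rng = np.random.default_rng(0)
    X = rng.normal(size=(500, 3))
    y = rng.poisson(np.exp(X @ np.array([0.2, -0.1, 0.3])))

    def avg_neg_loglik(beta, X, y):
        mu = np.exp(X @ beta)
        return np.mean(mu - y * np.log(mu))   # Poisson NLL per observation

    res = minimize(avg_neg_loglik, np.zeros(3), args=(X, y), method="BFGS")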
5146
4944
  def print_chol_mat(self, betas):
5147
4945
  print(self.chol_mat)
5148
4946
  self.get_br_and_bstd(betas)
@@ -5378,12 +5176,16 @@ class ObjectiveFunction(object):
5378
5176
  return H
5379
5177
 
5380
5178
  def _minimize(self, loglik_fn, x, args, method, tol, options, bounds=None, hess_calc=None):
5381
-
5179
+ #method = 'BFGS'
5382
5180
  if method == "BFGS":
5383
5181
 
5384
5182
  try:
5183
+ argbs = list(args)
5385
5184
 
5386
- return self._bfgs(loglik_fn, x, args=args, tol=tol, **options) # @IgnoreException
5185
+ argbs[7] = True
5186
+ argsb = tuple(argbs)
5187
+ a = self._bfgs(loglik_fn, x, args=argsb, tol=tol, **options)
5188
+ return self._bfgs(loglik_fn, x, args=args, tol=tol, **options)
5387
5189
 
5388
5190
  except:
5389
5191
  return minimize(loglik_fn, x, args=args, jac=args[6], method='BFGS', tol=tol, options=options)
@@ -5406,7 +5208,7 @@ class ObjectiveFunction(object):
5406
5208
  H = self.numerical_hessian(lambda x: self._loglik_gradient(x, *argbs), result.x, eps=1e-7 * self.n_obs)
5407
5209
  result['Hessian'] = H
5408
5210
  result['hess_inv'] = np.linalg.pinv(H)
5409
- print('to do, only if hessian is fhfhfhf')
5211
+
5410
5212
  standard_errors = np.sqrt(np.diag(np.linalg.pinv(H)))
5411
5213
  return result
5412
5214
  # return minimize(loglik_fn, x, args=args, jac=args[6], hess=args[7], method='BFGS', options= {'gtol':1e-7*self.N}*self.Ndraws)
@@ -5630,7 +5432,7 @@ class ObjectiveFunction(object):
5630
5432
  if self.power_up_ll:
5631
5433
  loglikelihood =-optim_res['fun']/2 - penalty
5632
5434
  else:
5633
- loglikelihood = -optim_res['fun'] - penalty
5435
+ loglikelihood = -optim_res['fun'] / self.minimize_scaler - penalty
5634
5436
 
5635
5437
  # self.coeff_names = coeff_names
5636
5438
  # self.total_iter = optim_res['nit']
@@ -5677,7 +5479,7 @@ class ObjectiveFunction(object):
5677
5479
  return a
5678
5480
 
5679
5481
  def fitRegression(self, mod,
5680
- dispersion=0, maxiter=2000, batch_size=None, num_hess=False):
5482
+ dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):
5681
5483
 
5682
5484
  """
5683
5485
  Fits a poisson regression given data and outcomes if dispersion is not declared
@@ -5689,12 +5491,12 @@ class ObjectiveFunction(object):
5689
5491
  betas_est - array. Coefficients which maximize the negative log-liklihood.
5690
5492
  """
5691
5493
  # Set default method
5692
- sub_zi = None
5693
- exog_infl = None if 'exog_infl' not in mod else mod['exog_infl']
5694
- inf_betas = 0 if exog_infl is None else len(exog_infl)
5494
+ # TODO: the initial fit worked but it throws
5495
+
5496
+
5695
5497
 
5696
5498
  sol = Solution()
5697
- log_ll = 10.0 ** 9
5499
+
5698
5500
  tol = {'ftol': 1e-8, 'gtol': 1e-6}
5699
5501
  is_delete = 0
5700
5502
  dispersion = mod.get('dispersion')
@@ -5706,10 +5508,7 @@ class ObjectiveFunction(object):
5706
5508
  if self.hess_yes == False:
5707
5509
  method2 = 'BFGS_2'
5708
5510
  method2 = self.method_ll
5709
- # method2 = 'BFGS_2'
5710
5511
 
5711
- # method2 = 'BFGS_2'
5712
- # method2 = 'dogleg'
5713
5512
  bic = None
5714
5513
  pvalue_alt = None
5715
5514
  zvalues = None
@@ -5726,8 +5525,10 @@ class ObjectiveFunction(object):
5726
5525
  _g, pg, kg = 0, 0, 0
5727
5526
 
5728
5527
  dispersion_param_num = self.is_dispersion(dispersion)
5528
+ if self.no_extra_param:
5529
+ dispersion_param_num = 0
5729
5530
 
5730
- paramNum = self.get_param_num(dispersion)
5531
+ #paramNum = self.get_param_num(dispersion)
5731
5532
  self.no_random_paramaters = 0
5732
5533
  if 'XG' in mod:
5733
5534
  XX = np.concatenate((mod.get('X'), mod.get('XG'), mod.get('Xr'), mod.get('XH')), axis=2)
@@ -5753,7 +5554,7 @@ class ObjectiveFunction(object):
5753
5554
  XX_test = mod.get('Xr_test')
5754
5555
 
5755
5556
  bb = np.random.uniform(
5756
- -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num + inf_betas)
5557
+ -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num)
5757
5558
 
5758
5559
  if method == 'L-BFGS-B':
5759
5560
  if dispersion == 0:
@@ -5780,17 +5581,28 @@ class ObjectiveFunction(object):
5780
5581
  else:
5781
5582
  bb[0] = self.constant_value
5782
5583
  if dispersion == 1:
5783
- bb[-1] = self.negative_binomial_value
5584
+ if not self.no_extra_param:
5585
+ bb[-1] = self.negative_binomial_value
5784
5586
  bounds = None
5785
5587
 
5588
+
5589
+
5786
5590
  # intial_beta = minimize(self._loglik_gradient, bb, args =(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method = 'nelder-mead', options={'gtol': 1e-7*len(XX)})
5787
5591
  hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
5788
- initial_beta = self._minimize(self._loglik_gradient, bb,
5592
+
5593
+ if self.no_extra_param:
5594
+ dispersion_poisson = 0
5595
+ initial_beta = self._minimize(self._loglik_gradient, bb,
5789
5596
  args=(XX, y, None, None, None, None, calc_gradient, hess_est,
5790
- dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None,
5597
+ dispersion_poisson, 0, False, 0, None, None, None, None, None,
5791
5598
  mod),
5792
5599
  method=method2, tol=1e-5, options={'gtol': tol['gtol']},
5793
5600
  bounds=bounds)
5601
+ if dispersion:
5602
+ nb_parma = self.poisson_mean_get_dispersion(initial_beta.x, XX, y)
5603
+
5604
+
5605
+
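The two-stage idea here — fit the Poisson mean first, then back out a dispersion estimate before refitting — can be sketched with a method-of-moments alpha from Var(y) ~ mu + alpha*mu**2; this is an assumed reading of poisson_mean_get_dispersion, not its actual implementation:

    import numpy as np

    def moment_dispersion(y, mu):
        # solve sum((y - mu)^2 - mu) = alpha * sum(mu^2) for alpha
        num = ((y - mu) ** 2 - mu).sum()
        den = (mu ** 2).sum()
        return max(num / den, 1e-6)   # keep the NB dispersion positive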
5794
5606
 
5795
5607
  if method2 == 'L-BFGS-B':
5796
5608
  if hasattr(initial_beta.hess_inv, 'todense'):
@@ -5803,7 +5615,7 @@ class ObjectiveFunction(object):
5803
5615
  if initial_beta is not None and np.isnan(initial_beta['fun']):
5804
5616
  initial_beta = self._minimize(self._loglik_gradient, bb,
5805
5617
  args=(XX, y, None, None, None, None, True, True, dispersion,
5806
- 0, False, 0, None, sub_zi, exog_infl, None, None, mod),
5618
+ 0, False, 0, None, None, None, None, None, mod),
5807
5619
  method=method2, tol=tol['ftol'], options={'gtol': tol['gtol']})
5808
5620
 
5809
5621
  if initial_beta is not None and not np.isnan(initial_beta['fun']):
@@ -5827,24 +5639,24 @@ class ObjectiveFunction(object):
5827
5639
  loglik=log_ll_fixed, num_parm=paramNum, GOF=other_measures)
5828
5640
 
5829
5641
  self.naming_for_printing(
5830
- initial_beta['x'], 1, dispersion, zi_fit=sub_zi, model_nature=mod)
5642
+ initial_beta['x'], 1, dispersion, model_nature=mod)
5831
5643
 
5832
5644
  if self.is_multi:
5833
5645
  in_sample_mae = self.validation(
5834
5646
  initial_beta['x'], mod.get('y'), mod.get('X'), dispersion=dispersion,
5835
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None, model_nature=mod, halton=0,
5647
+ rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
5836
5648
  testing=0)
5837
5649
 
5838
5650
  sol.add_objective(TRAIN=in_sample_mae)
5839
5651
  MAE_out = self.validation(
5840
5652
  initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
5841
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None, model_nature=mod, halton=0)
5653
+ rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0)
5842
5654
  sol.add_objective(TEST=MAE_out)
5843
5655
 
5844
5656
  if self.val_percentage >0:
5845
5657
  MAE_VAL = self.validation(
5846
5658
  initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
5847
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None, model_nature=mod, halton=0,
5659
+ rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
5848
5660
  validation=1)
5849
5661
  sol.add_objective(VAL=MAE_VAL)
5850
5662
  if sol[self._obj_1] <= self.best_obj_1:
@@ -5888,7 +5700,7 @@ class ObjectiveFunction(object):
5888
5700
 
5889
5701
  b = [b[i] if i > len(self.none_handler(self.fixed_fit)) + len(
5890
5702
  self.none_handler(self.rdm_fit)) + len(
5891
- self.none_handler(self.rdm_cor_fit)) else b[i] / 1.3 for i in range(len(b))]
5703
+ self.none_handler(self.rdm_cor_fit)) else b[i] for i in range(len(b))]
5892
5704
  else:
5893
5705
  b = bb
5894
5706
 
@@ -5898,14 +5710,15 @@ class ObjectiveFunction(object):
5898
5710
  else:
5899
5711
  b = np.insert(b, -1, np.random.uniform(0.05, 0.1))
5900
5712
  if dispersion == 1:
5901
- b[-1] = np.abs(b[-1])
5902
- if b[-1] > 10:
5903
- b[-1] = 5
5713
+ if not self.no_extra_param:
5714
+ b[-1] = np.abs(b[-1])
5715
+ if b[-1] > 10:
5716
+ b[-1] = 5
5904
5717
  elif dispersion == 2:
5905
5718
  b[-1] = .5
5906
5719
  if method == 'L-BFGS-B' or method2 == 'L-BFGS-B':
5907
5720
 
5908
- Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh, zi_terms_a = self.get_num_params()
5721
+ Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
5909
5722
  if Kh > 0:
5910
5723
  Kh_e = mod.get('XH').shape[-1]
5911
5724
  Kh_range = Kh - Kh_e
@@ -5949,9 +5762,6 @@ class ObjectiveFunction(object):
5949
5762
 
5950
5763
  bounds.append((np.random.uniform(0.05, .15), bob2[count] + 7))
5951
5764
  count += 1
5952
-
5953
-
5954
-
5955
5765
  elif ii < jj:
5956
5766
  if bob2[count] > 0:
5957
5767
 
@@ -6024,18 +5834,35 @@ class ObjectiveFunction(object):
6024
5834
  mod['dispersion_penalty'] = np.abs(b[-1])
6025
5835
  grad_args = (
6026
5836
  X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0, self.rdm_cor_fit,
6027
- self.zi_fit, exog_infl, draws_grouped, XG, mod)
5837
+ None, None, draws_grouped, XG, mod)
6028
5838
  # self.gradients_est_yes = (1, 1)
6029
5839
 
6030
5840
  if draws is None and draws_hetro is not None:
6031
5841
  print('hold')
6032
- betas_est = self._minimize(self._loglik_gradient, b, args=(
6033
- X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
6034
- self.rdm_cor_fit, self.zi_fit, exog_infl, draws_grouped, XG, mod),
6035
- method=method2, tol=tol['ftol'],
6036
- options={'gtol': tol['gtol']}, bounds=bounds,
6037
- hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
5842
+ #self.grad_yes = True
5843
+ #self.hess_yes = True
6038
5844
 
5845
+ if self.no_extra_param:
5846
+ dispersion_poisson = 0
5847
+ betas_est = self._minimize(self._loglik_gradient, b, args=(
5848
+ X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion_poisson, 0, False, 0,
5849
+ self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
5850
+ method=method2, tol=tol['ftol'],
5851
+ options={'gtol': tol['gtol']}, bounds=bounds,
5852
+ hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
5853
+ if dispersion:
5854
+ initial_fit_beta = betas_est.x
5855
+ parmas = np.append(initial_fit_beta, nb_parma)
5856
+ self.nb_parma = nb_parma
5857
+ #print(f'neg binomi,{self.nb_parma}')
5858
+ betas_est = self._minimize(self._loglik_gradient, initial_fit_beta, args=(
5859
+ X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
5860
+ self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
5861
+ method=method2, tol=tol['ftol'],
5862
+ options={'gtol': tol['gtol']}, bounds=bounds,
5863
+ hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
5864
+
5865
+ #print('refit with estimation of NB')
6039
5866
  # self.numerical_hessian_calc = True
6040
5867
  if self.numerical_hessian_calc:
6041
5868
  try:
@@ -6050,7 +5877,7 @@ class ObjectiveFunction(object):
6050
5877
  betas_est = self._minimize(self._loglik_gradient, b, args=(
6051
5878
  X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0,
6052
5879
  self.rdm_cor_fit,
6053
- self.zi_fit, exog_infl, draws_grouped, XG, mod),
5880
+ None, None, draws_grouped, XG, mod),
6054
5881
  method=method2, tol=tol['ftol'],
6055
5882
  options={'gtol': tol['gtol']})
6056
5883
 
@@ -6059,7 +5886,7 @@ class ObjectiveFunction(object):
6059
5886
 
6060
5887
  if np.isfinite(betas_est['fun']):
6061
5888
  self.naming_for_printing(
6062
- betas_est['x'], 0, dispersion, zi_fit=sub_zi, model_nature=mod)
5889
+ betas_est['x'], 0, dispersion, model_nature=mod)
6063
5890
 
6064
5891
  if method2 == 'L-BFGS-B':
6065
5892
 
@@ -6086,7 +5913,7 @@ class ObjectiveFunction(object):
6086
5913
 
6087
5914
  paramNum = len(betas_est['x'])
6088
5915
  self.naming_for_printing(
6089
- betas_est['x'], 0, dispersion, zi_fit=sub_zi, model_nature=mod)
5916
+ betas_est['x'], 0, dispersion, model_nature=mod)
6090
5917
 
6091
5918
  sol.add_objective(bic=bic, aic=aic,
6092
5919
  loglik=log_ll, num_parm=paramNum, GOF=other_measures)
@@ -6096,19 +5923,19 @@ class ObjectiveFunction(object):
6096
5923
  try:
6097
5924
 
6098
5925
  in_sample_mae = self.validation(betas_est['x'], y, X, Xr, dispersion=dispersion,
6099
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None,
5926
+ rdm_cor_fit=self.rdm_cor_fit,
6100
5927
  model_nature=mod, testing=0)
6101
5928
  sol.add_objective(TRAIN=in_sample_mae)
6102
5929
  y_test, X_test, Xr_test = mod.get('y_test'), mod.get('X_test'), mod.get('Xr_test')
6103
5930
  Xr_grouped_test = mod.get('Xrtest')
6104
5931
  MAE_test = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
6105
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None,
5932
+ rdm_cor_fit=self.rdm_cor_fit,
6106
5933
  model_nature=mod)
6107
5934
 
6108
5935
  sol.add_objective(TEST=MAE_test)
6109
- if self.val_percentage >0:
5936
+ if self.val_percentage > 0:
6110
5937
  MAE_val = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
6111
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None,
5938
+ rdm_cor_fit=self.rdm_cor_fit,
6112
5939
  model_nature=mod, validation=1)
6113
5940
  sol.add_objective(VAL=MAE_val)
6114
5941
 
@@ -6226,8 +6053,7 @@ class ObjectiveFunction(object):
6226
6053
  self.rdm_cor_fit = [x for x, y in zip(
6227
6054
  select_data, model_nature.get('alpha_cor_rdm')) if y == 1]
6228
6055
 
6229
- # if self.zi_force:
6230
- # self.zi_fit = [x for x, y in zip(select_data, model_nature.get('exog_infl')) if y == 1]
6056
+
6231
6057
  # if alpha_grouped is not None:
6232
6058
  self.grouped_rpm = [x for x, y in zip(select_data, model_nature.get('alpha_grouped')) if y == 1]
6233
6059
  self.hetro_fit = [x for x, y in zip(select_data, model_nature.get('alpha_hetro')) if y == 1]
@@ -6335,7 +6161,7 @@ class ObjectiveFunction(object):
6335
6161
  return delim + self._model_type_codes[dispersion]
6336
6162
 
6337
6163
  def self_standardize_positive(self, X):
6338
- scaler = StandardScaler()
6164
+ scaler = MinMaxScaler()
6339
6165
  if type(X) == list:
6340
6166
  return X
6341
6167
 
@@ -6345,12 +6171,26 @@ class ObjectiveFunction(object):
6345
6171
  # Reshaping to 2D - combining the last two dimensions
6346
6172
  df_tf_reshaped = X.reshape(original_shape[0], -1)
6347
6173
  df_tf_scaled = scaler.fit_transform(df_tf_reshaped)
6348
- df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
6174
+ #df_tf_scaled = df_tf_scaled - df_tf_scaled.min()
6349
6175
  # Reshape back to original 3D shape if necessary
6350
6176
  df_tf = df_tf_scaled.reshape(original_shape)
6351
6177
  return df_tf
6352
6178
  else:
6353
- raise X
6179
+ # Initialize the MinMaxScaler
6180
+ scaler = MinMaxScaler()
6181
+ float_columns = X.select_dtypes(include=['float64', 'float32', 'int']).columns.difference(['const', 'offset', 'EXPOSE', 'Constant', 'constant'])
6182
+ non_numeric_columns = X.select_dtypes(exclude=['float64', 'float32', 'int']).columns
6183
+
6184
+ # Fit the scaler to the float columns and transform them
6185
+ X[float_columns] = scaler.fit_transform(X[float_columns])
6186
6193
+ return X
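A hedged usage sketch of the DataFrame branch above: scale only the numeric columns, leaving constant-like columns untouched (the column names are assumptions for the example):

    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler

    df = pd.DataFrame({"const": 1.0,
                       "AADT": [1200.0, 5400.0, 800.0],
                       "LENGTH": [0.5, 2.1, 1.3]})
    cols = df.select_dtypes(include=["float64"]).columns.difference(["const"])
    df[cols] = MinMaxScaler().fit_transform(df[cols])   # each column mapped to [0, 1]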
6354
6194
 
6355
6195
  def make_regression_from_terms(self, fixed=None, rdm=None, rdm_cor_fit=None, distribution=None, dispersion=None,
6356
6196
  *args, **kwargs):
@@ -6405,14 +6245,15 @@ class ObjectiveFunction(object):
6405
6245
  df_test[:, :, idx], model_nature.get('transformations')[idx] = self.transformer(
6406
6246
  t, idx, df_test[:, :, idx])
6407
6247
  if np.max(df_tf[:, :, idx]) >= 77000:
6408
- raise Exception('should not be possible')
6248
+ # TODO: need to normalise the data
+ print('Warning: transformed values exceed the expected range')
6409
6251
 
6410
6252
  self.define_selfs_fixed_rdm_cor(model_nature)
6411
6253
  indices = self.get_named_indices(self.fixed_fit)
6412
6254
  indices5 = self.get_named_indices(self.hetro_fit)
6413
6255
 
6414
- if self.zi_force:
6415
- model_nature['exog_inflX'] = df_tf[:, :, self.get_named_indices(self.zi_force_names)]
6256
+
6416
6257
 
6417
6258
  x_h_storage = []
6418
6259
  x_h_storage_test = []
@@ -6445,7 +6286,7 @@ class ObjectiveFunction(object):
6445
6286
  if XG is not None:
6446
6287
  indices4_test = np.repeat(self.get_named_indices(self.grouped_rpm),
6447
6288
  self.group_dummies_test.shape[2]) if self.grouped_rpm != [] else []
6448
- XGtest = np.tile(self.group_dummies_test, len(self.grouped_rpm)) * df_test[:, :, indices4]
6289
+ XGtest = np.tile(self.group_dummies_test, len(self.grouped_rpm)) * df_test[:, :, indices4_test]
6449
6290
  model_nature['XG'] = XG
6450
6291
  model_nature['XGtest'] = XGtest
6451
6292
 
@@ -6463,7 +6304,7 @@ class ObjectiveFunction(object):
6463
6304
  model_nature['XH'] = XH
6464
6305
  X_test = None
6465
6306
  if np.isin(X, [np.inf, -np.inf, None, np.nan]).any(): # type ignore
6466
- raise Exception('there is some kind of error')
6307
+ raise Exception('X contains inf, None, or NaN values')
6467
6308
 
6468
6309
  # numpy data setup fpr estimation
6469
6310
  indices2 = self.get_named_indices(self.rdm_fit)
@@ -6488,7 +6329,8 @@ class ObjectiveFunction(object):
6488
6329
  Xr_test = None
6489
6330
  model_nature['Xr_test'] = Xr_test
6490
6331
  if (Xr.ndim <= 1) or (Xr.shape[0] <= 11) or np.isin(Xr, [np.inf, -np.inf, None, np.nan]).any():
6491
- print('what the actual fuck')
6332
+ print('Xr is malformed: wrong ndim, too few rows, or non-finite values')
+ raise Exception
6492
6334
  if Xr.size == 0:
6493
6335
  Xr = None
6494
6336
  Xr_test = None
@@ -6509,10 +6351,10 @@ class ObjectiveFunction(object):
6509
6351
  obj_1.add_layout(layout)
6510
6352
 
6511
6353
  model_form_name = self.check_complexity(
6512
- self.fixed_fit, self.rdm_fit, self.rdm_cor_fit, self.zi_fit, dispersion, is_halton, model_nature)
6354
+ self.fixed_fit, self.rdm_fit, self.rdm_cor_fit, None, dispersion, is_halton, model_nature)
6513
6355
 
6514
6356
  obj_1.add_names(self.fixed_fit.copy(), self.rdm_fit.copy(),
6515
- self.rdm_cor_fit.copy(), model_form_name, self.zi_fit, pvalues)
6357
+ self.rdm_cor_fit.copy(), model_form_name, None, pvalues)
6516
6358
  if not isinstance(obj_1, dict):
6517
6359
  raise Exception('should not be possible')
6518
6360
 
@@ -6540,31 +6382,32 @@ class ObjectiveFunction(object):
6540
6382
  else:
6541
6383
  obj_1 = Solution()
6542
6384
  self.significant = 3
6543
- print('not_implented yet')
6385
+ print('not implemented yet')  # TODO: check this for exceeding values
6544
6386
 
6545
6387
  if self.is_quanitifiable_num(obj_1[self._obj_1]) and pvalues is not None:
6546
6388
  self.bic = obj_1['bic']
6547
6389
  self.pvalues = pvalues
6548
- if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c", "zi", 'grp', 'xh']):
6390
+ if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c", 'grp', 'xh']):
6549
6391
  # todo: probably delete
6550
6392
  self.naming_for_printing(
6551
- pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'], obj_1['zi_fit'],
6393
+ pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
6552
6394
  obj_1, model_nature)
6553
6395
  else:
6554
6396
  if is_delete == 0:
6555
6397
  # todo: probably delete
6556
6398
  self.naming_for_printing(
6557
6399
  pvalues, 1, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
6558
- obj_1['zi_fit'], obj_1, model_nature)
6400
+ obj_1, model_nature)
6559
6401
  self.coeff_ = betas
6560
6402
  self.stderr = stderr
6561
6403
  self.zvalues = zvalues
6562
6404
  self.log_lik = log_lik
6563
6405
  if self.significant == 0:
6564
6406
 
6565
- print(self.full_model, 'full model is')
6566
- alpha, alpha_rdm, alpha_cor_rdm = self.modify(
6567
- self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
6407
+
6408
+ if not self.test_flag:
6409
+ alpha, alpha_rdm, alpha_cor_rdm = self.modify(
6410
+ self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
6568
6411
 
6569
6412
  return obj_1, model_nature
6570
6413
 
@@ -6581,8 +6424,9 @@ class ObjectiveFunction(object):
6581
6424
  self.significant = 3
6582
6425
 
6583
6426
  return obj_1, model_nature
6584
- alpha, alpha_rdm, alpha_cor_rdm = self.modify(
6585
- self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
6427
+ if not self.test_flag:
6428
+ alpha, alpha_rdm, alpha_cor_rdm = self.modify(
6429
+ self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
6586
6430
  if self.grab_transforms:
6587
6431
 
6588
6432
  if is_halton and self.significant == 1:
@@ -6611,6 +6455,53 @@ class ObjectiveFunction(object):
6611
6455
 
6612
6456
  return obj_1, model_nature
6613
6457
 
6458
+ def get_X_tril(self):
6459
+ '''For correlations find the repeating terms'''
6460
+ varnames = self.none_join([self.rdm_grouped_fit, self.rdm_fit, self.rdm_cor_fit])
6461
+ rv_count_all = 0
6462
+ chol_count = 0
6463
+ rv_count = 0
6464
+ corr_indices = []
6465
+ rv_indices = []
6466
+ for ii, var in enumerate(varnames):  # TODO: BUGFIX
6467
+ if var in self.none_handler(self.rdm_cor_fit):
6468
+ is_correlated = True
6469
+ else:
6470
+ is_correlated = False
6471
+
6472
+ rv_count_all += 1
6473
+ if is_correlated:
6474
+ chol_count += 1
6475
+ else:
6476
+ rv_count += 1
6477
+
6478
+ if var in self.none_handler(self.rdm_cor_fit):
6479
+
6480
+ corr_indices.append(rv_count_all - 1)  # TODO: what does this do?
6481
+
6482
+ else:
6483
+ rv_indices.append(rv_count_all - 1)
6484
+
6485
+ # for s.d.: gr_w = (Obs prob. minus predicted probability) * obs. var * random draw
6486
+ draws_tril_idx = np.array([corr_indices[j]
6487
+ for i in range(len(self.none_handler(self.rdm_cor_fit)))
6488
+ for j in range(i + 1)]) # varnames pos.
6489
+ X_tril_idx = np.array([corr_indices[i]
6490
+ for i in range(len(self.none_handler(self.rdm_cor_fit)))
6491
+ for j in range(i + 1)])
6492
+ # Find the s.d. for random variables that are not correlated
6493
+ var_uncor = self.none_join([self.rdm_grouped_fit, self.rdm_fit])
6494
+ range_var = [x for x in
6495
+ range(len(self.none_handler(var_uncor)))]
6496
+ range_var = sorted(range_var)
6497
+ draws_tril_idx = np.array(np.concatenate((range_var, draws_tril_idx)))
6498
+ X_tril_idx = np.array(np.concatenate((range_var, X_tril_idx)))
6499
+ draws_tril_idx = draws_tril_idx.astype(int)
6500
+ X_tril_idx = X_tril_idx.astype(int)
6501
+ return X_tril_idx
6502
+
6503
+
6504
+
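A concrete illustration of the two index arrays for three correlated random parameters: element (i, j) of the lower-triangular Cholesky factor pairs draw j with variable i.

    corr = [0, 1, 2]          # positions of the correlated parameters
    draws_tril_idx = [corr[j] for i in range(3) for j in range(i + 1)]   # [0, 0, 1, 0, 1, 2]
    X_tril_idx = [corr[i] for i in range(3) for j in range(i + 1)]       # [0, 1, 1, 2, 2, 2]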
6614
6505
  def modifyn(self, data):
6615
6506
  select_data = self._characteristics_names
6616
6507
  alpha = np.isin(select_data, [item.split(':')[0] for item in data['fixed_fit']]).astype(int).tolist()
@@ -6692,7 +6583,7 @@ class ObjectiveFunction(object):
6692
6583
  alpha_cor_rdm = np.in1d(select_data, cor_rdm) * 1
6693
6584
  alpha_cor_rdm = alpha_cor_rdm.tolist()
6694
6585
  alpha_group_rdm = np.in1d(select_data, group_rdm) * 1
6695
- alpha_group_rdm = alpha_group_rdm.tolist()
6586
+ alpha_group_rdm = alpha_group_rdm.tolist()  # TODO: will this ever trigger?
6696
6587
  return alpha, alpha_rdm, alpha_cor_rdm
6697
6588
 
6698
6589
  def show_transforms(self, fix, rdm):
@@ -6818,28 +6709,39 @@ class ObjectiveFunction(object):
6818
6709
  # N, D = draws.shape[0], draws.shape[1]
6819
6710
  N, R, Kr = draws.shape[0], draws.shape[2], draws.shape[1]
6820
6711
  der = dev.np.ones((N, Kr, R), dtype=draws.dtype)
6821
- if len(self.none_handler(self.rdm_cor_fit)) == 0:
6822
- Br_come_one = self.Br.copy()
6823
- # Br_come_one =
6824
- else:
6825
6712
 
6826
- Br_come_one = self.Br.copy()
6827
6713
  # betas_random = self._transform_rand_betas(betas, betas_std, draws)
6828
-
6714
+ # TODO: make sure this works for ln_normal and truncated normal
6829
6715
  if any(set(distribution).intersection(['ln_normal', 'tn_normal'])):
6830
- Br_come_one = self._transform_rand_betas()
6716
+ # sanity check: der and draws must agree on the coefficient dimension
+ if der.shape[1] != draws.shape[1]:
+ print('dimension mismatch between der and draws')
+ Br_come_one = self._transform_rand_betas(betas, betas_std, draws)
+ # TODO: need to get the structure of the rdms
6831
6725
  for k, dist_k in enumerate(distribution):
6832
6726
  if dist_k == 'ln_normal':
6727
+ if der.shape[1] != draws.shape[1]:
6728
+ print('why')
6833
6729
  der[:, k, :] = Br_come_one[:, k, :]
6730
+ if der.shape[1] != draws.shape[1]:
6731
+ print('why')
6834
6732
  elif dist_k == 'tn_normal':
6733
+ if der.shape[1] != draws.shape[1]:
6734
+ print('why')
6835
6735
  der[:, k, :] = 1 * (Br_come_one[:, k, :] > 0)
6736
+ if der.shape[1] != draws.shape[1]:
6737
+ print('why')
6836
6738
 
6739
+ if der.shape[1] != draws.shape[1]:
6740
+ print('why')
6837
6741
  return der
6838
6742
 
6839
6743
  def _copy_size_display_as_ones(self, matrix):
6840
- # grab the shape to copy
6841
- please = matrix.shape
6842
- der = dev.np.ones((please), dtype=matrix.dtype)
6744
+ der = dev.np.ones(matrix.shape, dtype=matrix.dtype)
6843
6745
  return der
6844
6746
 
6845
6747
  def prepare_halton(self, dim, n_sample, draws, distribution, long=False, slice_this_way=None):