metacountregressor 0.1.73__py3-none-any.whl → 0.1.88__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,12 +9,10 @@ import math
  import os
  import random
  import sys
- import time
  import warnings
  from collections import Counter
  from functools import wraps

- from tempfile import TemporaryFile
  import traceback
  import latextable
  import numpy as np
@@ -35,15 +33,22 @@ from statsmodels.tools.numdiff import approx_fprime, approx_hess
  from sklearn.preprocessing import StandardScaler
  from texttable import Texttable

- from ._device_cust import device as dev
- from .pareto_file import Pareto, Solution
+ try:
+     from ._device_cust import device as dev
+     from .pareto_file import Pareto, Solution
+     from .data_split_helper import DataProcessor
+ except ImportError:
+     from metacountregressor._device_cust import device as dev
+     from metacountregressor.pareto_file import Pareto, Solution
+     from data_split_helper import DataProcessor
+

  np.seterr(divide='ignore', invalid='ignore')
  warnings.simplefilter("ignore")

- # defube the computation boundary limits
- min_comp_val = 1e-200
- max_comp_val = 1e+300
+ # define the computation boundary limits
+ min_comp_val = 1e-160
+ max_comp_val = 1e+200
  log_lik_min = -1e+200
  log_lik_max = 1e+200

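The new try/except block is the usual dual-import pattern: package-relative imports when the module is loaded as part of the installed package, absolute imports when the files are run as loose scripts. A minimal sketch of the same idea, using the module names from the hunk above:

    try:
        # Loaded as part of the package: the relative import resolves.
        from ._device_cust import device as dev
    except ImportError:
        # Run as a loose script: fall back to the absolute import.
        from metacountregressor._device_cust import device as dev

Note that the fallback for data_split_helper is a bare absolute import, so it only resolves when that file sits directly on sys.path.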
@@ -117,21 +122,19 @@ class ObjectiveFunction(object):

      def __init__(self, x_data, y_data, **kwargs):

-         self.reg_penalty = .5
+         self.reg_penalty = 1
          self.power_up_ll = False
          self.bic = None
          self.other_bic = False
+         self.test_flag = 1
          if self.other_bic:
              print('change this to false later')
-         offset = None

-         # initi
-         self.constant_value = -5.5
-         self.negative_binomial_value = 0.05
+         # initialize values
+         self.constant_value = 0
+         self.negative_binomial_value = 1

          self.verbose_safe = True
-         self.zi_force = None  # analyst wants a zi model and formally declares the zi components below
-         self.zi_force_names = None  # declare the zi components
          self.please_print = kwargs.get('please_print', 0)
          self.group_halton = None
          self.grad_yes = False
@@ -145,7 +148,7 @@ class ObjectiveFunction(object):
          self.rdm_fit = None
          self.rdm_cor_fit = None
          self.dist_fit = None
-         self.zi_fit = None
+
          self.MAE = None
          self.best_obj_1 = 100000000.0
          self._obj_1 = 'bic'
@@ -158,7 +161,7 @@ class ObjectiveFunction(object):
          self._max_iterations_improvement = 100
          self.generated_sln = set()
          self.ave_mae = 0
-         # default parameters for hs
+         # default parameters for hs  # TODO unpack into harmony search class
          self.algorithm = 'hs'  # 'sa' and 'de' are also available
          self._hms = 20
          self._max_time = 60 * 60 * 24
@@ -166,7 +169,7 @@ class ObjectiveFunction(object):
          self._par = 0.3
          self._mpai = 1
          self._max_imp = 100000
-         self._WIC = 1000  # number of iterations without multiobjective improvement
+         self._WIC = 1000  # number of iterations without multiobjective improvement  # TODO chuck into solution
          self._panels = None
          self.is_multi = True
          self.method_ll = 'Nelder-Mead-BFGS'
@@ -190,11 +193,6 @@ class ObjectiveFunction(object):
              if k in acceptable_keys_list:
                  self.__setattr__(k, self.tryeval(kwargs[k]))

-         if self.zi_force_names is not None:
-             self.zi_force = True
-             if 'const' not in self.zi_force_names:
-                 self.zi_force_names = ['const'] + self.zi_force_names
-                 print('did this work?')

          if 'complexity_level' in kwargs:
              self.complexity_level = kwargs['complexity_level']
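The context above shows the kwargs whitelist: only keys in acceptable_keys_list are copied onto the instance, with tryeval coercing values. A hedged sketch of that pattern — tryeval's body is assumed here to wrap ast.literal_eval, one common way to implement such a helper:

    import ast

    def tryeval(val):
        # Assumed implementation: coerce '1', '0.5', '[1, 2]' to Python
        # objects; return anything that does not parse unchanged.
        try:
            return ast.literal_eval(val)
        except (ValueError, SyntaxError):
            return val

    def apply_whitelisted_kwargs(obj, kwargs, acceptable_keys):
        for k, v in kwargs.items():
            if k in acceptable_keys:
                setattr(obj, k, tryeval(v))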
@@ -211,17 +209,22 @@ class ObjectiveFunction(object):
              raise Exception

          self.pvalue_penalty = float(kwargs.get('pvalue_penalty', 0.5))
-
          self.pvalue_exceed = 0
          self._maximize = False  # do we maximize or minimize?
-         # self._random_forest_identify_transformations(x_data, y_data)
-         # data_names = self._random_forest_preprocess(x_data, y_data)
+
          x_data = sm.add_constant(x_data)
          self._input_data(x_data, y_data)
+
+
          if y_data.ndim == 1:
              y_data = pd.DataFrame(y_data)

-         # split the data for testing
+         '''
+         #TODO ADD THIS IN LATER
+         splitter = DataProcessor(x_data, y_data, kwargs)
+         self.copy_class_attributes(splitter)  # inherit the self objects
+         '''
+
          if self._obj_1 == 'MAE' or self._obj_2 in ["MAE", 'RMSE', 'MAE', 'MSE', 'RMSE_IN', 'RMSE_TEST']:
              self.test_percentage = float(kwargs.get('test_percentage', 0))
              self.val_percentage = float(kwargs.get('val_percentage', 0))
@@ -229,8 +232,7 @@ class ObjectiveFunction(object):
              self.is_multi = False

          if 'panels' in kwargs:
-             self.group_names = np.asarray(x_data[kwargs['group']].astype(
-                 'category').cat._parent.dtype.categories)
+             self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)

              x_data[kwargs['group']] = x_data[kwargs['group']].astype(
                  'category').cat.codes
@@ -243,58 +245,39 @@ class ObjectiveFunction(object):

                  N = len(np.unique(x_data[kwargs['panels']].values))
                  id_unique = np.unique(x_data[kwargs['panels']].values)
-
              except KeyError:
                  N = len(np.unique(x_data[kwargs['panels']]))
+                 id_unique = np.unique(x_data[kwargs['panels']].values)

              training_size = int((1 - self.test_percentage - self.val_percentage) * N)
              ids = np.random.choice(N, training_size, replace=False)
              ids = id_unique[ids]
              train_idx = [ii for ii, id_val in enumerate(x_data[kwargs['panels']]) if id_val in ids]
              test_idx = [ii for ii, id_val in enumerate(x_data[kwargs['panels']]) if id_val not in ids]
-
              df_train = x_data.loc[train_idx, :]
              df_test = x_data.loc[test_idx, :]
              y_train = y_data.loc[train_idx, :]
              y_test = y_data.loc[test_idx, :]
-
          else:
              N = len(x_data)
              training_size = int((1 - self.test_percentage - self.val_percentage) * N)
              ids = np.random.choice(N, training_size, replace=False)
              id_unique = np.array([i for i in range(N)])
              ids = id_unique[ids]
-
              train_idx = [ii for ii in range(len(id_unique)) if id_unique[ii] in ids]
              test_idx = [ii for ii in range(len(id_unique)) if id_unique[ii] not in ids]
+             df_train = x_data.loc[train_idx, :]
+             df_test = x_data.loc[test_idx, :]
+             y_train = y_data.loc[train_idx, :]
+             y_test = y_data.loc[test_idx, :]

-             try:  # @IgnoreException
-                 df_train = x_data.loc[train_idx, :]
-                 df_test = x_data.loc[test_idx, :]
-                 y_train = y_data.loc[train_idx, :]
-                 y_test = y_data.loc[test_idx, :]
-             except:
-                 # Convert all values to their real parts
-                 df_real = x_data.select_dtypes(include=[np.number]).apply(np.real)
-
-                 # Replace the original DataFrame's numerical columns with real-valued ones
-                 x_data[df_real.columns] = df_real
-
-                 df_train = x_data.iloc[train_idx, :]
-                 df_test = x_data.iloc[test_idx, :]
-                 y_train = y_data.iloc[train_idx, :]
-                 y_test = y_data.iloc[test_idx, :]

-         self.n_obs = N
+         # self.n_obs = N
          self._characteristics_names = list(self._x_data.columns)
-         if self.zi_force:
-             self.alpha_hurdle = np.isin(self._characteristics_names,
-                                         [item.split(':')[0] for item in self.zi_force_names]).astype(int).tolist()
-
          self._max_group_all_means = 1

          exclude_this_test = [4]
-
+
          if 'panels' in kwargs:
              self.panels = np.asarray(df_train[kwargs['panels']])
              self.panels_test = np.asarray(df_test[kwargs['panels']])
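Both branches above sample IDs rather than rows, so every observation belonging to a panel lands on the same side of the train/test boundary. A self-contained sketch of that kind of split (the function and argument names here are illustrative, not the package's API):

    import numpy as np

    def panel_train_test_split(x_data, y_data, panel_col, test_pct=0.2, val_pct=0.0, seed=42):
        # Split by panel ID so a panel's observations never straddle the boundary.
        rng = np.random.default_rng(seed)
        id_unique = np.unique(x_data[panel_col].values)
        n_train = int((1 - test_pct - val_pct) * len(id_unique))
        train_ids = set(rng.choice(id_unique, n_train, replace=False))
        mask = x_data[panel_col].isin(train_ids).values
        return x_data[mask], x_data[~mask], y_data[mask], y_data[~mask]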
@@ -309,7 +292,6 @@ class ObjectiveFunction(object):
              X, Y, panel, group = self._arrange_long_format(
                  df_train, y_train, self.ids, self.ids, groupll)
              self.group_halton = group.copy()
-             Y = Y.astype('float')
              self.group_dummies = pd.get_dummies(group)
              Xnew, Ynew, panel_info = self._balance_panels(X, Y, panel)
              self.panel_info = panel_info
@@ -324,7 +306,6 @@ class ObjectiveFunction(object):
              YY = Ynew.values.reshape(self.N, self.P, 1).copy()
              self._x_data = XX.copy()
              self._y_data = YY.copy()
-             # Xalt, Yalt, group_info = self._balance_panels(X, Y, group)
              X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.panels_test, group_test)
              if np.max(group) > 50:
                  exclude_this_test = [4]
@@ -353,7 +334,7 @@ class ObjectiveFunction(object):

              self._samples, self._panels, self._characteristics = self._x_data.shape

-             # i dont think i need this X, Y, group_info = self._balance_panels(XX, YY, group)
+

          else:
              self.G = None
@@ -372,77 +353,37 @@ class ObjectiveFunction(object):
              K = Xnew.shape[1]
              self._characteristics_names = list(Xnew.columns)
              XX = Xnew.values.reshape(self.N, self.P, K).copy()
-             # self.group_dummies = self.group_dummies.values.reshape(self.N, self.P, -1)
-             # self.group_halton = self.group_halton.reshape(self.N, self.P)[:, 0]
              YY = Ynew.values.reshape(self.N, self.P, 1).copy()
              self._x_data = XX.copy()
              self._y_data = YY.copy()
-             # Xalt, Yalt, group_info = self._balance_panels(X, Y, group)
+
              if self.is_multi:
                  X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.ids_test, None)
                  if np.max(group) > 50:
                      exclude_this_test = [4]
                  else:
                      exclude_this_test = []
-                 # self.group_halton_test = group.copy()
                  X, Y, panel_info = self._balance_panels(X, Y, panel)
-
+
                  self.N_test, self.P_test = panel_info.shape
-
-                 # self.group_dummies_test = pd.get_dummies(group)
-                 # self.group_dummies_test = self.group_dummies_test.values.reshape(self.N_test, self.P_test, -1)
                  K = X.shape[1]
                  self.columns_names = X.columns
                  X = X.values.reshape(self.N_test, self.P_test, K)
-                 # self.group_halton_test = self.group_halton_test.reshape(self.N_test, self.P_test)[:, 0]
                  Y = Y.values.reshape(self.N_test, self.P_test, 1)
                  self._x_data_test = X.copy()
                  self.y_data_test = Y.copy()
-
+
              self._samples, self._panels, self._characteristics = self._x_data.shape

-             # draws and pvalue
-
-         if 'Offset' in self._characteristics_names:
-             offset = True
-         self.have_offset = offset
-         if self.have_offset is not None:
-             try:
-                 # offset for training data
-                 # define offset
-                 val_od = self.get_named_indices(['Offset'])
-                 self._offsets = self._x_data[:, :, val_od]
-
-                 # drop the offset from the data
-                 self._x_data = np.delete(self._x_data, val_od, axis=2)
-                 self._characteristics_names = [x for x in self._characteristics_names if not 'Offset' in x]
-                 self._characteristics = len(self._characteristics_names)
-                 # self._x_data.drop(columns=['Offset'], inplace=True)
-
-                 # offset for testing data
-                 if self.is_multi:
-                     # define offset
-                     self._offsets_test = self._x_data_test[:, :, val_od]
-                     # self._offsets_test = self._x_data_test['Offset'].to_numpy()
-                     # self._offsets_test = np.reshape(
-                     #     self._offsets_test, (-1, 1))
-                     # drop the offset from the data
-                     self._x_data_test = np.delete(self._x_data_test, val_od, axis=2)
-                     # self._x_data_test.drop(columns=['Offset'], inplace=True)
-             except:
-                 # if no offset, set as 0
-                 self._offsets = np.zeros((self.N, self.P, 1))
-                 if self.is_multi:
-                     self._offsets_test = np.zeros((self.N_test, self.P_test, 1))
-         else:
-             self._offsets = np.zeros((self.N, self.P, 1))
-             if self.is_multi:
-                 self._offsets_test = np.zeros((self.N_test, self.P_test, 1))

+         # define the offset into the data
+         self.process_offset()
          if self.is_multi:
              self.pareto_printer = Pareto(self._obj_1, self._obj_2, True)
-
              self._pareto_population = list()
+
+
+
          self.Ndraws = 200  # todo: change back
          self.draws1 = None
          self.initial_sig = 1  # pass the test of a single model
@@ -480,8 +421,7 @@ class ObjectiveFunction(object):
          self.coeff_ = None

          self.significant = 0
-         # define the states of our explanaotory variables
-
+         # define the states of our explanatory variables
          self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test,
                                                     kwargs.get('must_include', []))
          self._discrete_values = self._discrete_values + \
@@ -506,21 +446,83 @@ class ObjectiveFunction(object):
          self.endog = None
          # solution parameters
          self._min_characteristics = 1
-
          self._max_hurdle = 4

-         if "Manual_Fit" in kwargs and kwargs['Manual_Fit'] is not None:
+         # manually fit from the analyst's specification
+         manual_fit = kwargs.get('Manual_Fit')
+         if manual_fit is not None:
+             self.process_manual_fit(manual_fit)

-             self.initial_sig = 1  # pass the test of a single model
-             self.pvalue_sig_value = 1
-             # embed the solution to how you want it
-             self.set_defined_seed(42)
-             a = self.modify_initial_fit(kwargs['Manual_Fit'])
-             self.makeRegression(a)
+         self.solution_analyst = None


-         find_constant = 0
-         hard_code = 0
+
+
+     def over_ride_self(self, **kwargs):
+         """
+         Dynamically sets attributes on the instance based on the provided keyword arguments.
+         """
+         for key, value in kwargs.items():
+             setattr(self, key, value)
+         print(f"Updated attributes: {kwargs}")
+
+     def remove_offset(self, data, indices):
+         """ Remove offset data from the dataset """
+         new_data = np.delete(data, indices, axis=2)
+         return new_data
+
+     def process_offset(self):
+         """ Process offset if it exists in the characteristics """
+         try:
+             if 'Offset' in self._characteristics_names:
+                 self.have_offset = True
+                 val_od = self.get_named_indices(['Offset'])
+                 self._offsets = self._x_data[:, :, val_od]
+                 self._x_data = self.remove_offset(self._x_data, val_od)
+                 self._characteristics_names = [x for x in self._characteristics_names if x != 'Offset']
+                 self._characteristics = len(self._characteristics_names)
+
+                 if self.is_multi:
+                     self._offsets_test = self._x_data_test[:, :, val_od]
+                     self._x_data_test = self.remove_offset(self._x_data_test, val_od)
+             else:
+                 self.initialize_empty_offsets()
+
+         except Exception as e:
+             print(f"An error occurred: {e}")  # better error handling
+             self.initialize_empty_offsets()
+
+     def initialize_empty_offsets(self):
+         """ Initialize offsets to zero if none are found or on error """
+         self._offsets = np.zeros((self.N, self.P, 1))
+         if self.is_multi:
+             self._offsets_test = np.zeros((self.N_test, self.P_test, 1))
+
+
+     def copy_class_attributes(self, class_object):
+         '''
+         Loop through another object's attributes and copy them onto self.
+         '''
+
+         # Loop through all attributes of the given object and copy them
+         for attr in vars(class_object):
+             setattr(self, attr, getattr(class_object, attr))
+
+
+     def process_manual_fit(self, manual_fit):
+         """Process the manual fit configuration."""
+         self.initial_sig = 1  # initialize the significance signal
+         self.pvalue_sig_value = 1  # initialize the p-value signal
+         self.set_defined_seed(42)  # set a specific seed
+
+         modified_fit = self.modify_initial_fit(manual_fit)  # modify the initial fit based on manual_fit
+         self.makeRegression(modified_fit)  # perform regression with the modified fit
+
+
+     def process_fit_specifications(self, find_constant, hard_code):
+         """
+         Function for processing testing, and finding a suitable initial coefficient (linear intercept)
+         """
          if hard_code:
              manual_fit_spec = {
                  'fixed_terms': ['Constant', 'US', 'RSMS', 'MCV'],
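The new process_offset/remove_offset pair replaces the inline offset handling deleted above. Its core operation is slicing one named column out of the (N, P, K) design array and removing it, roughly:

    import numpy as np

    names = ['const', 'Offset', 'x1']          # illustrative column names
    x = np.random.rand(4, 2, 3)                # N=4 panels, P=2 periods, K=3 columns
    val_od = [names.index('Offset')]
    offsets = x[:, :, val_od]                  # kept with shape (N, P, 1)
    x = np.delete(x, val_od, axis=2)           # design array without the offset
    names = [n for n in names if n != 'Offset']

When no 'Offset' column exists, initialize_empty_offsets substitutes zero arrays of the same (N, P, 1) shape.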
@@ -559,7 +561,7 @@ class ObjectiveFunction(object):
                  constant_values.append(self.beta_dict['const'][0][1])
                  dispersion_values.append(self.beta_dict.get(self._model_type_codes[i], [[0, 0], [0, 0]])[0][1])
              except:
-                 print('d')
+                 print('Error during regression analysis.')
              i += 1

          # Add the values of this iteration to the total
@@ -570,7 +572,7 @@ class ObjectiveFunction(object):
          constant_values_avg = [x / 100 for x in constant_values_total]
          dispersion_values_avg = [x / 100 for x in dispersion_values_total]

-         self.solution_analyst = None
+         return constant_values_avg, dispersion_values_avg


      def _balance_panels(self, X, y, panels):  # ToDO re
@@ -615,22 +617,7 @@ class ObjectiveFunction(object):

          return np.nan_to_num(quad(integrand, 0, np.inf)[0], nan=0)

-     def _random_forest_call_r(self):
-         import rpy2.rinterface as rinterface
-         import rpy2.robjects as robjects
-         import rpy2.robjects as ro
-         from rpy2.robjects import pandas2ri
-         r = robjects.r
-         r['source']('testML.R')
-         pandas2ri.activate()
-         RF_function_r = robjects.globalenv['RF_plot']
-         RF_function_corr_r = robjects.globalenv['RF_plot_corr']
-         r_df = ro.conversion.py2rpy(self._x_data)
-         y_dy = ro.conversion.py2rpy(self._y_data)
-         RF_function_r(r_df, y_dy)
-
-         print('did this work')
-         RF_function_corr_r(r_df, y_dy)
+

      def print_system_utilization(self):
          # Get CPU usage
@@ -647,7 +634,8 @@ class ObjectiveFunction(object):
          mem_free = round(mem_info.available /
                           (1024 * 1024), 2)  # Convert to MB
          print(
-             f"Current memory usage: {mem_percent}% ({mem_used} MB used / {mem_total} MB total)")
+             f"Current memory usage: {mem_percent}% ({mem_used} MB used / {mem_total} MB total / "
+             f"mem free {mem_free} MB)")

      def _arrange_long_format(self, X, y, ids, panels=None, groups=None):
          '''converts the data to long format'''
@@ -665,59 +653,14 @@ class ObjectiveFunction(object):
              if group is not None:
                  group = group[sorted_idx]

-             return X, y, pnl, group
-
-         pandas_sort = 1
-         if pandas_sort:
-             if ids is not None:
-
-                 pnl = panels if panels is not None else np.ones(len(ids))
-                 df = X
-
-                 df['panels'], df['ids'] = pnl, ids
-                 new = 0
-                 if new:
-                     cols = np.zeros(len(ids), dtype={'names': ['panels', 'ids'],
-                                                      'formats': ['<f4', '<f4']})
-                     cols['panels'], cols['ids'] = pnl, ids
-                     sorted_idx = np.argsort(cols, order=['panels', 'ids'])
-                     X, y = X[sorted_idx], y[sorted_idx]
-                     if panels is not None:
-                         panels = panels[sorted_idx]
-                     return X, y, panels
-
-                 df = pd.concat([X.reset_index(drop=True),
-                                 y.reset_index(drop=True)], axis=1)
-                 sorted_df = df.sort_values(
-                     ['panels', 'ids']).reset_index(drop=True)
-
-                 X, y, panels = sorted_df.iloc[:, :-
-                                               3], sorted_df.iloc[:, -3:-2], sorted_df.iloc[:, -2]
-                 if panels is not None:
-                     # panels = panels[sorted_idx]
-                     P_i = (
-                         (np.unique(panels, return_counts=True)[1])).astype(int)
-                     P = np.max(P_i)
-                     N = len(P_i)
-                     print(1)
-                 return X, y, panels
-
-         if ids is not None:
-             X = np.asarray(X)
-             y = np.asarray(y)
-             pnl = panels if panels is not None else np.ones(len(ids))
-
-             cols = np.zeros(len(ids), dtype={'names': ['panels', 'ids'],
-                                              'formats': ['<f4', '<f4']})
-             cols['panels'], cols['ids'] = pnl, ids
-             sorted_idx = np.argsort(cols, order=['panels', 'ids'])
-             X, y = X[sorted_idx], y[sorted_idx]
-             if panels is not None:
-                 panels = panels[sorted_idx]
+             return X, y.astype('float'), pnl, group

-         return X, y, panels
+         return X, y.astype('float'), panels

      def _random_forest_identify_transformations(self, x_data, y_data):
+         '''
+         use the random forest model to identify the best features
+         '''
          # let's use the pprint module for readability
          import inspect
          from pprint import pprint
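The surviving top of _arrange_long_format sorts rows panel-major, id-minor with a NumPy structured array — the same trick the deleted branches used. In isolation:

    import numpy as np

    panels = np.array([2, 1, 1, 2])
    ids = np.array([1, 2, 1, 0])
    cols = np.zeros(len(ids), dtype={'names': ['panels', 'ids'],
                                     'formats': ['<f4', '<f4']})
    cols['panels'], cols['ids'] = panels, ids
    sorted_idx = np.argsort(cols, order=['panels', 'ids'])
    # sorted_idx -> array([2, 1, 3, 0]): rows ordered by (panel, id)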
@@ -866,7 +809,6 @@ class ObjectiveFunction(object):
      def pvalue_asterix_add(self, pvalues):
          pvalue_ast = list()
          for i in range(len(pvalues)):
-             signif = ""
              if float(pvalues[i]) < 0.001:
                  signif = "***"
              elif float(pvalues[i]) < 0.01:
@@ -899,8 +841,7 @@ class ObjectiveFunction(object):

          return ([self._model_type_codes[dispersion]])

-     def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None,
-                             zi_fit=None, obj_1=None, model_nature=None):
+     def naming_for_printing(self, betas=None, no_draws=0, dispersion=0, fixed_fit=None, rdm_fit=None, rdm_cor_fit=None, obj_1=None, model_nature=None):
          self.name_deleter = []
          group_rpm = None
          group_dist = []
@@ -911,8 +852,7 @@ class ObjectiveFunction(object):
              rdm_fit = self.none_handler(self.rdm_fit)
          if rdm_cor_fit is None:
              rdm_cor_fit = self.none_handler(self.rdm_cor_fit)
-         if zi_fit is None:
-             zi_fit = self.none_handler(self.zi_fit)
+
          dis_fit = [x for x in self.none_handler(
              self.dist_fit)]  # check if dis fit is name

@@ -977,18 +917,18 @@ class ObjectiveFunction(object):
              br_w_names = [randvars[i] + " (Std. Dev.) " + rand_vars_dis[i]
                            for i in range(len(randvars))]

-             zi_names = [x + ":inflated" for x in self.none_handler(self.zi_force_names)]
+

              names = fixednames + randvars + chol_names + \
-                 br_w_names + chol_part_1 + chol + zi_names + hetro_long + dispersion_name
+                 br_w_names + chol_part_1 + chol + hetro_long + dispersion_name
              self.name_deleter = fixednames + randvars + chol_names + randvars + [chol_names[i] for i
                                                                                   in range(len(chol_names)) for j in
                                                                                   range(
-                                                                                      i + 1)] + zi_names + dispersion_name  # TODO does this break
+                                                                                      i + 1)] + dispersion_name  # TODO does this break
              name_delete_2 = fixednames + randvars + chol_names + randvars + [chol_names[i] + "/" +
                                                                               chol_names[j] for i
                                                                               in range(len(chol_names)) for j in
-                                                                              range(i + 1)] + zi_names + dispersion_name
+                                                                              range(i + 1)] + dispersion_name
              index_dict = {}
              for i, name in enumerate(name_delete_2):
                  split_names = name.split('/')
@@ -1012,9 +952,9 @@ class ObjectiveFunction(object):
              randvars = [x for x in self.none_handler(rdm_fit)]
              chol_names = [x for x in self.none_handler(rdm_cor_fit)]

-             zi_names = [x + ': inflated' for x in self.none_handler(self.zi_force_names)]

-             names = fixednames + randvars + chol_names + zi_names + big_hetro + dispersion_name
+
+             names = fixednames + randvars + chol_names + big_hetro + dispersion_name

              names = np.array(names)  # TODO check order
              self.print_transform = self.transform_id_names + \
@@ -1052,22 +992,8 @@ class ObjectiveFunction(object):
          if not isinstance(self.pvalues, np.ndarray):
              raise Exception

-         for i in range(len(self.coeff_)):
-             signif = ""

-             if float(self.pvalues[i]) < 0.01:
-                 signif = "***"
-             elif float(self.pvalues[i]) < 0.05:
-                 signif = "**"
-             elif float(self.pvalues[i]) < 0.1:
-                 signif = "*"

-             '''
-             print(fmt.format(self.coeff_names[i][:coeff_name_str_length], self.print_transform[i], self.coeff_[i],
-                              self.stderr[i], self.zvalues[i], self.pvalues[i],
-                              signif
-                              ))
-             '''
          if self.please_print or save_state:

              if self.convergance is not None:
@@ -1175,14 +1101,7 @@ class ObjectiveFunction(object):
              self.save_to_file(latextable.draw_latex(
                  table, caption=caption, caption_above=True), file_name)

-         # print('change this')
-         # df = pd.read_csv("artificial_mixed_corr_2023_MOOF.csv")
-
-         # updating the column value/data
-         # df['Y'] = np.mean(self.lam, axis = (1,2))

-         # writing into the file
-         # df.to_csv("artificial_mixed_corr_2023_MOOF.csv", index=False)

      def summary(self, model=None, algorithm=None, transform_list=None, long_print=0, solution=None):
          """
@@ -1540,22 +1459,9 @@ class ObjectiveFunction(object):
          alpha_hetro = [
              0 if x != 5 else 1 for x in vector[:self._characteristics]]

-         if self.zi_force == True:

-             return {
-                 'alpha': alpha,
-                 'alpha_rdm': alpha_rdm,
-                 'alpha_cor_rdm': alpha_cor_rdm,
-                 'alpha_grouped': alpha_grouped,
-                 'alpha_hetro': alpha_hetro,
-                 'distributions': distributions,
-                 'transformations': transformations,
-                 'exog_infl': self.zi_force_names,
-                 'dispersion': dispersion
-             }

-         else:
-             return {
+         return {
             'alpha': alpha,
             'alpha_rdm': alpha_rdm,
             'alpha_cor_rdm': alpha_cor_rdm,
@@ -1563,7 +1469,6 @@ class ObjectiveFunction(object):
             'alpha_hetro': alpha_hetro,
             'distributions': distributions,
             'transformations': transformations,
-
             'dispersion': dispersion
          }

@@ -1599,7 +1504,7 @@ class ObjectiveFunction(object):

      def repair(self, vector, reduce_to_this=10000):  # todo get the number of parameters
          'Method to repair the model so that the number of parameters is held within the constraint'
-         b = 0
+
          new_j = 0
          # extract explanatory vector
          prmVect = vector[:self._characteristics]
@@ -1618,7 +1523,6 @@ class ObjectiveFunction(object):
                            int(np.min((5, self.complexity_level - 1)))])

          count_3 = prmVect.count(3)
-         this_many = count_3 * (count_3 + 1) / 2

          vector[:len(prmVect)] = prmVect.copy()

@@ -1637,8 +1541,7 @@ class ObjectiveFunction(object):
          # b = sum(prmVect) + self.is_dispersion(vector[-1])
          max_loops = 100  # Maximum number of loops
          counter = 0  # Counter variable to keep track of the number of loops
-         if any(isinstance(num, int) and num < 0 for num in vector):
-             raise Exception('fhfhfhf')
+

          while b > self._max_characteristics and counter < max_loops or b > reduce_to_this:

@@ -1686,8 +1589,6 @@ class ObjectiveFunction(object):
              counter += 1

          counter = 0
-         if any(isinstance(num, int) and num < 0 for num in vector):
-             raise Exception('fhfhfhf')
          while b < self._min_characteristics and counter < max_loops:

              weights = [1 if x == 0 else 0 for x in only_ints_vals]
@@ -1734,13 +1635,13 @@ class ObjectiveFunction(object):
          cor_l = 0 if self.rdm_cor_fit is None else len(self.rdm_cor_fit)
          Kh = 0 if self.hetro_fit is None else len(self.hetro_fit) + len(set(self.dist_hetro))

-         zi_terms = 0 if self.zi_fit is None else len(self.zi_fit)
+
          Kchol = int((cor_l *
                       (cor_l + 1)) / 2)
          n_coeff = Kf + Kr + cor_l + Kchol + Kr_b + Kh
          if block:
-             return [Kf, Kr, cor_l, Kr_b, Kchol, Kh, zi_terms]
-         return Kf, Kr, cor_l, Kr_b, Kchol, Kh, zi_terms
+             return [Kf, Kr, cor_l, Kr_b, Kchol, Kh]
+         return Kf, Kr, cor_l, Kr_b, Kchol, Kh

      def find_index_of_block(self, lst, value):
          cumulative_sum = 0
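In get_num_params above, Kchol = cor_l * (cor_l + 1) / 2 counts the free entries of the lower-triangular Cholesky factor for the correlated random parameters, diagonal included. For example:

    cor_l = 3                                # three correlated random parameters
    Kchol = int((cor_l * (cor_l + 1)) / 2)   # 6 entries:
    # [[l11,   0,   0],
    #  [l21, l22,   0],
    #  [l31, l32, l33]]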
@@ -1821,8 +1722,7 @@ class ObjectiveFunction(object):
                  self.rdm_fit)):
              raise Exception('pop wrong for id names')

-         # return 'need to delete all of the dups'
-         # self.rdm_cor_fit.pop(self.name_deleter(idx))
+

      def get_value_to_delete(self, idx, dispersion):
          block = self.get_num_params(True)
@@ -1858,8 +1758,7 @@ class ObjectiveFunction(object):
              self.dist_fit.pop(cc[b] + len(self.rdm_fit))
              self.transform_id_names.pop(
                  cc[b] + len(self.none_handler(self.fixed_fit)) + len(self.none_handler(self.rdm_fit)))
-         # return 'need to delete all of the dups'
-         # self.rdm_cor_fit.pop(self.name_deleter(idx))
+

      def get_param_num(self, dispersion=0):
          a = np.sum(self.get_num_params()) + \
@@ -1890,7 +1789,7 @@ class ObjectiveFunction(object):
                             return_violated_terms=0):

          num_params = len(pvalues)
-         Kf, Kr, Kc, Kr_b, Kchol, Kh, zi_b = self.get_num_params()
+         Kf, Kr, Kc, Kr_b, Kchol, Kh = self.get_num_params()

          vio_counts = 0
          pvalues = np.array([float(string) for string in pvalues])
@@ -1915,18 +1814,14 @@ class ObjectiveFunction(object):
                  subpvalues[i] = 0

          sum_k += Kr_b
-         if Kchol > 0:
-             cc = [i for i
-                   in range(len(self.rdm_cor_fit)) for j in range(i + 1)]
-
          lower_triangular = subpvalues[sum_k:sum_k + Kchol]

-         n = Kc  # compute matrix size
+
          # initialize matrix with zeros
-         matrix_alt = [[0] * n for _ in range(n)]
+         matrix_alt = [[0] * Kc for _ in range(Kc)]
          index = 0

-         for i in range(n):
+         for i in range(Kc):
              for j in range(i + 1):
                  # fill in lower triangular entries
                  matrix_alt[i][j] = lower_triangular[index]
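The loop above unpacks the flat slice of Cholesky p-values back into a Kc x Kc lower-triangular matrix, row by row. Standalone:

    Kc = 3
    lower_triangular = [0.01, 0.20, 0.03, 0.50, 0.04, 0.09]   # Kc*(Kc+1)//2 values
    matrix_alt = [[0] * Kc for _ in range(Kc)]
    index = 0
    for i in range(Kc):
        for j in range(i + 1):
            matrix_alt[i][j] = lower_triangular[index]
            index += 1
    # matrix_alt -> [[0.01, 0, 0], [0.20, 0.03, 0], [0.50, 0.04, 0.09]]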
@@ -2414,17 +2309,9 @@ class ObjectiveFunction(object):

          if obj_1 is not None:
              obj_1['layout'] = vector.copy()
-             # alpha, alpha_rdm, alpha_cor_rdm = self.modify(
-             #     obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'])
-             # a = self.modifyn(model_mod)
-             # vector = self.modify_vector(
-             #     vector, alpha, alpha_rdm, alpha_cor_rdm)
              sub_vector = vector[:self._characteristics]
              dispersion_parm = vector[-1]
-             if dispersion_parm == 0:
-                 num_parm = sum(sub_vector)
-             else:
-                 num_parm = sum(sub_vector) + 1
+

              if not self.is_quanitifiable_num(obj_1[self._obj_1]):
                  obj_1[self._obj_1] = 10 ** 9
@@ -2457,7 +2344,7 @@ class ObjectiveFunction(object):

              self.Last_Sol = obj_1.copy()

-         #
+

          self.reset_sln()
          if not self.is_quanitifiable_num(obj_1[self._obj_1]):
@@ -2495,7 +2382,7 @@ class ObjectiveFunction(object):
          self.coeff_names = None
          self.draws1 = None
          self.coeff_ = None
-         self.zi_fit = None
+
          self.bic = None
          self.log_lik = None
          self.pvalues = None
@@ -2589,13 +2476,13 @@ class ObjectiveFunction(object):
      def set_defined_seed(self, seed):
          print('Benchmarking test with seed', seed)
          np.random.seed(seed)
-         # pd.random.seed(seed)
+
          random.seed(seed)

      def set_random_seed(self):
          print('Embedding seed', self._random_seed)
          np.random.seed(self._random_seed)
-         # pd.random.seed(self._random_seed)
+
          random.seed(self._random_seed)
          return self._random_seed

@@ -2720,85 +2607,9 @@ class ObjectiveFunction(object):
              print(e)
              print('f')

-     def negbinom_gradients(r, p, k, a=None):  # TODO: delete if wrong
-         """_summary_
-
-         Args:
-             r (_type_): rate parameters or dispersion of the nb
-             p (_type_): probability
-             k (_type_): vector of (non-negative integer) quantiles.
-             a (_type_, optional): optional parameter; if None NB model, otherwise NB-Lindley model with Lindley parameter a.
-
-         Raises:
-             Exception: _description_
-             Exception: _description_
-             ValueError: _description_
-             Exception: _description_
-             Exception: _description_

-         Returns:
-             _type_: _description_
-         """
-         # find the NegBinom PMF
-         import scipy.special as sps
-         negbinom_pmf = sps.comb(k + r - 1, k) * p ** r * (1 - p) ** k

-         # Calculate the gradient of the NegBinom PMF with respect to r and p
-         d_negbinom_pmf_dr = sps.comb(
-             k + r - 1, k) * (np.log(p) - sps.digamma(r)) * p ** r * (1 - p) ** k
-         d_negbinom_pmf_dp = sps.comb(
-             k + r - 1, k) * (r / p - (k + r) / (1 - p)) * p ** r * (1 - p) ** k

-         if a is not None:
-             # Define the NegBinom-Lindley PMF
-             negbinom_lindley_pmf = sps.comb(a + k - 1, k) * p ** r * (1 - p) ** k
-
-             # Calculate the gradient of the NegBinom-Lindley PMF with respect to r, p, and a
-             d_negbinom_lindley_pmf_dr = sps.comb(
-                 a + k - 1, k) * (np.log(p) * p ** r * (1 - p) ** k)
-             d_negbinom_lindley_pmf_dp = sps.comb(
-                 a + k - 1, k) * ((r / p - (k + r) / (1 - p)) * p ** r * (1 - p) ** k)
-             d_negbinom_lindley_pmf_da = sps.comb(
-                 a + k - 1, k) * (-sps.digamma(a) + sps.digamma(a + k)) * p ** r * (1 - p) ** k
-
-             return [d_negbinom_pmf_dr, d_negbinom_pmf_dp], [d_negbinom_lindley_pmf_dr, d_negbinom_lindley_pmf_dp,
-                                                             d_negbinom_lindley_pmf_da]
-         else:
-             return [d_negbinom_pmf_dr, d_negbinom_pmf_dp]
-
-     def f(self, x, N, sig, mu):
-         return norm.pdf(x, 0, 1) * poisson.pmf(N, np.exp(x * sig + mu))
-
-     def poilog(self, n, mu, sig):
-         from scipy import integrate
-         if len(mu) > 1 or len(sig) > 1:
-             raise ValueError(
-                 "vectorization of mu and sig is currently not implemented")
-         if any((n[n != 0] / np.trunc(n[n != 0])) != 1):
-             raise ValueError("all n must be integers")
-         if any(n < 0):
-             raise ValueError("one or several values of n are negative")
-         if not np.all(np.isfinite(np.concatenate((mu, sig)))):
-             raise ValueError("all parameters should be finite")
-         if sig <= 0:
-             raise ValueError("sig is not larger than 0")
-         spos = np.where(n < 8)[0]
-         lpos = np.where(n >= 8)[0]
-         val = np.empty_like(n)
-
-         if spos.size > 0:
-             vali = np.empty(spos.size)
-             for i in range(spos.size):
-                 try:
-                     vali[i] = integrate.quad(
-                         self.f, -np.inf, np.inf, sig, mu, args=(n[spos[i]],))[0]
-                 except:
-                     vali[i] = 1e-300
-             valp = self.poilog(n[spos], mu, sig ** 2)[0]
-             val[spos] = np.maximum(vali, valp)
-         if lpos.size > 0:
-             val[lpos] = self.poilog(n[lpos], mu, sig ** 2)[0]
-         return val

      def negbinom_pmf(self, r, p, k, a=None):  # TODO: delete if wrong
          """_summary_
@@ -2828,45 +2639,7 @@ class ObjectiveFunction(object):
          negbinom_lindley_pmf = sc.comb(a + k - 1, k) * p ** r * (1 - p) ** k
          return negbinom_lindley_pmf

-     def nbl_score(self, y, X, betas, alpha, theta):
-         from scipy.special import gammaln, psi
-         """
-         Calculate the Negative Binomial-Lindley model score vector of the log-likelihood.
-
-         Parameters:
-         -----------
-         y : numpy array
-             The dependent variable of the model.
-         X : numpy array
-             The independent variables of the model.
-         beta : numpy array
-             The coefficients of the model.
-         alpha : float
-             The dispersion parameter of the Negative Binomial-Lindley distribution.
-         theta : float
-             The theta parameter of the Negative Binomial-Lindley distribution.
-
-         Returns:
-         --------
-         score : numpy array
-             The score vector of the Negative Binomial-Lindley model log-likelihood.
-         """
-         alpha = betas[-1]
-         theta = betas[-2]
-         beta = betas[:-2]
-         zi = self.my_lindley(y, theta).ravel()
-
-         eta = np.dot(X, beta)
-         mu = np.exp(eta) * zi
-         p = 1 / (1 + mu * theta / alpha)
-         q = 1 - p
-         score = np.zeros(len(betas))

-         for i in range(len(y)):
-             score += (psi(y[i] + zi[i] * p[i]) - psi(alpha * p[i]) + np.log(zi[i])
-                       - np.log(1 + zi * mu[i] / alpha)) * X[i, :]
-
-         return score

      def poisson_lognormal_glm_score(self, betas, Y, X, sigma, tau=1e-6):
          """
@@ -2909,7 +2682,7 @@ class ObjectiveFunction(object):
          grad_n = np.concatenate((grad_n_sub, grad_n_sub1), axis=1)
          return gradient, grad_n

-     def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False):
+     def NB_Score(self, params, y, mu, X, Q=0, obs_specific=False, alpha=None):
          """
          Negative Binomial model score (gradient) vector of the log-likelihood
          Parameters
@@ -2928,10 +2701,48 @@ class ObjectiveFunction(object):


          """
+         # print('delete this later')
+         if alpha is None:
+             alpha = params[-1]
+         # Calculate common terms
+         '''
+         n = len(y)
+         n, p, d = X.shape  # n: observations, p: panels (1 in your case), d: explanatory variables

-         try:
+         # Flatten the data since there's only one panel, simplifying the operations
+         X_flat = X.reshape(n * p, d)
+         y_flat = y.flatten()
+         mu_flat = mu.flatten()

-             alpha = params[-1]
+         # Prepare score array
+         score = np.zeros(d + 1)  # +1 for alpha
+
+         # Compute the gradient for regression coefficients
+         for j in range(d):  # Exclude the last parameter (alpha)
+             score[j] = np.dot(X_flat[:, j], (y_flat - mu_flat))
+
+         # Compute the gradient for the dispersion parameter
+         if obs_specific:
+             # Adjust the calculation if observation-specific effects are considered
+             sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+                 y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+             score[-1] = np.sum(sum_terms)
+         else:
+             # Standard calculation
+             sum_terms = (y_flat - mu_flat) ** 2 / (mu_flat + alpha * mu_flat ** 2) - (
+                 y_flat + mu_flat ** 2 / (mu_flat + alpha * mu_flat ** 2))
+             score[-1] = np.sum(sum_terms)
+         return score
+         '''
+         # return score
+
+
+
+
+         try:
+             if alpha is None:
+                 alpha = params[-1]
              a1 = 1 / alpha * mu ** Q
              prob = a1 / (a1 + mu)
              exog = X
@@ -2973,7 +2784,8 @@ class ObjectiveFunction(object):
              return np.concatenate((dparams, dalpha),
                                    axis=1)
          except Exception as e:
-             print('in ki nb probkemng')
+             print(e)
+             print('NB score exception problem..')
              exc_type, exc_obj, exc_tb = sys.exc_info()
              fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
              print(exc_type, fname, exc_tb.tb_lineno)
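For reference alongside the library's Q-parameterized version, the textbook NB2 score (log link, variance mu + alpha*mu**2) can be sketched as follows; this is the standard form, not necessarily line-for-line what NB_Score computes internally:

    import numpy as np
    from scipy.special import digamma

    def nb2_score(y, mu, X, alpha):
        # d lnL / d beta and d lnL / d alpha for the NB2 model.
        a_inv = 1.0 / alpha
        dbeta = X.T @ ((y - mu) / (1.0 + alpha * mu))
        dalpha = np.sum(
            a_inv ** 2 * (np.log(1.0 + alpha * mu)
                          - digamma(y + a_inv) + digamma(a_inv))
            + (y - mu) / (alpha * (1.0 + alpha * mu)))
        return np.append(dbeta, dalpha)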
@@ -3640,7 +3452,7 @@ class ObjectiveFunction(object):
          # prob = 1/(1+mu*alpha)
          try:
              # print(np.shape(y),np.shape(size), np.shape(prob))
-             # gg2 = self.negbinom_pmf(alpha_size, prob, y)
+             gg2 = self.negbinom_pmf(alpha_size, size/(size+mu), y)

              gg = np.exp(
                  gammaln(y + alpha) - gammaln(y + 1) - gammaln(alpha) + y * np.log(mu) + alpha * np.log(alpha) - (
@@ -3798,21 +3610,8 @@ class ObjectiveFunction(object):

          if dispersion == 1 or dispersion == 4:  # nb
              # if model_nature is not None and 'dispersion_penalty' in model_nature:
-             #     if b_gam < 0.8*model_nature['dispersion_penalty']:
-             #         penalty += model_nature['dispersion_penalty'] - b_gam

-             # if abs(b_gam) < 0.01:
-             #     penalty += 1/np.abs(b_gam)

-             if b_gam >= 4.5:
-                 penalty += b_gam
-                 b_gam = 4.61
-                 # b_gam = 7.9
-                 # penalty += model_nature['dispersion_penalty'] - b_gam
-                 # penalty += 1/np.max((0.01, abs(b_gam)))
-                 # b_gam = model_nature['dispersion_penalty']
-
-             """
              if b_gam <= 0:
                  # penalty += 100
                  # penalty += abs(b_gam)
@@ -3820,21 +3619,21 @@ class ObjectiveFunction(object):
                  # b_gam = 1

                  # if b_gam < 0.03:
-                 penalty += 10
+                 penalty += min(1, np.abs(b_gam))

-                 b_gam = 0.03
+                 b_gam = 0.001
              #

-             if b_gam >= 10:
-                 penalty += b_gam
+             # if b_gam >= 10:
+             #     penalty += b_gam

-             if b_gam == 0:
-                 b_gam = min_comp_val
+             # if b_gam == 0:
+             #     b_gam = min_comp_val
              # b_gam = 0.03

-             b_gam = abs(b_gam)
+             # b_gam = abs(b_gam)

-             """
+

          elif dispersion == 2:
              if b_gam >= 1:
@@ -3918,195 +3717,7 @@ class ObjectiveFunction(object):
              # np.sum(stats.poisson.logpmf(endog, np.exp(XB)))
              return -np.exp(XB) + y * XB - sc.gammaln(y + 1)

-         def loglik_zi(params, return_grad=False):
-             """
-             Loglikelihood for observations of Generic Zero Inflated model.
-
-             Parameters
-             ----------
-             params : array_like
-                 The parameters of the model.
-
-             Returns
-             -------
-             loglike : ndarray
-                 The log likelihood for each observation of the model evaluated
-                 at `params`. See Notes for definition.
-
-             Notes
-             -----
-             .. math:: \\ln L=\\ln(w_{i}+(1-w_{i})*P_{main\\_model})+
-                       \\ln(1-w_{i})+L_{main\\_model}
-             where P - pdf of main model, L - loglike function of main model.

-             for observations :math:`i=1,...,n`
-             """
-             params_infl = params[:self.k_inflate]
-             params_main = params[self.k_inflate:]
-
-             y = self.endog
-             w = predict_logit(params_infl, exog_infl)
-
-             w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-             llf_main = loglik_obs_poisson(params_main, y)
-             dispersion = 0
-             b_gam = None
-             Xd = exog
-             eta = np.tile(np.dot(Xd, params_main), (1, 1)).transpose()
-             eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
-
-             llf_main_og = self.loglik_obs(y, eVd.ravel(), dispersion, b_gam)
-             zero_idx = np.nonzero(y == 0)[0]
-             nonzero_idx = np.nonzero(y)[0]  # type: ignore
-
-             llf = np.zeros_like(y, dtype=np.float64)
-             llf[zero_idx] = (np.log(w[zero_idx] +
-                                     (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
-             llf[nonzero_idx] = np.log(
-                 1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-             if return_grad:
-                 score_main = Xd.T @ (y - eVd.ravel())
-                 L = np.exp(np.dot(Xd, params_main))
-                 score_main = (self.endog - L)[:, None] * Xd
-
-                 dldp = np.zeros(
-                     (exog.shape[0], len(params_main)), dtype=np.float64)
-                 dldw = np.zeros_like(exog_infl, dtype=np.float64)
-
-                 dldp[zero_idx, :] = (score_main[zero_idx].T *
-                                      (1 - (w[zero_idx]) / np.exp(llf[zero_idx]))).T
-                 dldp[nonzero_idx, :] = score_main[nonzero_idx]
-
-                 dldw[zero_idx, :] = (exog_infl[zero_idx].T * w[zero_idx] *
-                                      (1 - w[zero_idx]) *
-                                      (1 - np.exp(llf_main[zero_idx])) /
-                                      np.exp(llf[zero_idx])).T
-                 dldw[nonzero_idx, :] = -(exog_infl[nonzero_idx].T *
-                                          w[nonzero_idx]).T
-
-                 return llf, np.hstack((dldw, dldp)).sum(axis=0)
-
-             else:
-
-                 return llf
-
-         def zipoisson_logpmf(x, mu, w):
-             return _lazywhere(x != 0, (x, mu, w),
-                               (lambda x, mu, w: np.log(1. - w) + x * np.log(mu) -
-                                sc.gammaln(x + 1.) - mu),
-                               np.log(w + (1. - w) * np.exp(-mu)))
-
-         def zipoisson_pmf(x, mu, w):
-             return np.exp(zipoisson_logpmf(x, mu, w))
-
-         def loglik_logit(params, endog_y, exog_x):  # this is predict I think
-             q = 2 * endog_y - 1
-             X = exog_x
-             return np.sum(np.log(cdf(q * np.dot(X, params))))
-
-         def predict_logit(params, exog=None, linear=False):
-             if exog is None:
-                 exog = self.exog
-             if not linear:
-                 return (cdf(np.dot(exog, params)))
-             else:
-                 return (np.dot(exog, params))
-
-         def cdf(X):
-             """
-             The logistic cumulative distribution function
-
-             Parameters
-             ----------
-             X : array_like
-                 `X` is the linear predictor of the logit model. See notes.
-
-             Returns
-             -------
-             1/(1 + exp(-X))
-
-             Notes
-             -----
-             In the logit model,
-
-             .. math:: \\Lambda\\left(x^{\\prime}\\beta\\right)=
-                       \\text{Prob}\\left(Y=1|x\\right)=
-                       \\frac{e^{x^{\\prime}\\beta}}{1+e^{x^{\\prime}\\beta}}
-             """
-             X = np.asarray(X)
-             return 1 / (1 + np.exp(-X))
-
-         llobs, grad = loglik_zi(betas, return_grad)
-         llf = np.sum(llobs)
-         if return_grad:
-             return -llf, -grad
-         else:
-             return -llf
-
-     def cdf_logit(self, X):
-         """
-         The logistic cumulative distribution function
-
-         Parameters
-         ----------
-         X : array_like
-             `X` is the linear predictor of the logit model. See notes.
-
-         Returns
-         -------
-         1/(1 + exp(-X))
-
-         Notes
-         -----
-         In the logit model,
-
-         .. math:: \\Lambda\\left(x^{\\prime}\\beta\\right)=
-                   \\text{Prob}\\left(Y=1|x\\right)=
-                   \\frac{e^{x^{\\prime}\\beta}}{1+e^{x^{\\prime}\\beta}}
-         """
-         X = np.asarray(X)
-         return 1 / (1 + np.exp(-X))
-
-     def predict_logit_part(self, params_infl, exog_infl, linear=False):
-
-         if not linear:
-             return (self.cdf_logit(np.dot(exog_infl, params_infl)))
-         else:
-             return (np.dot(exog_infl, params_infl))
-
-     def ZeroInflate_W_setup(self, exog_infl, y, params_infl):
-
-         w = self.predict_logit_part(params_infl, exog_infl)
-
-         w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-
-         llf_main = [1, 2, 3]  # TODO ge
-         zero_idx = np.nonzero(y == 0)[0]
-         nonzero_idx = np.nonzero(y)[0]
-
-         llf = np.zeros_like(y, dtype=np.float64)
-         llf[zero_idx] = (np.log(w[zero_idx] +
-                                 (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
-         llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-
-         return llf
-
-     def dPXL(self, x, alpha):
-         return ((alpha ** 2) * (x + 3 * alpha + (alpha ** 2) + 3)) / (1 + alpha) ** (4 + x)
-
-     # Define the gradient function
-
-     def poisson_lindley_gradient(self, params, exog, endog):
-         beta = params[-1]
-         mu = np.exp(np.dot(exog, params[:-1]))
-         q = beta / (1 + beta)
-         d_beta = (endog.ravel() + 1) / (mu + endog.ravel() + 1) - q / (1 - q)
-         d_beta = self.dpoisl(endog, beta).ravel()
-         d_mu = np.dot((endog - mu) * (1 - q) / (mu + endog + 1), exog)
-
-         grad_n = np.concatenate((d_mu, np.atleast_2d(d_beta).T), axis=1)
-         der = np.sum(grad_n, axis=0)
-         return der, grad_n

      def dpoisl(self, x, theta, log=False):
          # if theta < 0:
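The deleted zero-inflation machinery mixes a point mass at zero (weight w) with the count model. The removed zipoisson_logpmf is equivalent to this standalone sketch:

    import numpy as np
    from scipy.special import gammaln

    def zip_logpmf(x, mu, w):
        # log P(X = x) under w * 1{x=0} + (1 - w) * Poisson(mu).
        x = np.asarray(x, dtype=float)
        pois = x * np.log(mu) - gammaln(x + 1.0) - mu
        return np.where(x == 0,
                        np.log(w + (1.0 - w) * np.exp(-mu)),
                        np.log(1.0 - w) + pois)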
@@ -4175,7 +3786,8 @@ class ObjectiveFunction(object):
          elif dispersion == 1:

              proba_r = self._nonlog_nbin(y, eVd, b_gam)
-             # print(1)
+
+
              # proba_d = self.dnegbimonli(y, eVd, b_gam )
              # print('fuck if this actually works')

@@ -4183,21 +3795,9 @@ class ObjectiveFunction(object):

              proba_r = self.general_poisson_pmf(eVd, y, b_gam)

-         elif dispersion == 3:
-             fa, ba = self.get_dispersion_paramaters(betas, dispersion)
-             zi = self.my_lindley(y, ba)
-             proba_r = poisson.pmf(y, zi * eVd.ravel())
-             # proba_r = self.lindl_pmf_chatgpt(y, l_pam)
-             # prob_2 = self.dpoisl(y, l_pam)
-             # proba_r = self.poisson_lindley_pmf(eVd, l_pam, y)
-             # proba_r = self.poisson_lindley_pmf(eVd, l_pam, y)
-             # print(1)
-             # proba_r = self.dpoisl(y, eVd)

-         elif dispersion == 4:
-             fa, ba = self.get_dispersion_paramaters(betas, dispersion)
-             self.zi = self.my_lindley(eVd, ba)
-             proba_r = self._nonlog_nbin(y, eVd + self.zi, b_gam)
+
+
              # proba_r = self._nonlog_nbin_lindley(y, eVd, fa, ba)

          elif dispersion == 'poisson_lognormal':
@@ -4219,7 +3819,7 @@ class ObjectiveFunction(object):
              proba_p = self._prob_product_across_panels(
                  proba_r, self.panel_info)
              proba_r = proba_p
-         proba_r = np.clip(proba_r, min_comp_val, None)
+         proba_r = np.clip(proba_r, min_comp_val, max_comp_val)
          loglik = np.log(proba_r)
          return loglik

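Clipping on both sides before the log (rather than only from below) keeps the per-panel probability product inside the finite band declared at the top of the module, so the log-likelihood can never become inf or -inf. In miniature:

    import numpy as np

    min_comp_val, max_comp_val = 1e-160, 1e+200   # module-level bounds from this diff
    proba_r = np.array([0.0, 1e-300, 0.2])
    loglik = np.log(np.clip(proba_r, min_comp_val, max_comp_val))
    # -> approx. [-368.4, -368.4, -1.61] instead of [-inf, -690.8, -1.61]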
@@ -4267,7 +3867,7 @@ class ObjectiveFunction(object):
          # if (len(betas) -Kf-Kr-self.is_dispersion(dispersion)) != (Kchol + Kr):

          # gr_f, gr_u, gr_s = np.zeros((N, Kf)), np.zeros((N, Kr)), np.zeros((N, len(betas) -Kf-Kr-self.is_dispersion(dispersion)))  # FIX
-         Kf2, Kr, Kc, Kr_b, Kchol, Kh, zi_terms = self.get_num_params()
+         Kf2, Kr, Kc, Kr_b, Kchol, Kh = self.get_num_params()

          gr_f, gr_u, gr_s = np.zeros((N, Kf)), np.zeros(
              (N, Kr + Kc)), np.zeros((N, Kchol + Kr_b))
@@ -4521,9 +4121,9 @@ class ObjectiveFunction(object):

          elif dispersion == 1:

-             der = -self.NB_Score(betas, y, eVd, Xd, 0, obs_specific)
+             der = self.NB_Score(betas, y, eVd, Xd, 0, obs_specific)
              if both:
-                 grad_n = -self.NB_Score(betas, y, eVd, Xd, 0, True)
+                 grad_n = self.NB_Score(betas, y, eVd, Xd, 0, True)
                  return np.nan_to_num(der, nan=200, posinf=200, neginf=-200), np.nan_to_num(grad_n, nan=140, posinf=140,
                                                                                             neginf=-140)

@@ -4716,8 +4316,10 @@ class ObjectiveFunction(object):
          return self._loglik_gradient(self, betas, *stuff)

      def get_br_and_bstd(betas, self):
-         Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh, zi_terms_a = self.get_num_params()
-         br = betas[Kf:Kf + Kr]
+         Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
+         Kr = Kr_a + Kr_c  # todo check if this works
+         print('check if this works')
+         br = betas[Kf_a:Kf_a + Kr]
          # Calculate the size of the br matrix
          br_size = int((1 + np.sqrt(1 + 8 * Kr_b_a)) / 2)

@@ -4728,7 +4330,7 @@ class ObjectiveFunction(object):
          index = 0
          for i in range(br_size):
              for j in range(i, br_size):
-                 br_std[j, i] = betas[Kf + Kr + index]
+                 br_std[j, i] = betas[Kf_a + Kr + index]
                  index += 1

          brstd = br_std
@@ -4767,7 +4369,7 @@ class ObjectiveFunction(object):
4767
4369
  self.n_obs = len(y) # feeds into gradient
4768
4370
  if draws is None and draws_grouped is None and (
4769
4371
  'draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
4770
-
4372
+ #TODO do i shuffle the draws
4771
4373
  if type(Xd) == dict:
4772
4374
  N, Kf, P = 0, 0, 0
4773
4375
  for key in Xd:
@@ -4775,13 +4377,13 @@ class ObjectiveFunction(object):
4775
4377
  P += Xd[key].shape[1]
4776
4378
  Kf += Xd[key].shape[2]
4777
4379
  else:
4778
- self.naming_for_printing(betas, 1, dispersion, zi_fit=zi_list, model_nature=model_nature)
4380
+ self.naming_for_printing(betas, 1, dispersion, model_nature=model_nature)
4779
4381
  N, P, Kf = Xd.shape[0], Xd.shape[1], Xd.shape[2]
4780
4382
  betas = np.array(betas)
4781
4383
  Bf = betas[0:Kf] # Fixed betas
4782
4384
 
4783
4385
  main_disper, lindley_disp = self.get_dispersion_paramaters(
4784
- betas, dispersion)
4386
+ betas, dispersion) #todo fix this up
4785
4387
  if lindley_disp is not None:
4786
4388
  if lindley_disp <= 0:
4787
4389
  penalty += 1
@@ -4805,32 +4407,16 @@ class ObjectiveFunction(object):
4805
4407
  llf_main = self.loglik_obs(
4806
4408
  y, eVd, dispersion, main_disper, lindley_disp, betas)
4807
4409
 
4808
- # llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
4410
+ llf_main = np.clip(llf_main, log_lik_min, log_lik_max)
4809
4411
 
4810
4412
  loglik = llf_main.sum()
4811
- if 'exog_infl' in model_nature:
4812
- params_infl = betas[Kf:Kf + len(model_nature.get('exog_infl'))]
4813
- params_main = Bf
4814
- # ones = np.ones((model_nature.get('exog_inflX').shape[0], model_nature.get('exog_inflX').shape[1], 1))
4815
- # exog_infl = np.concatenate((ones, model_nature.get('exog_inflX')), axis =2 )
4816
- exog_infl = model_nature.get('exog_inflX')
4817
- llf_main = llf_main # TODO test this
4818
- w = self.predict_logit_part(params_infl, exog_infl)
4819
-
4820
- w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
4821
-
4822
- zero_idx = np.nonzero(y == 0)[0]
4823
- nonzero_idx = np.nonzero(y)[0] # FIXME should shape be unravelled
4824
4413
 
4825
- llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1) # TODO test this i added ravel to this code
4826
- llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
4827
- llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
4828
- loglik = llf.sum()
4829
4414
 
4830
4415
  loglik = np.clip(loglik, log_lik_min, log_lik_max)
4831
4416
  if self.power_up_ll:
4832
4417
 
4833
4418
  loglik += 2*loglik
4419
+ print('am i powering up')
4834
4420
  penalty = self.regularise_l2(betas)
4835
4421
 
4836
4422
  if not np.isreal(loglik):
@@ -4851,7 +4437,7 @@ class ObjectiveFunction(object):
4851
4437
  else:
4852
4438
  return -loglik + penalty
4853
4439
  # Else, we have draws
4854
- self.n_obs = len(y) * self.Ndraws
4440
+ self.n_obs = len(y) * self.Ndraws #todo is this problematic
4855
4441
  penalty += self._penalty_betas(
4856
4442
  betas, dispersion, penalty, float(len(y) / 10.0))
4857
4443
 
@@ -4860,7 +4446,7 @@ class ObjectiveFunction(object):
4860
4446
  # Kf =0
4861
4447
  betas = np.array(betas)
4862
4448
  betas = dev.to_gpu(betas) # TODO fix mepotnetially problem
4863
- self.naming_for_printing(betas, 0, dispersion, zi_fit=zi_list, model_nature=model_nature)
4449
+ self.naming_for_printing(betas, 0, dispersion, model_nature=model_nature)
4864
4450
  y = dev.to_gpu(y)
4865
4451
  if draws is not None and draws_grouped is not None:
4866
4452
  draws = np.concatenate((draws_grouped, draws), axis=1)
@@ -4908,7 +4494,7 @@ class ObjectiveFunction(object):
4908
4494
  # if (Kchol +Kr) != (len(betas) -Kf-Kr -self.is_dispersion(dispersion)):
4909
4495
  # print('I think this is fine')
4910
4496
  n_coeff = self.get_param_num(dispersion)
4911
- Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh, zi_terms_a = self.get_num_params()
4497
+ Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
4912
4498
  if Kchol_a != Kchol:
4913
4499
  print('hold')
4914
4500
 
@@ -4949,11 +4535,11 @@ class ObjectiveFunction(object):
4949
4535
  # brstd), draws_) # Get random coefficients, old method
4950
4536
  Br = self._transform_rand_betas(br,
4951
4537
  brstd, draws_) # Get random coefficients
4952
- self.naming_for_printing(betas, dispersion=dispersion, zi_fit=zi_list, model_nature=model_nature)
4538
+ self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
4953
4539
  self.Br = Br.copy()
4954
4540
 
4955
4541
  else:
4956
- self.naming_for_printing(betas, dispersion=dispersion, zi_fit=zi_list, model_nature=model_nature)
4542
+ self.naming_for_printing(betas, dispersion=dispersion, model_nature=model_nature)
4957
4543
  chol_mat = self._chol_mat(
4958
4544
  len(self.rdm_cor_fit), br, brstd, self.rdm_cor_fit)
4959
4545
  self.chol_mat = chol_mat.copy()
@@ -5378,12 +4964,16 @@ class ObjectiveFunction(object):
5378
4964
  return H
5379
4965
 
5380
4966
  def _minimize(self, loglik_fn, x, args, method, tol, options, bounds=None, hess_calc=None):
5381
-
4967
+ # method = 'BFGS'  # optional override for debugging, left disabled
5382
4968
  if method == "BFGS":
5383
4969
 
5384
4970
  try:
4971
+ argbs = list(args)
5385
4972
 
5386
- return self._bfgs(loglik_fn, x, args=args, tol=tol, **options) # @IgnoreException
4973
+ argbs[7] = True  # enable the Hessian flag for this pass
4974
+ args_hess = tuple(argbs)
4975
+ # Optimize once with the Hessian flag set, rather than running
+ # BFGS twice and discarding the first result.
+ return self._bfgs(loglik_fn, x, args=args_hess, tol=tol, **options)
5387
4977
 
5388
4978
  except:
5389
4979
  return minimize(loglik_fn, x, args=args, jac=args[6], method='BFGS', tol=tol, options=options)
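The pattern is a custom BFGS first with SciPy as the safety net. A self-contained sketch of the same try/fallback structure, where custom_bfgs stands in for self._bfgs and args[6] carries the gradient flag:

    from scipy.optimize import minimize

    def safe_minimize(loglik_fn, x0, args, tol, options, custom_bfgs):
        # Prefer the in-house BFGS; fall back to SciPy on any failure
        try:
            return custom_bfgs(loglik_fn, x0, args=args, tol=tol, **options)
        except Exception:
            return minimize(loglik_fn, x0, args=args, jac=args[6],
                            method='BFGS', tol=tol, options=options)
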
@@ -5689,9 +5279,9 @@ class ObjectiveFunction(object):
5689
5279
  betas_est - array. Coefficients which minimize the negative log-likelihood.
5690
5280
  """
5691
5281
  # Set default method
5692
- sub_zi = None
5693
- exog_infl = None if 'exog_infl' not in mod else mod['exog_infl']
5694
- inf_betas = 0 if exog_infl is None else len(exog_infl)
5282
+ # TODO: the initial fit worked but a subsequent call throws; investigate
5283
+
5284
+
5695
5285
 
5696
5286
  sol = Solution()
5697
5287
  log_ll = 10.0 ** 9
@@ -5706,10 +5296,7 @@ class ObjectiveFunction(object):
5706
5296
  if self.hess_yes == False:
5707
5297
  method2 = 'BFGS_2'
5708
5298
  method2 = self.method_ll
5709
- # method2 = 'BFGS_2'
5710
5299
 
5711
- # method2 = 'BFGS_2'
5712
- # method2 = 'dogleg'
5713
5300
  bic = None
5714
5301
  pvalue_alt = None
5715
5302
  zvalues = None
@@ -5727,7 +5314,7 @@ class ObjectiveFunction(object):
5727
5314
 
5728
5315
  dispersion_param_num = self.is_dispersion(dispersion)
5729
5316
 
5730
- paramNum = self.get_param_num(dispersion)
5317
+ paramNum = self.get_param_num(dispersion)  # still referenced below via num_parm
5731
5318
  self.no_random_paramaters = 0
5732
5319
  if 'XG' in mod:
5733
5320
  XX = np.concatenate((mod.get('X'), mod.get('XG'), mod.get('Xr'), mod.get('XH')), axis=2)
@@ -5753,7 +5340,7 @@ class ObjectiveFunction(object):
5753
5340
  XX_test = mod.get('Xr_test')
5754
5341
 
5755
5342
  bb = np.random.uniform(
5756
- -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num + inf_betas)
5343
+ -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num)
5757
5344
 
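Starting values are drawn uniformly in a narrow band around zero, which keeps the first objective evaluations numerically stable. For illustration, with hypothetical parameter counts:

    import numpy as np

    # Hypothetical counts: 3 fixed, 2 random, no grouped/heterogeneous
    # terms, and 1 dispersion parameter (e.g. negative binomial)
    k, kr, kg, kh, dispersion_param_num = 3, 2, 0, 0, 1
    bb = np.random.uniform(-0.05, 0.05,
                           size=k + kr + kg + kh + dispersion_param_num)
    print(bb.shape)   # (6,)
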
5758
5345
  if method == 'L-BFGS-B':
5759
5346
  if dispersion == 0:
@@ -5787,10 +5374,12 @@ class ObjectiveFunction(object):
5787
5374
  hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
5788
5375
  initial_beta = self._minimize(self._loglik_gradient, bb,
5789
5376
  args=(XX, y, None, None, None, None, calc_gradient, hess_est,
5790
- dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None,
5377
+ dispersion, 0, False, 0, None, None, None, None, None,
5791
5378
  mod),
5792
5379
  method=method2, tol=1e-5, options={'gtol': tol['gtol']},
5793
5380
  bounds=bounds)
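For reference, the positional layout of the args tuple handed to _loglik_gradient appears to be the following (inferred from the call sites in this diff; it is not documented in the source):

    # (X, y, draws, Xf, Xr, batch_size, grad_flag, hess_flag, dispersion,
    #  0, False, 0, rdm_cor_fit, zi_fit -> now None, exog_infl -> now None,
    #  draws_grouped, XG, model_nature)
    # Slot 6 doubles as the `jac` argument in the SciPy fallback, and
    # slot 7 is the Hessian flag that _minimize toggles for BFGS.
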
5794
5383
 
5795
5384
  if method2 == 'L-BFGS-B':
5796
5385
  if hasattr(initial_beta.hess_inv, 'todense'):
@@ -5803,7 +5392,7 @@ class ObjectiveFunction(object):
5803
5392
  if initial_beta is not None and np.isnan(initial_beta['fun']):
5804
5393
  initial_beta = self._minimize(self._loglik_gradient, bb,
5805
5394
  args=(XX, y, None, None, None, None, True, True, dispersion,
5806
- 0, False, 0, None, sub_zi, exog_infl, None, None, mod),
5395
+ 0, False, 0, None, None, None, None, None, mod),
5807
5396
  method=method2, tol=tol['ftol'], options={'gtol': tol['gtol']})
5808
5397
 
5809
5398
  if initial_beta is not None and not np.isnan(initial_beta['fun']):
@@ -5827,24 +5416,24 @@ class ObjectiveFunction(object):
5827
5416
  loglik=log_ll_fixed, num_parm=paramNum, GOF=other_measures)
5828
5417
 
5829
5418
  self.naming_for_printing(
5830
- initial_beta['x'], 1, dispersion, zi_fit=sub_zi, model_nature=mod)
5419
+ initial_beta['x'], 1, dispersion, model_nature=mod)
5831
5420
 
5832
5421
  if self.is_multi:
5833
5422
  in_sample_mae = self.validation(
5834
5423
  initial_beta['x'], mod.get('y'), mod.get('X'), dispersion=dispersion,
5835
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None, model_nature=mod, halton=0,
5424
+ rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
5836
5425
  testing=0)
5837
5426
 
5838
5427
  sol.add_objective(TRAIN=in_sample_mae)
5839
5428
  MAE_out = self.validation(
5840
5429
  initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
5841
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None, model_nature=mod, halton=0)
5430
+ rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0)
5842
5431
  sol.add_objective(TEST=MAE_out)
5843
5432
 
5844
5433
  if self.val_percentage >0:
5845
5434
  MAE_VAL = self.validation(
5846
5435
  initial_beta['x'], mod.get('y_test'), mod.get('X_test'), dispersion=dispersion,
5847
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None, model_nature=mod, halton=0,
5436
+ rdm_cor_fit=self.rdm_cor_fit, exog_infl=None, model_nature=mod, halton=0,
5848
5437
  validation=1)
5849
5438
  sol.add_objective(VAL=MAE_VAL)
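The TRAIN/TEST/VAL objectives produced by validation() are mean absolute errors of predicted against observed counts (assumed from the variable names); the reduction itself is just:

    import numpy as np

    def mae(y_true, y_pred):
        # Mean absolute error over all observations
        y_true = np.asarray(y_true).ravel()
        y_pred = np.asarray(y_pred).ravel()
        return np.mean(np.abs(y_true - y_pred))
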
5850
5439
  if sol[self._obj_1] <= self.best_obj_1:
@@ -5905,7 +5494,7 @@ class ObjectiveFunction(object):
5905
5494
  b[-1] = .5
5906
5495
  if method == 'L-BFGS-B' or method2 == 'L-BFGS-B':
5907
5496
 
5908
- Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh, zi_terms_a = self.get_num_params()
5497
+ Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
5909
5498
  if Kh > 0:
5910
5499
  Kh_e = mod.get('XH').shape[-1]
5911
5500
  Kh_range = Kh - Kh_e
@@ -5949,9 +5538,6 @@ class ObjectiveFunction(object):
5949
5538
 
5950
5539
  bounds.append((np.random.uniform(0.05, .15), bob2[count] + 7))
5951
5540
  count += 1
5952
-
5953
-
5954
-
5955
5541
  elif ii < jj:
5956
5542
  if bob2[count] > 0:
5957
5543
 
@@ -6024,14 +5610,14 @@ class ObjectiveFunction(object):
6024
5610
  mod['dispersion_penalty'] = np.abs(b[-1])
6025
5611
  grad_args = (
6026
5612
  X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0, self.rdm_cor_fit,
6027
- self.zi_fit, exog_infl, draws_grouped, XG, mod)
5613
+ None, None, draws_grouped, XG, mod)
6028
5614
  # self.gradients_est_yes = (1, 1)
6029
5615
 
6030
5616
  if draws is None and draws_hetro is not None:
6031
5617
  print('hold')
6032
5618
  betas_est = self._minimize(self._loglik_gradient, b, args=(
6033
5619
  X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
6034
- self.rdm_cor_fit, self.zi_fit, exog_infl, draws_grouped, XG, mod),
5620
+ self.rdm_cor_fit, None, None, draws_grouped, XG, mod),
6035
5621
  method=method2, tol=tol['ftol'],
6036
5622
  options={'gtol': tol['gtol']}, bounds=bounds,
6037
5623
  hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
@@ -6050,7 +5636,7 @@ class ObjectiveFunction(object):
6050
5636
  betas_est = self._minimize(self._loglik_gradient, b, args=(
6051
5637
  X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0,
6052
5638
  self.rdm_cor_fit,
6053
- self.zi_fit, exog_infl, draws_grouped, XG, mod),
5639
+ None, None, draws_grouped, XG, mod),
6054
5640
  method=method2, tol=tol['ftol'],
6055
5641
  options={'gtol': tol['gtol']})
6056
5642
 
@@ -6059,7 +5645,7 @@ class ObjectiveFunction(object):
6059
5645
 
6060
5646
  if np.isfinite(betas_est['fun']):
6061
5647
  self.naming_for_printing(
6062
- betas_est['x'], 0, dispersion, zi_fit=sub_zi, model_nature=mod)
5648
+ betas_est['x'], 0, dispersion, model_nature=mod)
6063
5649
 
6064
5650
  if method2 == 'L-BFGS-B':
6065
5651
 
@@ -6086,7 +5672,7 @@ class ObjectiveFunction(object):
6086
5672
 
6087
5673
  paramNum = len(betas_est['x'])
6088
5674
  self.naming_for_printing(
6089
- betas_est['x'], 0, dispersion, zi_fit=sub_zi, model_nature=mod)
5675
+ betas_est['x'], 0, dispersion, model_nature=mod)
6090
5676
 
6091
5677
  sol.add_objective(bic=bic, aic=aic,
6092
5678
  loglik=log_ll, num_parm=paramNum, GOF=other_measures)
@@ -6096,19 +5682,19 @@ class ObjectiveFunction(object):
6096
5682
  try:
6097
5683
 
6098
5684
  in_sample_mae = self.validation(betas_est['x'], y, X, Xr, dispersion=dispersion,
6099
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None,
5685
+ rdm_cor_fit=self.rdm_cor_fit,
6100
5686
  model_nature=mod, testing=0)
6101
5687
  sol.add_objective(TRAIN=in_sample_mae)
6102
5688
  y_test, X_test, Xr_test = mod.get('y_test'), mod.get('X_test'), mod.get('Xr_test')
6103
5689
  Xr_grouped_test = mod.get('Xrtest')
6104
5690
  MAE_test = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
6105
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None,
5691
+ rdm_cor_fit=self.rdm_cor_fit,
6106
5692
  model_nature=mod)
6107
5693
 
6108
5694
  sol.add_objective(TEST=MAE_test)
6109
- if self.val_percentage >0:
5695
+ if self.val_percentage > 0:
6110
5696
  MAE_val = self.validation(betas_est['x'], y_test, X_test, Xr_test, dispersion=dispersion,
6111
- rdm_cor_fit=self.rdm_cor_fit, zi_list=sub_zi, exog_infl=None,
5697
+ rdm_cor_fit=self.rdm_cor_fit,
6112
5698
  model_nature=mod, validation=1)
6113
5699
  sol.add_objective(VAL=MAE_val)
6114
5700
 
@@ -6226,8 +5812,7 @@ class ObjectiveFunction(object):
6226
5812
  self.rdm_cor_fit = [x for x, y in zip(
6227
5813
  select_data, model_nature.get('alpha_cor_rdm')) if y == 1]
6228
5814
 
6229
- # if self.zi_force:
6230
- # self.zi_fit = [x for x, y in zip(select_data, model_nature.get('exog_infl')) if y == 1]
5815
+
6231
5816
  # if alpha_grouped is not None:
6232
5817
  self.grouped_rpm = [x for x, y in zip(select_data, model_nature.get('alpha_grouped')) if y == 1]
6233
5818
  self.hetro_fit = [x for x, y in zip(select_data, model_nature.get('alpha_hetro')) if y == 1]
@@ -6405,14 +5990,14 @@ class ObjectiveFunction(object):
6405
5990
  df_test[:, :, idx], model_nature.get('transformations')[idx] = self.transformer(
6406
5991
  t, idx, df_test[:, :, idx])
6407
5992
  if np.max(df_tf[:, :, idx]) >= 77000:
5993
+
6408
5994
  raise Exception('should not be possible')
6409
5995
 
6410
5996
  self.define_selfs_fixed_rdm_cor(model_nature)
6411
5997
  indices = self.get_named_indices(self.fixed_fit)
6412
5998
  indices5 = self.get_named_indices(self.hetro_fit)
6413
5999
 
6414
- if self.zi_force:
6415
- model_nature['exog_inflX'] = df_tf[:, :, self.get_named_indices(self.zi_force_names)]
6000
+
6416
6001
 
6417
6002
  x_h_storage = []
6418
6003
  x_h_storage_test = []
@@ -6445,7 +6030,7 @@ class ObjectiveFunction(object):
6445
6030
  if XG is not None:
6446
6031
  indices4_test = np.repeat(self.get_named_indices(self.grouped_rpm),
6447
6032
  self.group_dummies_test.shape[2]) if self.grouped_rpm != [] else []
6448
- XGtest = np.tile(self.group_dummies_test, len(self.grouped_rpm)) * df_test[:, :, indices4]
6033
+ XGtest = np.tile(self.group_dummies_test, len(self.grouped_rpm)) * df_test[:, :, indices4_test]
6449
6034
  model_nature['XG'] = XG
6450
6035
  model_nature['XGtest'] = XGtest
6451
6036
 
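The fix above pairs the test dummies with the test indices. The construction interacts every group dummy with every grouped variable; a small sketch with hypothetical shapes:

    import numpy as np

    N, P, G = 100, 1, 3                # obs, panels, groups (hypothetical)
    group_dummies = np.random.randint(0, 2, size=(N, P, G))
    df = np.random.rand(N, P, 10)
    indices = np.repeat([2, 5], G)     # two grouped vars, repeated per group
    XG = np.tile(group_dummies, 2) * df[:, :, indices]
    print(XG.shape)                    # (100, 1, 6): one column per pair
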
@@ -6488,7 +6073,8 @@ class ObjectiveFunction(object):
6488
6073
  Xr_test = None
6489
6074
  model_nature['Xr_test'] = Xr_test
6490
6075
  if (Xr.ndim <= 1) or (Xr.shape[0] <= 11) or np.isin(Xr, [np.inf, -np.inf, None, np.nan]).any():
6491
- print('what the actual fuck')
6076
+ raise Exception('Xr has an invalid shape or contains non-finite values')
6492
6078
  if Xr.size == 0:
6493
6079
  Xr = None
6494
6080
  Xr_test = None
@@ -6509,10 +6095,10 @@ class ObjectiveFunction(object):
6509
6095
  obj_1.add_layout(layout)
6510
6096
 
6511
6097
  model_form_name = self.check_complexity(
6512
- self.fixed_fit, self.rdm_fit, self.rdm_cor_fit, self.zi_fit, dispersion, is_halton, model_nature)
6098
+ self.fixed_fit, self.rdm_fit, self.rdm_cor_fit, None, dispersion, is_halton, model_nature)
6513
6099
 
6514
6100
  obj_1.add_names(self.fixed_fit.copy(), self.rdm_fit.copy(),
6515
- self.rdm_cor_fit.copy(), model_form_name, self.zi_fit, pvalues)
6101
+ self.rdm_cor_fit.copy(), model_form_name, None, pvalues)
6516
6102
  if not isinstance(obj_1, dict):
6517
6103
  raise Exception('should not be possible')
6518
6104
 
@@ -6540,22 +6126,22 @@ class ObjectiveFunction(object):
6540
6126
  else:
6541
6127
  obj_1 = Solution()
6542
6128
  self.significant = 3
6543
- print('not_implented yet')
6129
+ print('not implemented yet')  # TODO: check this for exceeding values
6544
6130
 
6545
6131
  if self.is_quanitifiable_num(obj_1[self._obj_1]) and pvalues is not None:
6546
6132
  self.bic = obj_1['bic']
6547
6133
  self.pvalues = pvalues
6548
- if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c", "zi", 'grp', 'xh']):
6134
+ if any(sub_string in obj_1['simple'] for sub_string in ["rp", "c", 'grp', 'xh']):
6549
6135
  # todo: probably delete
6550
6136
  self.naming_for_printing(
6551
- pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'], obj_1['zi_fit'],
6137
+ pvalues, 0, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
6552
6138
  obj_1, model_nature)
6553
6139
  else:
6554
6140
  if is_delete == 0:
6555
6141
  # todo: probably delete
6556
6142
  self.naming_for_printing(
6557
6143
  pvalues, 1, dispersion, obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'],
6558
- obj_1['zi_fit'], obj_1, model_nature)
6144
+ obj_1, model_nature)
6559
6145
  self.coeff_ = betas
6560
6146
  self.stderr = stderr
6561
6147
  self.zvalues = zvalues
@@ -6563,8 +6149,9 @@ class ObjectiveFunction(object):
6563
6149
  if self.significant == 0:
6564
6150
 
6565
6151
  print(self.full_model, 'full model is')
6566
- alpha, alpha_rdm, alpha_cor_rdm = self.modify(
6567
- self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
6152
+ if not self.test_flag:
6153
+ alpha, alpha_rdm, alpha_cor_rdm = self.modify(
6154
+ self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
6568
6155
 
6569
6156
  return obj_1, model_nature
6570
6157
 
@@ -6581,8 +6168,9 @@ class ObjectiveFunction(object):
6581
6168
  self.significant = 3
6582
6169
 
6583
6170
  return obj_1, model_nature
6584
- alpha, alpha_rdm, alpha_cor_rdm = self.modify(
6585
- self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
6171
+ if not self.test_flag:
6172
+ alpha, alpha_rdm, alpha_cor_rdm = self.modify(
6173
+ self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
6586
6174
  if self.grab_transforms:
6587
6175
 
6588
6176
  if is_halton and self.significant == 1:
@@ -6692,7 +6280,7 @@ class ObjectiveFunction(object):
6692
6280
  alpha_cor_rdm = np.in1d(select_data, cor_rdm) * 1
6693
6281
  alpha_cor_rdm = alpha_cor_rdm.tolist()
6694
6282
  alpha_group_rdm = np.in1d(select_data, group_rdm) * 1
6695
- alpha_group_rdm = alpha_group_rdm.tolist()
6283
+ alpha_group_rdm = alpha_group_rdm.tolist()  # TODO: will this branch ever trigger?
6696
6284
  return alpha, alpha_rdm, alpha_cor_rdm
6697
6285
 
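modify() encodes each candidate specification as 0/1 inclusion masks via np.in1d, as in this sketch with hypothetical variable names:

    import numpy as np

    select_data = ['AADT', 'LANES', 'CURVE', 'MEDIAN']   # hypothetical
    group_rdm = ['LANES', 'MEDIAN']
    alpha_group_rdm = (np.in1d(select_data, group_rdm) * 1).tolist()
    print(alpha_group_rdm)                               # [0, 1, 0, 1]
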
6698
6286
  def show_transforms(self, fix, rdm):
@@ -6825,9 +6413,10 @@ class ObjectiveFunction(object):
6825
6413
 
6826
6414
  Br_come_one = self.Br.copy()
6827
6415
  # betas_random = self._transform_rand_betas(betas, betas_std, draws)
6828
-
6416
+ # TODO: make sure this works for log-normal and truncated normal
6829
6417
  if any(set(distribution).intersection(['ln_normal', 'tn_normal'])):
6830
- Br_come_one = self._transform_rand_betas()
6418
+ print('check this: intersection should not happen for all distributions')
6419
+ Br_come_one = self._transform_rand_betas(betas, betas_std, draws)
6831
6420
  for k, dist_k in enumerate(distribution):
6832
6421
  if dist_k == 'ln_normal':
6833
6422
  der[:, k, :] = Br_come_one[:, k, :]
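The derivative row is just the transformed draw because, for a log-normal coefficient B = exp(mu + sigma*xi), dB/dmu = B. A quick numerical check with illustrative values:

    import numpy as np

    mu, sigma = 0.5, 0.2
    xi = np.random.standard_normal(5)
    B = np.exp(mu + sigma * xi)
    dB_dmu = B                  # analytic: d/dmu exp(mu + sigma*xi) = B
    eps = 1e-6
    numeric = (np.exp(mu + eps + sigma * xi) - B) / eps
    print(np.allclose(dB_dmu, numeric, rtol=1e-4))       # True
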
@@ -6837,9 +6426,7 @@ class ObjectiveFunction(object):
6837
6426
  return der
6838
6427
 
6839
6428
  def _copy_size_display_as_ones(self, matrix):
6840
- # grab the shape to copy
6841
- please = matrix.shape
6842
- der = dev.np.ones((please), dtype=matrix.dtype)
6429
+ der = dev.np.ones(matrix.shape, dtype=matrix.dtype)
6843
6430
  return der
6844
6431
 
6845
6432
  def prepare_halton(self, dim, n_sample, draws, distribution, long=False, slice_this_way=None):