metacountregressor 0.1.69__py3-none-any.whl → 0.1.78__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,12 +9,10 @@ import math
  import os
  import random
  import sys
- import time
  import warnings
  from collections import Counter
  from functools import wraps

- from tempfile import TemporaryFile
  import traceback
  import latextable
  import numpy as np
@@ -35,15 +33,22 @@ from statsmodels.tools.numdiff import approx_fprime, approx_hess
  from sklearn.preprocessing import StandardScaler
  from texttable import Texttable

- from ._device_cust import device as dev
- from .pareto_file import Pareto, Solution
+ try:
+ from ._device_cust import device as dev
+ from .pareto_file import Pareto, Solution
+ from .data_split_helper import DataProcessor
+ except ImportError:
+ from metacountregressor._device_cust import device as dev
+ from metacountregressor.pareto_file import Pareto, Solution
+ from data_split_helper import DataProcessor
+

  np.seterr(divide='ignore', invalid='ignore')
  warnings.simplefilter("ignore")

- # defube the computation boundary limits
- min_comp_val = 1e-200
- max_comp_val = 1e+300
+ # define the computation boundary limits
+ min_comp_val = 1e-20
+ max_comp_val = 1e+200
  log_lik_min = -1e+200
  log_lik_max = 1e+200

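Editor's note: the try/except import added above is the standard fallback for a module that must work both inside the installed package (relative imports) and when run as a loose script, where the relative form raises ImportError. The tightened numeric guards (1e-20 and 1e+200, replacing 1e-200 and 1e+300) are the values the likelihood code clips against; a minimal sketch of that kind of guard, with illustrative numbers only:

    import numpy as np

    min_comp_val, max_comp_val = 1e-20, 1e+200

    eta = np.array([-900.0, 0.0, 50.0])
    # pre-clip the exponent, then clamp the result into the computable band
    lam = np.clip(np.exp(np.clip(eta, None, 700.0)), min_comp_val, max_comp_val)
    print(np.log(lam))  # finite everywhere: no log(0), no overflow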
@@ -117,21 +122,19 @@ class ObjectiveFunction(object):

  def __init__(self, x_data, y_data, **kwargs):

- self.reg_penalty = .5
+ self.reg_penalty = 1
  self.power_up_ll = False
  self.bic = None
  self.other_bic = False
+ self.test_flag = 1
  if self.other_bic:
  print('change this to false latter ')
- offset = None

- # initi
+ # initialize values
  self.constant_value = -5.5
  self.negative_binomial_value = 0.05

  self.verbose_safe = True
- self.zi_force = None # Analst want a zi model and formally declares the zi components below
- self.zi_force_names = None # delare the zi components
  self.please_print = kwargs.get('please_print', 0)
  self.group_halton = None
  self.grad_yes = False
@@ -145,7 +148,7 @@ class ObjectiveFunction(object):
  self.rdm_fit = None
  self.rdm_cor_fit = None
  self.dist_fit = None
- self.zi_fit = None
+
  self.MAE = None
  self.best_obj_1 = 100000000.0
  self._obj_1 = 'bic'
@@ -158,7 +161,7 @@ class ObjectiveFunction(object):
  self._max_iterations_improvement = 100
  self.generated_sln = set()
  self.ave_mae = 0
- # defualt paraamaters for hs
+ # defalt paramaters for hs #TODO unpack into harmony search class
  self.algorithm = 'hs' # 'sa' 'de' also avialable
  self._hms = 20
  self._max_time = 60 * 60 * 24
@@ -166,7 +169,7 @@ class ObjectiveFunction(object):
  self._par = 0.3
  self._mpai = 1
  self._max_imp = 100000
- self._WIC = 1000 # Number of ITerations without Multiobjective Improvement
+ self._WIC = 1000 # Number of Iterations without Multiobjective Improvement
  self._panels = None
  self.is_multi = True
  self.method_ll = 'Nelder-Mead-BFGS'
@@ -190,11 +193,6 @@ class ObjectiveFunction(object):
  if k in acceptable_keys_list:
  self.__setattr__(k, self.tryeval(kwargs[k]))

- if self.zi_force_names is not None:
- self.zi_force = True
- if 'const' not in self.zi_force_names:
- self.zi_force_names = ['const'] + self.zi_force_names
- print('did this work?')

  if 'complexity_level' in kwargs:
  self.complexity_level = kwargs['complexity_level']
@@ -211,17 +209,22 @@ class ObjectiveFunction(object):
  raise Exception

  self.pvalue_penalty = float(kwargs.get('pvalue_penalty', 0.5))
-
  self.pvalue_exceed = 0
  self._maximize = False # do we maximize or minimize?
- # self._random_forest_identify_transformations(x_data, y_data)
- # data_names = self._random_forest_preprocess(x_data, y_data)
+
  x_data = sm.add_constant(x_data)
  self._input_data(x_data, y_data)
+
+
  if y_data.ndim == 1:
  y_data = pd.DataFrame(y_data)

- # split the data for testing
+ '''
+ #TODO ADD THIS IN LATER
+ splitter = DataProcessor(x_data, y_data, kwargs)
+ self.copy_class_attributes(splitter) #inherit the self objects
+ '''
+
  if self._obj_1 == 'MAE' or self._obj_2 in ["MAE", 'RMSE', 'MAE', 'MSE', 'RMSE_IN', 'RMSE_TEST']:
  self.test_percentage = float(kwargs.get('test_percentage', 0))
  self.val_percentage = float(kwargs.get('val_percentage', 0))
@@ -229,8 +232,7 @@ class ObjectiveFunction(object):
  self.is_multi = False

  if 'panels' in kwargs:
- self.group_names = np.asarray(x_data[kwargs['group']].astype(
- 'category').cat._parent.dtype.categories)
+ self.group_names = np.asarray(x_data[kwargs['group']].astype('category').cat._parent.dtype.categories)

  x_data[kwargs['group']] = x_data[kwargs['group']].astype(
  'category').cat.codes
@@ -243,58 +245,39 @@ class ObjectiveFunction(object):

  N = len(np.unique(x_data[kwargs['panels']].values))
  id_unique = np.unique(x_data[kwargs['panels']].values)
-
  except KeyError:
  N = len(np.unique(x_data[kwargs['panels']]))
+ id_unique = np.unique(x_data[kwargs['panels']].values)

  training_size = int((1 - self.test_percentage - self.val_percentage) * N)
  ids = np.random.choice(N, training_size, replace=False)
  ids = id_unique[ids]
  train_idx = [ii for ii, id_val in enumerate(x_data[kwargs['panels']]) if id_val in ids]
  test_idx = [ii for ii, id_val in enumerate(x_data[kwargs['panels']]) if id_val not in ids]
-
  df_train = x_data.loc[train_idx, :]
  df_test = x_data.loc[test_idx, :]
  y_train = y_data.loc[train_idx, :]
  y_test = y_data.loc[test_idx, :]
-
  else:
  N = len(x_data)
  training_size = int((1 - self.test_percentage - self.val_percentage) * N)
  ids = np.random.choice(N, training_size, replace=False)
  id_unique = np.array([i for i in range(N)])
  ids = id_unique[ids]
-
  train_idx = [ii for ii in range(len(id_unique)) if id_unique[ii] in ids]
  test_idx = [ii for ii in range(len(id_unique)) if id_unique[ii] not in ids]
+ df_train = x_data.loc[train_idx, :]
+ df_test = x_data.loc[test_idx, :]
+ y_train = y_data.loc[train_idx, :]
+ y_test = y_data.loc[test_idx, :]

- try: # @IgnoreException
- df_train = x_data.loc[train_idx, :]
- df_test = x_data.loc[test_idx, :]
- y_train = y_data.loc[train_idx, :]
- y_test = y_data.loc[test_idx, :]
- except:
- # Convert all values to their real parts
- df_real = x_data.select_dtypes(include=[np.number]).apply(np.real)
-
- # Replace the original DataFrame's numerical columns with real-valued ones
- x_data[df_real.columns] = df_real
-
- df_train = x_data.iloc[train_idx, :]
- df_test = x_data.iloc[test_idx, :]
- y_train = y_data.iloc[train_idx, :]
- y_test = y_data.iloc[test_idx, :]

- self.n_obs = N
+ #self.n_obs = N
  self._characteristics_names = list(self._x_data.columns)
- if self.zi_force:
- self.alpha_hurdle = np.isin(self._characteristics_names,
- [item.split(':')[0] for item in self.zi_force_names]).astype(int).tolist()
-
  self._max_group_all_means = 1

  exclude_this_test = [4]
-
+
  if 'panels' in kwargs:
  self.panels = np.asarray(df_train[kwargs['panels']])
  self.panels_test = np.asarray(df_test[kwargs['panels']])
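Editor's note: both branches of the rewritten split now produce the same four frames, and the panel branch samples whole panel IDs so that every observation of a panel lands on the same side of the split. A self-contained sketch of that ID-level split (the function and argument names are illustrative, not package API):

    import numpy as np
    import pandas as pd

    def split_by_panel(x_data, y_data, panel_col, test_pct=0.2, val_pct=0.0):
        # sample whole panel IDs, not individual rows
        id_unique = np.unique(x_data[panel_col].values)
        training_size = int((1 - test_pct - val_pct) * len(id_unique))
        keep = np.random.choice(id_unique, training_size, replace=False)
        mask = x_data[panel_col].isin(keep).values
        return x_data[mask], x_data[~mask], y_data[mask], y_data[~mask]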
@@ -309,7 +292,6 @@ class ObjectiveFunction(object):
  X, Y, panel, group = self._arrange_long_format(
  df_train, y_train, self.ids, self.ids, groupll)
  self.group_halton = group.copy()
- Y = Y.astype('float')
  self.group_dummies = pd.get_dummies(group)
  Xnew, Ynew, panel_info = self._balance_panels(X, Y, panel)
  self.panel_info = panel_info
@@ -324,7 +306,6 @@ class ObjectiveFunction(object):
  YY = Ynew.values.reshape(self.N, self.P, 1).copy()
  self._x_data = XX.copy()
  self._y_data = YY.copy()
- # Xalt, Yalt, group_info = self._balance_panels(X, Y, group)
  X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.panels_test, group_test)
  if np.max(group) > 50:
  exclude_this_test = [4]
@@ -353,7 +334,7 @@ class ObjectiveFunction(object):

  self._samples, self._panels, self._characteristics = self._x_data.shape

- # i dont think i need this X, Y, group_info = self._balance_panels(XX, YY, group)
+

  else:
  self.G = None
@@ -372,77 +353,37 @@ class ObjectiveFunction(object):
  K = Xnew.shape[1]
  self._characteristics_names = list(Xnew.columns)
  XX = Xnew.values.reshape(self.N, self.P, K).copy()
- # self.group_dummies = self.group_dummies.values.reshape(self.N, self.P, -1)
- # self.group_halton = self.group_halton.reshape(self.N, self.P)[:, 0]
  YY = Ynew.values.reshape(self.N, self.P, 1).copy()
  self._x_data = XX.copy()
  self._y_data = YY.copy()
- # Xalt, Yalt, group_info = self._balance_panels(X, Y, group)
+
  if self.is_multi:
  X, Y, panel, group = self._arrange_long_format(df_test, y_test, self.ids_test, self.ids_test, None)
  if np.max(group) > 50:
  exclude_this_test = [4]
  else:
  exclude_this_test = []
- # self.group_halton_test = group.copy()
  X, Y, panel_info = self._balance_panels(X, Y, panel)
-
+
  self.N_test, self.P_test = panel_info.shape
-
- # self.group_dummies_test = pd.get_dummies(group)
- # self.group_dummies_test = self.group_dummies_test.values.reshape(self.N_test, self.P_test, -1)
  K = X.shape[1]
  self.columns_names = X.columns
  X = X.values.reshape(self.N_test, self.P_test, K)
- # self.group_halton_test = self.group_halton_test.reshape(self.N_test, self.P_test)[:, 0]
  Y = Y.values.reshape(self.N_test, self.P_test, 1)
  self._x_data_test = X.copy()
  self.y_data_test = Y.copy()
-
+
  self._samples, self._panels, self._characteristics = self._x_data.shape

- # draws and pvalue
-
- if 'Offset' in self._characteristics_names:
- offset = True
- self.have_offset = offset
- if self.have_offset is not None:
- try:
- # offset for training data
- # define offset
- val_od = self.get_named_indices(['Offset'])
- self._offsets = self._x_data[:, :, val_od]
-
- # drop the offset from the data
- self._x_data = np.delete(self._x_data, val_od, axis=2)
- self._characteristics_names = [x for x in self._characteristics_names if not 'Offset' in x]
- self._characteristics = len(self._characteristics_names)
- # self._x_data.drop(columns=['Offset'], inplace=True)
-
- # offset for testing data
- if self.is_multi:
- # define offset
- self._offsets_test = self._x_data_test[:, :, val_od]
- # self._offsets_test = self._x_data_test['Offset'].to_numpy()
- # self._offsets_test = np.reshape(
- # self._offsets_test, (-1, 1))
- # drop the offset from the data
- self._x_data_test = np.delete(self._x_data_test, val_od, axis=2)
- # self._x_data_test.drop(columns=['Offset'], inplace=True)
- except:
- # if no offset, set as 0
- self._offsets = np.zeros((self.N, self.P, 1))
- if self.is_multi:
- self._offsets_test = np.zeros((self.N_test, self.P_test, 1))
- else:
- self._offsets = np.zeros((self.N, self.P, 1))
- if self.is_multi:
- self._offsets_test = np.zeros((self.N_test, self.P_test, 1))

+ #Define the offset into the data
+ self.process_offset()
  if self.is_multi:
  self.pareto_printer = Pareto(self._obj_1, self._obj_2, True)
-
  self._pareto_population = list()
+
+
+
  self.Ndraws = 200 # todo: change back
  self.draws1 = None
  self.initial_sig = 1 # pass the test of a single model
@@ -480,8 +421,7 @@ class ObjectiveFunction(object):
  self.coeff_ = None

  self.significant = 0
- # define the states of our explanaotory variables
-
+ # define the states of our explanatory variables
  self._discrete_values = self.define_alphas(self.complexity_level, exclude_this_test,
  kwargs.get('must_include', []))
  self._discrete_values = self._discrete_values + \
@@ -506,21 +446,83 @@ class ObjectiveFunction(object):
  self.endog = None
  # solution parameters
  self._min_characteristics = 1
-
  self._max_hurdle = 4

- if "Manual_Fit" in kwargs and kwargs['Manual_Fit'] is not None:
+ #Manually fit from analyst specification
+ manual_fit = kwargs.get('Manual_Fit')
+ if manual_fit is not None:
+ self.process_manual_fit(manual_fit)
+
+ self.solution_analyst = None

- self.initial_sig = 1 # pass the test of a single model
- self.pvalue_sig_value = 1
- # embed the solution to how you want it
- self.set_defined_seed(42)
- a = self.modify_initial_fit(kwargs['Manual_Fit'])
- self.makeRegression(a)


- find_constant = 0
- hard_code = 0
+
+ def over_ride_self(self, **kwargs):
+ """
+ Dynamically sets attributes on the instance based on the provided keyword arguments.
+ """
+ for key, value in kwargs.items():
+ setattr(self, key, value)
+ print(f"Updated attributes: {kwargs}")
+
+ def remove_offset(self, data, indices):
+ """ Remove offset data from the dataset """
+ new_data = np.delete(data, indices, axis=2)
+ return new_data
+
+ def process_offset(self):
+ """ Process offset if it exists in the characteristics """
+ try:
+ if 'Offset' in self._characteristics_names:
+ self.have_offset = True
+ val_od = self.get_named_indices(['Offset'])
+ self._offsets = self._x_data[:, :, val_od]
+ self._x_data = self.remove_offset(self._x_data, val_od)
+ self._characteristics_names = [x for x in self._characteristics_names if x != 'Offset']
+ self._characteristics = len(self._characteristics_names)
+
+ if self.is_multi:
+ self._offsets_test = self._x_data_test[:, :, val_od]
+ self._x_data_test = self.remove_offset(self._x_data_test, val_od)
+ else:
+ self.initialize_empty_offsets()
+
+ except Exception as e:
+ print(f"An error occurred: {e}") # Better error handling
+ self.initialize_empty_offsets()
+
+ def initialize_empty_offsets(self):
+ """ Initialize offsets to zero if none are found or on error """
+ self._offsets = np.zeros((self.N, self.P, 1))
+ if self.is_multi:
+ self._offsets_test = np.zeros((self.N_test, self.P_test, 1))
+
+
+ def copy_class_attributes(self, class_object):
+ '''
+ Loop through an
+ '''
+
+ # Loop through all attributes of the car object and copy them
+ for attr in vars(class_object):
+ setattr(self, attr, getattr(class_object, attr))
+
+
+ def process_manual_fit(self, manual_fit):
+ """Process the manual fit configuration."""
+ self.initial_sig = 1 # Example: Initialize some signal
+ self.pvalue_sig_value = 1 # Example: Initialize another signal
+ self.set_defined_seed(42) # Set a specific seed
+
+ modified_fit = self.modify_initial_fit(manual_fit) # Modify the initial fit based on manual_fit
+ self.makeRegression(modified_fit) # Perform regression with the modified fit
+
+
+ def process_fit_specifications(self, find_constant, hard_code):
+ """
+ Function to for proceccing testing, and finding a suitable initial coefficient (linear intercept)
+ """
  if hard_code:
  manual_fit_spec = {
  'fixed_terms': ['Constant', 'US', 'RSMS', 'MCV'],
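Editor's note: 0.1.78 moves the inline offset handling into process_offset()/initialize_empty_offsets() and adds copy_class_attributes(), which is plain vars()/setattr reflection. A self-contained sketch of that copying pattern (the class names below are illustrative):

    class Splitter:
        def __init__(self):
            self.df_train, self.df_test = 'train', 'test'

    class Model:
        def copy_class_attributes(self, class_object):
            # copy every instance attribute of class_object onto self
            for attr in vars(class_object):
                setattr(self, attr, getattr(class_object, attr))

    m = Model()
    m.copy_class_attributes(Splitter())
    print(m.df_train, m.df_test)  # train test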
@@ -559,7 +561,7 @@ class ObjectiveFunction(object):
  constant_values.append(self.beta_dict['const'][0][1])
  dispersion_values.append(self.beta_dict.get(self._model_type_codes[i], [[0, 0], [0, 0]])[0][1])
  except:
- print('d')
+ print('Error during regression analysis.')
  i += 1

  # Add the values of this iteration to the total
@@ -570,7 +572,7 @@ class ObjectiveFunction(object):
  constant_values_avg = [x / 100 for x in constant_values_total]
  dispersion_values_avg = [x / 100 for x in dispersion_values_total]

- self.solution_analyst = None
+ return constant_values_avg, dispersion_values_avg


  def _balance_panels(self, X, y, panels): # ToDO re
@@ -615,22 +617,7 @@ class ObjectiveFunction(object):

  return np.nan_to_num(quad(integrand, 0, np.inf)[0], nan=0)

- def _random_forest_call_r(self):
- import rpy2.rinterface as rinterface
- import rpy2.robjects as robjects
- import rpy2.robjects as ro
- from rpy2.robjects import pandas2ri
- r = robjects.r
- r['source']('testML.R')
- pandas2ri.activate()
- RF_function_r = robjects.globalenv['RF_plot']
- RF_function_corr_r = robjects.globalenv['RF_plot_corr']
- r_df = ro.conversion.py2rpy(self._x_data)
- y_dy = ro.conversion.py2rpy(self._y_data)
- RF_function_r(r_df, y_dy)
-
- print('did this work')
- RF_function_corr_r(r_df, y_dy)
+

  def print_system_utilization(self):
  # Get CPU usage
@@ -647,7 +634,8 @@ class ObjectiveFunction(object):
  mem_free = round(mem_info.available /
  (1024 * 1024), 2) # Convert to MB
  print(
- f"Current memory usage: {mem_percent}% ({mem_used} MB used / {mem_total} MB total)")
+ f"Current memory usage: {mem_percent}% ({mem_used} MB used / {mem_total} MB total / "
+ f" mem free {mem_free})")

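Editor's note: the widened status line now reports free memory as well. A sketch of the psutil call that produces numbers of this shape (assuming psutil, which print_system_utilization appears to rely on):

    import psutil

    mem_info = psutil.virtual_memory()
    mem_percent = mem_info.percent
    mem_used = round(mem_info.used / (1024 * 1024), 2)       # MB
    mem_total = round(mem_info.total / (1024 * 1024), 2)     # MB
    mem_free = round(mem_info.available / (1024 * 1024), 2)  # MB
    print(f"Current memory usage: {mem_percent}% ({mem_used} MB used / "
          f"{mem_total} MB total /  mem free {mem_free})")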
  def _arrange_long_format(self, X, y, ids, panels=None, groups=None):
  '''converts the data to long format'''
@@ -665,59 +653,14 @@ class ObjectiveFunction(object):
  if group is not None:
  group = group[sorted_idx]

- return X, y, pnl, group
-
- pandas_sort = 1
- if pandas_sort:
- if ids is not None:
-
- pnl = panels if panels is not None else np.ones(len(ids))
- df = X
-
- df['panels'], df['ids'] = pnl, ids
- new = 0
- if new:
- cols = np.zeros(len(ids), dtype={'names': ['panels', 'ids'],
- 'formats': ['<f4', '<f4']})
- cols['panels'], cols['ids'] = pnl, ids
- sorted_idx = np.argsort(cols, order=['panels', 'ids'])
- X, y = X[sorted_idx], y[sorted_idx]
- if panels is not None:
- panels = panels[sorted_idx]
- return X, y, panels
-
- df = pd.concat([X.reset_index(drop=True),
- y.reset_index(drop=True)], axis=1)
- sorted_df = df.sort_values(
- ['panels', 'ids']).reset_index(drop=True)
-
- X, y, panels = sorted_df.iloc[:, :-
- 3], sorted_df.iloc[:, -3:-2], sorted_df.iloc[:, -2]
- if panels is not None:
- # panels = panels[sorted_idx]
- P_i = (
- (np.unique(panels, return_counts=True)[1])).astype(int)
- P = np.max(P_i)
- N = len(P_i)
- print(1)
- return X, y, panels
-
- if ids is not None:
- X = np.asarray(X)
- y = np.asarray(y)
- pnl = panels if panels is not None else np.ones(len(ids))
-
- cols = np.zeros(len(ids), dtype={'names': ['panels', 'ids'],
- 'formats': ['<f4', '<f4']})
- cols['panels'], cols['ids'] = pnl, ids
- sorted_idx = np.argsort(cols, order=['panels', 'ids'])
- X, y = X[sorted_idx], y[sorted_idx]
- if panels is not None:
- panels = panels[sorted_idx]
+ return X, y.astype('float'), pnl, group

- return X, y, panels
+ return X, y.astype('float'), panels

  def _random_forest_identify_transformations(self, x_data, y_data):
+ '''
+ use the random forrest model to identify best feature
+ '''
  # let's use the pprint module for readability
  import inspect
  from pprint import pprint
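Editor's note: with the dead pandas_sort branches gone, _arrange_long_format sorts rows by (panel, id) with a structured-array argsort and returns y cast to float. A standalone sketch of that sort with toy values:

    import numpy as np

    panels = np.array([2, 1, 1, 2])
    ids = np.array([1, 2, 1, 0])
    cols = np.zeros(len(ids), dtype={'names': ['panels', 'ids'],
                                     'formats': ['<f4', '<f4']})
    cols['panels'], cols['ids'] = panels, ids
    sorted_idx = np.argsort(cols, order=['panels', 'ids'])
    print(sorted_idx)  # [2 1 3 0]: rows ordered by panel, then id within panel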
@@ -866,7 +809,6 @@ class ObjectiveFunction(object):
  def pvalue_asterix_add(self, pvalues):
  pvalue_ast = list()
  for i in range(len(pvalues)):
- signif = ""
  if float(pvalues[i]) < 0.001:
  signif = "***"
  elif float(pvalues[i]) < 0.01:
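Editor's note: deleting the signif = "" initializer is only safe if every p-value falls into one of the if/elif branches; otherwise the name is unbound for the first non-significant coefficient. A defensive version of the star mapping (the 0.001 and 0.01 cut-offs are visible above; the remaining ones are assumed from the usual convention):

    def stars(p):
        # map a p-value to significance asterisks, defaulting to ""
        if p < 0.001:
            return "***"
        if p < 0.01:
            return "**"
        if p < 0.05:
            return "*"
        return ""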
@@ -911,8 +853,7 @@ class ObjectiveFunction(object):
  rdm_fit = self.none_handler(self.rdm_fit)
  if rdm_cor_fit is None:
  rdm_cor_fit = self.none_handler(self.rdm_cor_fit)
- if zi_fit is None:
- zi_fit = self.none_handler(self.zi_fit)
+
  dis_fit = [x for x in self.none_handler(
  self.dist_fit)] # check if dis fit is name

@@ -977,18 +918,18 @@ class ObjectiveFunction(object):
  br_w_names = [randvars[i] + " (Std. Dev.) " + rand_vars_dis[i]
  for i in range(len(randvars))]

- zi_names = [x + ":inflated" for x in self.none_handler(self.zi_force_names)]
+

  names = fixednames + randvars + chol_names + \
- br_w_names + chol_part_1 + chol + zi_names + hetro_long + dispersion_name
+ br_w_names + chol_part_1 + chol + hetro_long + dispersion_name
  self.name_deleter = fixednames + randvars + chol_names + randvars + [chol_names[i] for i
  in range(len(chol_names)) for j in
  range(
- i + 1)] + zi_names + dispersion_name # TODO does this break
+ i + 1)] + dispersion_name # TODO does this break
  name_delete_2 = fixednames + randvars + chol_names + randvars + [chol_names[i] + "/" +
  chol_names[j] for i
  in range(len(chol_names)) for j in
- range(i + 1)] + zi_names + dispersion_name
+ range(i + 1)] + dispersion_name
  index_dict = {}
  for i, name in enumerate(name_delete_2):
  split_names = name.split('/')
@@ -1012,9 +953,9 @@ class ObjectiveFunction(object):
  randvars = [x for x in self.none_handler(rdm_fit)]
  chol_names = [x for x in self.none_handler(rdm_cor_fit)]

- zi_names = [x + ': inflated' for x in self.none_handler(self.zi_force_names)]

- names = fixednames + randvars + chol_names + zi_names + big_hetro + dispersion_name
+
+ names = fixednames + randvars + chol_names + big_hetro + dispersion_name

  names = np.array(names) # TODO check order
  self.print_transform = self.transform_id_names + \
@@ -1052,22 +993,8 @@ class ObjectiveFunction(object):
  if not isinstance(self.pvalues, np.ndarray):
  raise Exception

- for i in range(len(self.coeff_)):
- signif = ""

- if float(self.pvalues[i]) < 0.01:
- signif = "***"
- elif float(self.pvalues[i]) < 0.05:
- signif = "**"
- elif float(self.pvalues[i]) < 0.1:
- signif = "*"

- '''
- print(fmt.format(self.coeff_names[i][:coeff_name_str_length], self.print_transform[i], self.coeff_[i],
- self.stderr[i], self.zvalues[i], self.pvalues[i],
- signif
- ))
- '''
  if self.please_print or save_state:

  if self.convergance is not None:
@@ -1175,14 +1102,7 @@ class ObjectiveFunction(object):
  self.save_to_file(latextable.draw_latex(
  table, caption=caption, caption_above=True), file_name)

- # print('change this')
- # df = pd.read_csv("artificial_mixed_corr_2023_MOOF.csv")
-
- # updating the column value/data
- # df['Y'] = np.mean(self.lam, axis = (1,2))

- # writing into the file
- # df.to_csv("artificial_mixed_corr_2023_MOOF.csv", index=False)

  def summary(self, model=None, algorithm=None, transform_list=None, long_print=0, solution=None):
  """
@@ -1540,22 +1460,9 @@ class ObjectiveFunction(object):
  alpha_hetro = [
  0 if x != 5 else 1 for x in vector[:self._characteristics]]

- if self.zi_force == True:

- return {
- 'alpha': alpha,
- 'alpha_rdm': alpha_rdm,
- 'alpha_cor_rdm': alpha_cor_rdm,
- 'alpha_grouped': alpha_grouped,
- 'alpha_hetro': alpha_hetro,
- 'distributions': distributions,
- 'transformations': transformations,
- 'exog_infl': self.zi_force_names,
- 'dispersion': dispersion
- }

- else:
- return {
+ return {
  'alpha': alpha,
  'alpha_rdm': alpha_rdm,
  'alpha_cor_rdm': alpha_cor_rdm,
@@ -1563,7 +1470,6 @@ class ObjectiveFunction(object):
  'alpha_hetro': alpha_hetro,
  'distributions': distributions,
  'transformations': transformations,
-
  'dispersion': dispersion
  }

@@ -1599,7 +1505,7 @@ class ObjectiveFunction(object):

  def repair(self, vector, reduce_to_this=10000): # todo get the number of parameters
  'Method to repair the model so that the number of paramaters is held within the constraint'
- b = 0
+
  new_j = 0
  # extract explanatory vector
  prmVect = vector[:self._characteristics]
@@ -1618,7 +1524,6 @@ class ObjectiveFunction(object):
  int(np.min((5, self.complexity_level - 1)))])

  count_3 = prmVect.count(3)
- this_many = count_3 * (count_3 + 1) / 2

  vector[:len(prmVect)] = prmVect.copy()

@@ -1637,8 +1542,7 @@ class ObjectiveFunction(object):
  # b = sum(prmVect) + self.is_dispersion(vector[-1])
  max_loops = 100 # Maximum number of loops
  counter = 0 # Counter variable to keep track of the number of loops
- if any(isinstance(num, int) and num < 0 for num in vector):
- raise Exception('fhfhfhf')
+

  while b > self._max_characteristics and counter < max_loops or b > reduce_to_this:

@@ -1686,8 +1590,6 @@ class ObjectiveFunction(object):
  counter += 1

  counter = 0
- if any(isinstance(num, int) and num < 0 for num in vector):
- raise Exception('fhfhfhf')
  while b < self._min_characteristics and counter < max_loops:

  weights = [1 if x == 0 else 0 for x in only_ints_vals]
@@ -1734,13 +1636,13 @@ class ObjectiveFunction(object):
  cor_l = 0 if self.rdm_cor_fit is None else len(self.rdm_cor_fit)
  Kh = 0 if self.hetro_fit is None else len(self.hetro_fit) + len(set(self.dist_hetro))

- zi_terms = 0 if self.zi_fit is None else len(self.zi_fit)
+
  Kchol = int((cor_l *
  (cor_l + 1)) / 2)
  n_coeff = Kf + Kr + cor_l + Kchol + Kr_b + Kh
  if block:
- return [Kf, Kr, cor_l, Kr_b, Kchol, Kh, zi_terms]
- return Kf, Kr, cor_l, Kr_b, Kchol, Kh, zi_terms
+ return [Kf, Kr, cor_l, Kr_b, Kchol, Kh]
+ return Kf, Kr, cor_l, Kr_b, Kchol, Kh

  def find_index_of_block(self, lst, value):
  cumulative_sum = 0
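Editor's note: get_num_params now returns six blocks instead of seven. Kchol counts the free entries of the lower-triangular Cholesky factor over the cor_l correlated random parameters, i.e. the triangular number cor_l*(cor_l+1)/2. A quick numeric check:

    cor_l = 4
    Kchol = int((cor_l * (cor_l + 1)) / 2)
    print(Kchol)  # 10 free entries in a 4x4 lower triangle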
@@ -1821,8 +1723,7 @@ class ObjectiveFunction(object):
  self.rdm_fit)):
  raise Exception('pop wrong for id names')

- # return 'need to delete all of the dups'
- # self.rdm_cor_fit.pop(self.name_deleter(idx))
+

  def get_value_to_delete(self, idx, dispersion):
  block = self.get_num_params(True)
@@ -1858,8 +1759,7 @@ class ObjectiveFunction(object):
  self.dist_fit.pop(cc[b] + len(self.rdm_fit))
  self.transform_id_names.pop(
  cc[b] + len(self.none_handler(self.fixed_fit)) + len(self.none_handler(self.rdm_fit)))
- # return 'need to delete all of the dups'
- # self.rdm_cor_fit.pop(self.name_deleter(idx))
+

  def get_param_num(self, dispersion=0):
  a = np.sum(self.get_num_params()) + \
@@ -1890,7 +1790,7 @@ class ObjectiveFunction(object):
  return_violated_terms=0):

  num_params = len(pvalues)
- Kf, Kr, Kc, Kr_b, Kchol, Kh, zi_b = self.get_num_params()
+ Kf, Kr, Kc, Kr_b, Kchol, Kh = self.get_num_params()

  vio_counts = 0
  pvalues = np.array([float(string) for string in pvalues])
@@ -1915,18 +1815,14 @@ class ObjectiveFunction(object):
  subpvalues[i] = 0

  sum_k += Kr_b
- if Kchol > 0:
- cc = [i for i
- in range(len(self.rdm_cor_fit)) for j in range(i + 1)]
-
  lower_triangular = subpvalues[sum_k:sum_k + Kchol]

- n = Kc # compute matrix size
+
  # initialize matrix with zeros
- matrix_alt = [[0] * n for _ in range(n)]
+ matrix_alt = [[0] * Kc for _ in range(Kc)]
  index = 0

- for i in range(n):
+ for i in range(Kc):
  for j in range(i + 1):
  # fill in lower triangular entries
  matrix_alt[i][j] = lower_triangular[index]
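Editor's note: the slice of Cholesky p-values is unpacked row by row into a Kc x Kc lower-triangular matrix. A standalone sketch of the same unpacking with dummy values:

    import numpy as np

    Kc = 3
    lower_triangular = [0.01, 0.20, 0.03, 0.50, 0.04, 0.06]  # length Kc*(Kc+1)//2
    matrix_alt = [[0] * Kc for _ in range(Kc)]
    index = 0
    for i in range(Kc):
        for j in range(i + 1):
            matrix_alt[i][j] = lower_triangular[index]
            index += 1
    print(np.array(matrix_alt))  # zeros above the diagonal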
@@ -2414,17 +2310,9 @@ class ObjectiveFunction(object):

  if obj_1 is not None:
  obj_1['layout'] = vector.copy()
- # alpha, alpha_rdm, alpha_cor_rdm = self.modify(
- # obj_1['fixed_fit'], obj_1['rdm_fit'], obj_1['rdm_cor_fit'])
- # a = self.modifyn(model_mod)
- # vector = self.modify_vector(
- # vector, alpha, alpha_rdm, alpha_cor_rdm)
  sub_vector = vector[:self._characteristics]
  dispersion_parm = vector[-1]
- if dispersion_parm == 0:
- num_parm = sum(sub_vector)
- else:
- num_parm = sum(sub_vector) + 1
+

  if not self.is_quanitifiable_num(obj_1[self._obj_1]):
  obj_1[self._obj_1] = 10 ** 9
@@ -2457,7 +2345,7 @@ class ObjectiveFunction(object):

  self.Last_Sol = obj_1.copy()

- #
+

  self.reset_sln()
  if not self.is_quanitifiable_num(obj_1[self._obj_1]):
@@ -2495,7 +2383,7 @@ class ObjectiveFunction(object):
  self.coeff_names = None
  self.draws1 = None
  self.coeff_ = None
- self.zi_fit = None
+
  self.bic = None
  self.log_lik = None
  self.pvalues = None
@@ -2589,13 +2477,13 @@ class ObjectiveFunction(object):
  def set_defined_seed(self, seed):
  print('Benchmaking test with Seed', seed)
  np.random.seed(seed)
- #pd.random.seed(seed)
+
  random.seed(seed)

  def set_random_seed(self):
  print('Imbdedding Seed', self._random_seed)
  np.random.seed(self._random_seed)
- #pd.random.seed(self._random_seed)
+
  random.seed(self._random_seed)
  return self._random_seed

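Editor's note: the commented-out pd.random.seed calls are deleted for good reason: pandas exposes no global seed of its own; its sampling routines draw from NumPy, so seeding numpy and random covers both. A minimal sketch:

    import random
    import numpy as np

    def set_defined_seed(seed):
        np.random.seed(seed)  # also governs pandas .sample() and other NumPy draws
        random.seed(seed)     # governs the stdlib RNG used elsewhere

    set_defined_seed(42)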
@@ -2720,85 +2608,9 @@ class ObjectiveFunction(object):
  print(e)
  print('f')

- def negbinom_gradients(r, p, k, a=None): # TODO: delete if wrong
- """_summary_

- Args:
- r (_type_): rate paramaters or dispersion of the nb
- p (_type_): probability
- k (_type_): vector of (non-negative integer) quantiles.
- a (_type_, optional): optional paramater, if none NB model, otherwise NB-Lindley model with Lindley paramater a.

- Raises:
- Exception: _description_
- Exception: _description_
- ValueError: _description_
- Exception: _description_
- Exception: _description_
-
- Returns:
- _type_: _description_
- """
- # fine the NegBinom PMF
- import scipy.special as sps
- negbinom_pmf = sps.comb(k + r - 1, k) * p ** r * (1 - p) ** k

- # Calculate the gradient of the NegBinom PMF with respect to r and p
- d_negbinom_pmf_dr = sps.comb(
- k + r - 1, k) * (np.log(p) - sps.digamma(r)) * p ** r * (1 - p) ** k
- d_negbinom_pmf_dp = sps.comb(
- k + r - 1, k) * (r / p - (k + r) / (1 - p)) * p ** r * (1 - p) ** k
-
- if a is not None:
- # Define the NegBinom-Lindley PMF
- negbinom_lindley_pmf = sps.comb(a + k - 1, k) * p ** r * (1 - p) ** k
-
- # Calculate the gradient of the NegBinom-Lindley PMF with respect to r, p, and a
- d_negbinom_lindley_pmf_dr = sps.comb(
- a + k - 1, k) * (np.log(p) * p ** r * (1 - p) ** k)
- d_negbinom_lindley_pmf_dp = sps.comb(
- a + k - 1, k) * ((r / p - (k + r) / (1 - p)) * p ** r * (1 - p) ** k)
- d_negbinom_lindley_pmf_da = sps.comb(
- a + k - 1, k) * (-sps.digamma(a) + sps.digamma(a + k)) * p ** r * (1 - p) ** k
-
- return [d_negbinom_pmf_dr, d_negbinom_pmf_dp], [d_negbinom_lindley_pmf_dr, d_negbinom_lindley_pmf_dp,
- d_negbinom_lindley_pmf_da]
- else:
- return [d_negbinom_pmf_dr, d_negbinom_pmf_dp]
-
- def f(self, x, N, sig, mu):
- return norm.pdf(x, 0, 1) * poisson.pmf(N, np.exp(x * sig + mu))
-
- def poilog(self, n, mu, sig):
- from scipy import integrate
- if len(mu) > 1 or len(sig) > 1:
- raise ValueError(
- "vectorization of mu and sig is currently not implemented")
- if any((n[n != 0] / np.trunc(n[n != 0])) != 1):
- raise ValueError("all n must be integers")
- if any(n < 0):
- raise ValueError("one or several values of n are negative")
- if not np.all(np.isfinite(np.concatenate((mu, sig)))):
- raise ValueError("all parameters should be finite")
- if sig <= 0:
- raise ValueError("sig is not larger than 0")
- spos = np.where(n < 8)[0]
- lpos = np.where(n >= 8)[0]
- val = np.empty_like(n)
-
- if spos.size > 0:
- vali = np.empty(spos.size)
- for i in range(spos.size):
- try:
- vali[i] = integrate.quad(
- self.f, -np.inf, np.inf, sig, mu, args=(n[spos[i]],))[0]
- except:
- vali[i] = 1e-300
- valp = self.poilog(n[spos], mu, sig ** 2)[0]
- val[spos] = np.maximum(vali, valp)
- if lpos.size > 0:
- val[lpos] = self.poilog(n[lpos], mu, sig ** 2)[0]
- return val

  def negbinom_pmf(self, r, p, k, a=None): # TODO: delete if wrong
  """_summary_
@@ -2828,45 +2640,7 @@ class ObjectiveFunction(object):
  negbinom_lindley_pmf = sc.comb(a + k - 1, k) * p ** r * (1 - p) ** k
  return negbinom_lindley_pmf

- def nbl_score(self, y, X, betas, alpha, theta):
- from scipy.special import gammaln, psi
- """
- Calculate the Negative Binomial-lindley model score vector of the log-likelihood.
-
- Parameters:
- -----------
- y : numpy array
- The dependent variable of the model.
- X : numpy array
- The independent variables of the model.
- beta : numpy array
- The coefficients of the model.
- alpha : float
- The dispersion parameter of the Negative Binomial-lindley distribution.
- theta : float
- The theta parameter of the Negative Binomial-lindley distribution.
-
- Returns:
- --------
- score : numpy array
- The score vector of the Negative Binomial-lindley model log-likelihood.
- """
- alpha = betas[-1]
- theta = betas[-2]
- beta = betas[:-2]
- zi = self.my_lindley(y, theta).ravel()
-
- eta = np.dot(X, beta)
- mu = np.exp(eta) * zi
- p = 1 / (1 + mu * theta / alpha)
- q = 1 - p
- score = np.zeros(len(betas))

- for i in range(len(y)):
- score += (psi(y[i] + zi[i] * p[i]) - psi(alpha * p[i]) + np.log(zi[i])
- - np.log(1 + zi * mu[i] / alpha)) * X[i, :]
-
- return score

  def poisson_lognormal_glm_score(self, betas, Y, X, sigma, tau=1e-6):
  """
@@ -3918,195 +3692,7 @@ class ObjectiveFunction(object):
  # np.sum(stats.poisson.logpmf(endog, np.exp(XB)))
  return -np.exp(XB) + y * XB - sc.gammaln(y + 1)

- def loglik_zi(params, return_grad=False):
- """
- Loglikelihood for observations of Generic Zero Inflated model.
-
- Parameters
- ----------
- params : array_like
- The parameters of the model.
-
- Returns
- -------
- loglike : ndarray
- The log likelihood for each observation of the model evaluated
- at `params`. See Notes for definition.
-
- Notes
- -----
- .. math:: \\ln L=\\ln(w_{i}+(1-w_{i})*P_{main\\_model})+
- \\ln(1-w_{i})+L_{main\\_model}
- where P - pdf of main model, L - loglike function of main model.
-
- for observations :math:`i=1,...,n`
- """
- params_infl = params[:self.k_inflate]
- params_main = params[self.k_inflate:]
-
- y = self.endog
- w = predict_logit(params_infl, exog_infl)
-
- w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
- llf_main = loglik_obs_poisson(params_main, y)
- dispersion = 0
- b_gam = None
- Xd = exog
- eta = np.tile(np.dot(Xd, params_main), (1, 1)).transpose()
- eVd = np.exp(np.clip(eta, None, EXP_UPPER_LIMIT))
-
- llf_main_og = self.loglik_obs(y, eVd.ravel(), dispersion, b_gam)
- zero_idx = np.nonzero(y == 0)[0]
- nonzero_idx = np.nonzero(y)[0] # type: ignore
-
- llf = np.zeros_like(y, dtype=np.float64)
- llf[zero_idx] = (np.log(w[zero_idx] +
- (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
- llf[nonzero_idx] = np.log(
- 1 - w[nonzero_idx]) + llf_main[nonzero_idx]
- if return_grad:
- score_main = Xd.T @ (y - eVd.ravel())
- L = np.exp(np.dot(Xd, params_main))
- score_main = (self.endog - L)[:, None] * Xd
-
- dldp = np.zeros(
- (exog.shape[0], len(params_main)), dtype=np.float64)
- dldw = np.zeros_like(exog_infl, dtype=np.float64)
-
- dldp[zero_idx, :] = (score_main[zero_idx].T *
- (1 - (w[zero_idx]) / np.exp(llf[zero_idx]))).T
- dldp[nonzero_idx, :] = score_main[nonzero_idx]
-
- dldw[zero_idx, :] = (exog_infl[zero_idx].T * w[zero_idx] *
- (1 - w[zero_idx]) *
- (1 - np.exp(llf_main[zero_idx])) /
- np.exp(llf[zero_idx])).T
- dldw[nonzero_idx, :] = -(exog_infl[nonzero_idx].T *
- w[nonzero_idx]).T
-
- return llf, np.hstack((dldw, dldp)).sum(axis=0)
-
- else:
-
- return llf
-
- def zipoisson_logpmf(x, mu, w):
- return _lazywhere(x != 0, (x, mu, w),
- (lambda x, mu, w: np.log(1. - w) + x * np.log(mu) -
- sc.gammaln(x + 1.) - mu),
- np.log(w + (1. - w) * np.exp(-mu)))

- def zipoisson_pmf(x, mu, w):
- return np.exp(zipoisson_logpmf(x, mu, w))
-
- def loglik_logit(params, endog_y, exog_x): # this is predict I think
- q = 2 * endog_y - 1
- X = exog_x
- return np.sum(np.log(cdf(q * np.dot(X, params))))
-
- def predict_logit(params, exog=None, linear=False):
- if exog is None:
- exog = self.exog
- if not linear:
- return (cdf(np.dot(exog, params)))
- else:
- return (np.dot(exog, params))
-
- def cdf(X):
- """
- The logistic cumulative distribution function
-
- Parameters
- ----------
- X : array_like
- `X` is the linear predictor of the logit model. See notes.
-
- Returns
- -------
- 1/(1 + exp(-X))
-
- Notes
- -----
- In the logit model,
-
- .. math:: \\Lambda\\left(x^{\\prime}\\beta\\right)=
- \\text{Prob}\\left(Y=1|x\\right)=
- \\frac{e^{x^{\\prime}\\beta}}{1+e^{x^{\\prime}\\beta}}
- """
- X = np.asarray(X)
- return 1 / (1 + np.exp(-X))
-
- llobs, grad = loglik_zi(betas, return_grad)
- llf = np.sum(llobs)
- if return_grad:
- return -llf, -grad
- else:
- return -llf
-
- def cdf_logit(self, X):
- """
- The logistic cumulative distribution function
-
- Parameters
- ----------
- X : array_like
- `X` is the linear predictor of the logit model. See notes.
-
- Returns
- -------
- 1/(1 + exp(-X))
-
- Notes
- -----
- In the logit model,
-
- .. math:: \\Lambda\\left(x^{\\prime}\\beta\\right)=
- \\text{Prob}\\left(Y=1|x\\right)=
- \\frac{e^{x^{\\prime}\\beta}}{1+e^{x^{\\prime}\\beta}}
- """
- X = np.asarray(X)
- return 1 / (1 + np.exp(-X))
-
- def predict_logit_part(self, params_infl, exog_infl, linear=False):
-
- if not linear:
- return (self.cdf_logit(np.dot(exog_infl, params_infl)))
- else:
- return (np.dot(exog_infl, params_infl))
-
- def ZeroInflate_W_setup(self, exog_infl, y, params_infl):
-
- w = self.predict_logit_part(params_infl, exog_infl)
-
- w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-
- llf_main = [1, 2, 3] # TODO ge
- zero_idx = np.nonzero(y == 0)[0]
- nonzero_idx = np.nonzero(y)[0]
-
- llf = np.zeros_like(y, dtype=np.float64)
- llf[zero_idx] = (np.log(w[zero_idx] +
- (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
- llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
-
- return llf
-
- def dPXL(self, x, alpha):
- return ((alpha ** 2) * (x + 3 * alpha + (alpha ** 2) + 3)) / (1 + alpha) ** (4 + x)
-
- # Define the gradient function
-
- def poisson_lindley_gradient(self, params, exog, endog):
- beta = params[-1]
- mu = np.exp(np.dot(exog, params[:-1]))
- q = beta / (1 + beta)
- d_beta = (endog.ravel() + 1) / (mu + endog.ravel() + 1) - q / (1 - q)
- d_beta = self.dpoisl(endog, beta).ravel()
- d_mu = np.dot((endog - mu) * (1 - q) / (mu + endog + 1), exog)
-
- grad_n = np.concatenate((d_mu, np.atleast_2d(d_beta).T), axis=1)
- der = np.sum(grad_n, axis=0)
- return der, grad_n

  def dpoisl(self, x, theta, log=False):
  # if theta < 0:
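Editor's note: this release strips the experimental zero-inflated machinery (loglik_zi, zipoisson_logpmf/zipoisson_pmf, the nested logit helpers, ZeroInflate_W_setup, poisson_lindley_gradient), matching the removal of zi_force/zi_fit throughout the diff. For reference, the zero-inflated Poisson log-pmf the removed code implemented is ln(w + (1-w)e^(-mu)) at x = 0 and ln(1-w) + x ln(mu) - ln(x!) - mu otherwise; a compact sketch:

    import numpy as np
    from scipy.special import gammaln

    def zipoisson_logpmf(x, mu, w):
        # mixture of a point mass at zero (weight w) and Poisson(mu) (weight 1 - w)
        x = np.asarray(x, dtype=float)
        return np.where(x == 0,
                        np.log(w + (1. - w) * np.exp(-mu)),
                        np.log(1. - w) + x * np.log(mu) - gammaln(x + 1.) - mu)

    print(zipoisson_logpmf([0, 1, 2], mu=1.5, w=0.3))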
@@ -4183,21 +3769,9 @@ class ObjectiveFunction(object):

  proba_r = self.general_poisson_pmf(eVd, y, b_gam)

- elif dispersion == 3:
- fa, ba = self.get_dispersion_paramaters(betas, dispersion)
- zi = self.my_lindley(y, ba)
- proba_r = poisson.pmf(y, zi * eVd.ravel())
- # proba_r = self.lindl_pmf_chatgpt(y, l_pam)
- # prob_2 = self.dpoisl(y, l_pam)
- # proba_r = self.poisson_lindley_pmf(eVd, l_pam, y)
- # proba_r = self.poisson_lindley_pmf(eVd, l_pam, y)
- # print(1)
- # proba_r = self.dpoisl(y, eVd)

- elif dispersion == 4:
- fa, ba = self.get_dispersion_paramaters(betas, dispersion)
- self.zi = self.my_lindley(eVd, ba)
- proba_r = self._nonlog_nbin(y, eVd + self.zi, b_gam)
+
+
  # proba_r = self._nonlog_nbin_lindley(y, eVd, fa, ba)

  elif dispersion == 'poisson_lognormal':
@@ -4267,7 +3841,7 @@ class ObjectiveFunction(object):
  # if (len(betas) -Kf-Kr-self.is_dispersion(dispersion)) != (Kchol + Kr):

  # gr_f, gr_u, gr_s = np.zeros((N, Kf)), np.zeros((N, Kr)), np.zeros((N, len(betas) -Kf-Kr-self.is_dispersion(dispersion))) #FIX
- Kf2, Kr, Kc, Kr_b, Kchol, Kh, zi_terms = self.get_num_params()
+ Kf2, Kr, Kc, Kr_b, Kchol, Kh = self.get_num_params()
  gr_f, gr_u, gr_s = np.zeros((N, Kf)), np.zeros(
  (N, Kr + Kc)), np.zeros((N, Kchol + Kr_b))

@@ -4716,8 +4290,10 @@ class ObjectiveFunction(object):
  return self._loglik_gradient(self, betas, *stuff)

  def get_br_and_bstd(betas, self):
- Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh, zi_terms_a = self.get_num_params()
- br = betas[Kf:Kf + Kr]
+ Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
+ Kr = Kr_a + Kr_c #todo check if this works
+ print('check if this works')
+ br = betas[Kf_a:Kf_a + Kr]
  # Calculate the size of the br matrix
  br_size = int((1 + np.sqrt(1 + 8 * Kr_b_a)) / 2)

@@ -4728,7 +4304,7 @@ class ObjectiveFunction(object):
  index = 0
  for i in range(br_size):
  for j in range(i, br_size):
- br_std[j, i] = betas[Kf + Kr + index]
+ br_std[j, i] = betas[Kf_a + Kr + index]
  index += 1

  brstd = br_std
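Editor's note: get_br_and_bstd recovers the matrix dimension from a count of triangular entries. int((1 + sqrt(1 + 8*K)) / 2) inverts the strictly-lower-triangular count K = n*(n-1)/2; for the inclusive count n*(n+1)/2 the matching inverse would be (-1 + sqrt(1 + 8*K)) / 2, so which form is right depends on whether Kr_b_a includes the diagonal. A quick check of both:

    import numpy as np

    K_strict = 10  # n*(n-1)/2 with n = 5
    print(int((1 + np.sqrt(1 + 8 * K_strict)) / 2))   # 5

    K_full = 10    # n*(n+1)/2 with n = 4
    print(int((-1 + np.sqrt(1 + 8 * K_full)) / 2))    # 4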
@@ -4767,7 +4343,7 @@ class ObjectiveFunction(object):
  self.n_obs = len(y) # feeds into gradient
  if draws is None and draws_grouped is None and (
  'draws_hetro' not in model_nature or model_nature.get('draws_hetro').shape[1] == 0):
-
+ #TODO do i shuffle the draws
  if type(Xd) == dict:
  N, Kf, P = 0, 0, 0
  for key in Xd:
@@ -4781,7 +4357,7 @@ class ObjectiveFunction(object):
  Bf = betas[0:Kf] # Fixed betas

  main_disper, lindley_disp = self.get_dispersion_paramaters(
- betas, dispersion)
+ betas, dispersion) #todo fix this up
  if lindley_disp is not None:
  if lindley_disp <= 0:
  penalty += 1
@@ -4808,29 +4384,13 @@ class ObjectiveFunction(object):
  # llf_main = np.clip(llf_main, log_lik_min, log_lik_max)

  loglik = llf_main.sum()
- if 'exog_infl' in model_nature:
- params_infl = betas[Kf:Kf + len(model_nature.get('exog_infl'))]
- params_main = Bf
- # ones = np.ones((model_nature.get('exog_inflX').shape[0], model_nature.get('exog_inflX').shape[1], 1))
- # exog_infl = np.concatenate((ones, model_nature.get('exog_inflX')), axis =2 )
- exog_infl = model_nature.get('exog_inflX')
- llf_main = llf_main # TODO test this
- w = self.predict_logit_part(params_infl, exog_infl)

- w = np.clip(w, np.finfo(float).eps, 1 - np.finfo(float).eps)
-
- zero_idx = np.nonzero(y == 0)[0]
- nonzero_idx = np.nonzero(y)[0] # FIXME should shape be unravelled
-
- llf = np.zeros_like(y, dtype=np.float64).reshape(-1, 1) # TODO test this i added ravel to this code
- llf[zero_idx] = (np.log(w[zero_idx] + (1 - w[zero_idx]) * np.exp(llf_main[zero_idx])))
- llf[nonzero_idx] = np.log(1 - w[nonzero_idx]) + llf_main[nonzero_idx]
- loglik = llf.sum()


  loglik = np.clip(loglik, log_lik_min, log_lik_max)
  if self.power_up_ll:

  loglik += 2*loglik
+ print('am i powering up')
  penalty = self.regularise_l2(betas)

@@ -4851,7 +4411,7 @@ class ObjectiveFunction(object):
  else:
  return -loglik + penalty
  # Else, we have draws
- self.n_obs = len(y) * self.Ndraws
+ self.n_obs = len(y) * self.Ndraws #todo is this problematic
  penalty += self._penalty_betas(
  betas, dispersion, penalty, float(len(y) / 10.0))

@@ -4908,7 +4468,7 @@ class ObjectiveFunction(object):
  # if (Kchol +Kr) != (len(betas) -Kf-Kr -self.is_dispersion(dispersion)):
  # print('I think this is fine')
  n_coeff = self.get_param_num(dispersion)
- Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh, zi_terms_a = self.get_num_params()
+ Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh = self.get_num_params()
  if Kchol_a != Kchol:
  print('hold')

@@ -5905,7 +5465,7 @@ class ObjectiveFunction(object):
  b[-1] = .5
  if method == 'L-BFGS-B' or method2 == 'L-BFGS-B':

- Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh, zi_terms_a = self.get_num_params()
+ Kf_a, Kr_a, Kr_c, Kr_b_a, Kchol_a, Kh= self.get_num_params()
  if Kh > 0:
  Kh_e = mod.get('XH').shape[-1]
  Kh_range = Kh - Kh_e
@@ -6024,14 +5584,14 @@ class ObjectiveFunction(object):
  mod['dispersion_penalty'] = np.abs(b[-1])
  grad_args = (
  X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0, self.rdm_cor_fit,
- self.zi_fit, exog_infl, draws_grouped, XG, mod)
+ None, exog_infl, draws_grouped, XG, mod)
  # self.gradients_est_yes = (1, 1)

  if draws is None and draws_hetro is not None:
  print('hold')
  betas_est = self._minimize(self._loglik_gradient, b, args=(
  X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
- self.rdm_cor_fit, self.zi_fit, exog_infl, draws_grouped, XG, mod),
+ self.rdm_cor_fit, None, exog_infl, draws_grouped, XG, mod),
  method=method2, tol=tol['ftol'],
  options={'gtol': tol['gtol']}, bounds=bounds,
  hess_calc=True if method2 == 'Nelder-Mead-BFGS' else False)
@@ -6050,7 +5610,7 @@ class ObjectiveFunction(object):
  betas_est = self._minimize(self._loglik_gradient, b, args=(
  X, y, draws, X, Xr, self.batch_size, False, False, dispersion, 0, False, 0,
  self.rdm_cor_fit,
- self.zi_fit, exog_infl, draws_grouped, XG, mod),
+ None, exog_infl, draws_grouped, XG, mod),
  method=method2, tol=tol['ftol'],
  options={'gtol': tol['gtol']})

@@ -6059,7 +5619,7 @@ class ObjectiveFunction(object):

  if np.isfinite(betas_est['fun']):
  self.naming_for_printing(
- betas_est['x'], 0, dispersion, zi_fit=sub_zi, model_nature=mod)
+ betas_est['x'], 0, dispersion, model_nature=mod)
  if method2 == 'L-BFGS-B':


@@ -6226,8 +5786,7 @@ class ObjectiveFunction(object):
  self.rdm_cor_fit = [x for x, y in zip(
  select_data, model_nature.get('alpha_cor_rdm')) if y == 1]

- # if self.zi_force:
- # self.zi_fit = [x for x, y in zip(select_data, model_nature.get('exog_infl')) if y == 1]
+
  # if alpha_grouped is not None:
  self.grouped_rpm = [x for x, y in zip(select_data, model_nature.get('alpha_grouped')) if y == 1]
  self.hetro_fit = [x for x, y in zip(select_data, model_nature.get('alpha_hetro')) if y == 1]
@@ -6405,14 +5964,14 @@ class ObjectiveFunction(object):
  df_test[:, :, idx], model_nature.get('transformations')[idx] = self.transformer(
  t, idx, df_test[:, :, idx])
  if np.max(df_tf[:, :, idx]) >= 77000:
+
  raise Exception('should not be possible')

  self.define_selfs_fixed_rdm_cor(model_nature)
  indices = self.get_named_indices(self.fixed_fit)
  indices5 = self.get_named_indices(self.hetro_fit)

- if self.zi_force:
- model_nature['exog_inflX'] = df_tf[:, :, self.get_named_indices(self.zi_force_names)]
+

  x_h_storage = []
  x_h_storage_test = []
@@ -6445,7 +6004,7 @@ class ObjectiveFunction(object):
  if XG is not None:
  indices4_test = np.repeat(self.get_named_indices(self.grouped_rpm),
  self.group_dummies_test.shape[2]) if self.grouped_rpm != [] else []
- XGtest = np.tile(self.group_dummies_test, len(self.grouped_rpm)) * df_test[:, :, indices4]
+ XGtest = np.tile(self.group_dummies_test, len(self.grouped_rpm)) * df_test[:, :, indices4_test]
  model_nature['XG'] = XG
  model_nature['XGtest'] = XGtest

@@ -6488,7 +6047,8 @@ class ObjectiveFunction(object):
  Xr_test = None
  model_nature['Xr_test'] = Xr_test
  if (Xr.ndim <= 1) or (Xr.shape[0] <= 11) or np.isin(Xr, [np.inf, -np.inf, None, np.nan]).any():
- print('what the actual fuck')
+ print('Not Possible')
+ raise Exception
  if Xr.size == 0:
  Xr = None
  Xr_test = None
@@ -6509,10 +6069,10 @@ class ObjectiveFunction(object):
  obj_1.add_layout(layout)

  model_form_name = self.check_complexity(
- self.fixed_fit, self.rdm_fit, self.rdm_cor_fit, self.zi_fit, dispersion, is_halton, model_nature)
+ self.fixed_fit, self.rdm_fit, self.rdm_cor_fit, None, dispersion, is_halton, model_nature)

  obj_1.add_names(self.fixed_fit.copy(), self.rdm_fit.copy(),
- self.rdm_cor_fit.copy(), model_form_name, self.zi_fit, pvalues)
+ self.rdm_cor_fit.copy(), model_form_name, None, pvalues)
  if not isinstance(obj_1, dict):
  raise Exception('should not be possible')

@@ -6540,7 +6100,7 @@ class ObjectiveFunction(object):
  else:
  obj_1 = Solution()
  self.significant = 3
- print('not_implented yet')
+ print('not_implemented yet') #TODO check this for exciddeing values

  if self.is_quanitifiable_num(obj_1[self._obj_1]) and pvalues is not None:
  self.bic = obj_1['bic']
@@ -6563,8 +6123,9 @@ class ObjectiveFunction(object):
  if self.significant == 0:

  print(self.full_model, 'full model is')
- alpha, alpha_rdm, alpha_cor_rdm = self.modify(
- self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
+ if not self.test_flag:
+ alpha, alpha_rdm, alpha_cor_rdm = self.modify(
+ self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)

  return obj_1, model_nature

@@ -6581,8 +6142,9 @@ class ObjectiveFunction(object):
  self.significant = 3

  return obj_1, model_nature
- alpha, alpha_rdm, alpha_cor_rdm = self.modify(
- self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
+ if not self.test_flag:
+ alpha, alpha_rdm, alpha_cor_rdm = self.modify(
+ self.fixed_fit, self.rdm_fit, self.rdm_cor_fit)
  if self.grab_transforms:

  if is_halton and self.significant == 1:
@@ -6692,7 +6254,7 @@ class ObjectiveFunction(object):
  alpha_cor_rdm = np.in1d(select_data, cor_rdm) * 1
  alpha_cor_rdm = alpha_cor_rdm.tolist()
  alpha_group_rdm = np.in1d(select_data, group_rdm) * 1
- alpha_group_rdm = alpha_group_rdm.tolist()
+ alpha_group_rdm = alpha_group_rdm.tolist() #todo will this ever trigger
  return alpha, alpha_rdm, alpha_cor_rdm

  def show_transforms(self, fix, rdm):
@@ -6825,9 +6387,10 @@ class ObjectiveFunction(object):

  Br_come_one = self.Br.copy()
  # betas_random = self._transform_rand_betas(betas, betas_std, draws)
-
+ #todo make sure this works for ln and truncated normal
  if any(set(distribution).intersection(['ln_normal', 'tn_normal'])):
- Br_come_one = self._transform_rand_betas()
+ print('check this, intesection shouldn not happen for all')
+ Br_come_one = self._transform_rand_betas(betas, betas_std, draws)
  for k, dist_k in enumerate(distribution):
  if dist_k == 'ln_normal':
  der[:, k, :] = Br_come_one[:, k, :]
@@ -6837,9 +6400,7 @@ class ObjectiveFunction(object):
  return der

  def _copy_size_display_as_ones(self, matrix):
- # grab the shape to copy
- please = matrix.shape
- der = dev.np.ones((please), dtype=matrix.dtype)
+ der = dev.np.ones(matrix.shape, dtype=matrix.dtype)
  return der

  def prepare_halton(self, dim, n_sample, draws, distribution, long=False, slice_this_way=None):