metacountregressor 0.1.235__tar.gz → 0.1.237__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {metacountregressor-0.1.235/metacountregressor.egg-info → metacountregressor-0.1.237}/PKG-INFO +1 -1
  2. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor/helperprocess.py +63 -2
  3. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor/solution.py +663 -30
  4. {metacountregressor-0.1.235 → metacountregressor-0.1.237/metacountregressor.egg-info}/PKG-INFO +1 -1
  5. metacountregressor-0.1.237/version.txt +1 -0
  6. metacountregressor-0.1.235/version.txt +0 -1
  7. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/LICENSE.txt +0 -0
  8. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/MANIFEST.in +0 -0
  9. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/README.md +0 -0
  10. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/README.rst +0 -0
  11. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor/__init__.py +0 -0
  12. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor/_device_cust.py +0 -0
  13. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor/app_main.py +0 -0
  14. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor/data_split_helper.py +0 -0
  15. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor/halton.py +0 -0
  16. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor/main.py +0 -0
  17. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor/main_old.py +0 -0
  18. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor/metaheuristics.py +0 -0
  19. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor/pareto_file.py +0 -0
  20. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor/pareto_logger__plot.py +0 -0
  21. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor/setup.py +0 -0
  22. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor/single_objective_finder.py +0 -0
  23. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor/test_generated_paper2.py +0 -0
  24. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor.egg-info/SOURCES.txt +0 -0
  25. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor.egg-info/dependency_links.txt +0 -0
  26. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor.egg-info/not-zip-safe +0 -0
  27. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor.egg-info/requires.txt +0 -0
  28. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/metacountregressor.egg-info/top_level.txt +0 -0
  29. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/setup.cfg +0 -0
  30. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/setup.py +0 -0
  31. {metacountregressor-0.1.235 → metacountregressor-0.1.237}/tests/test.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: metacountregressor
-Version: 0.1.235
+Version: 0.1.237
 Summary: Extensive Testing for Estimation of Data Count Models
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
@@ -5,8 +5,8 @@ import csv
 import matplotlib.pyplot as plt
 from scipy import stats as st
 from sklearn.preprocessing import StandardScaler
-
-
+import os
+import shutil
 plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
 
 
@@ -22,6 +22,67 @@ def generate_param_combinations(param_grid):
     combinations = [dict(zip(keys, v)) for v in product(*values)]
     return combinations
 
+def delete_all_folders(directory_path):
+    try:
+        # Check if the directory exists
+        if not os.path.exists(directory_path):
+            print(f"The directory '{directory_path}' does not exist.")
+            return
+
+        # Iterate through items in the directory
+        for item in os.listdir(directory_path):
+            item_path = os.path.join(directory_path, item)
+
+            # Check if the item is a folder
+            if os.path.isdir(item_path):
+                # Use shutil.rmtree to delete the folder and its contents
+                shutil.rmtree(item_path)
+                print(f"Deleted folder: {item_path}")
+            else:
+                print(f"Skipped non-folder item: {item_path}")
+
+        print("All folders deleted successfully.")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+def delete_all_contents(directory_path):
+    try:
+        # Check if the directory exists
+        if not os.path.exists(directory_path):
+            print(f"The directory '{directory_path}' does not exist.")
+            return
+
+        # Iterate through items in the directory
+        for item in os.listdir(directory_path):
+            item_path = os.path.join(directory_path, item)
+
+            # If the item is a directory, delete it
+            if os.path.isdir(item_path):
+                shutil.rmtree(item_path)  # Recursively delete the folder
+                print(f"Deleted folder: {item_path}")
+            else:
+                # If the item is a file, delete it
+                os.remove(item_path)
+                print(f"Deleted file: {item_path}")
+
+        print("All contents deleted successfully.")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+def delete_folder_and_contents(directory_path):
+    try:
+        # Check if the directory exists
+        if not os.path.exists(directory_path):
+            print(f"The directory '{directory_path}' does not exist.")
+            return
+
+        # Delete the entire folder and its contents
+        shutil.rmtree(directory_path)
+        print(f"Deleted folder and all its contents: {directory_path}")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+
 
 ##Select the best Features Based on RF
 def select_features(X_train, y_train, n_f=16):
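
Note on the three helpers added above: they differ only in scope, which the names do not make obvious at a glance. A minimal usage sketch (the "results" path is hypothetical; all three print progress and swallow exceptions rather than raising):

from metacountregressor import helperprocess

helperprocess.delete_all_folders("results")            # removes subfolders of results/, keeps loose files
helperprocess.delete_all_contents("results")           # removes subfolders and files, keeps results/ itself
helperprocess.delete_folder_and_contents("results")    # removes results/ entirely
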
@@ -121,6 +121,7 @@ class ObjectiveFunction(object):
     """
 
     def __init__(self, x_data, y_data, **kwargs):
+        self.gbl_best = 1000000.0
         self.linear_regression = kwargs.get('linear_model', False)
         self.reg_penalty = 0
         self.power_up_ll = False
@@ -215,11 +216,14 @@ class ObjectiveFunction(object):
             print('no name set, setting name as 0')
             self.instance_number = str(0)  # set an arbitrary instance number
 
-        if not os.path.exists(self.instance_number):
-            if kwargs.get('make_directory', True):
-                print('Making a Directory, if you want to stop from storing the files to this directory set argumet: make_directory:False')
-                os.makedirs(self.instance_number)
-
+        if kwargs.get('save_directory', True):
+            self.save_state = True
+            if not os.path.exists(self.instance_number):
+                if kwargs.get('make_directory', True):
+                    print('Making a Directory, if you want to stop from storing the files to this directory set argumet: make_directory:False')
+                    os.makedirs(self.instance_number)
+        else:
+            self.save_state = False
         if not hasattr(self, '_obj_1'):
            print('_obj_1 required, define as bic, aic, ll')
            raise Exception
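
A sketch of the new switch from the caller's side (the constructor's other required arguments are elided; x_data/y_data stand in for the user's frames). Passing save_directory=False clears self.save_state and skips directory creation, which also suppresses the LaTeX table writes gated on self.save_state later in this diff:

model = ObjectiveFunction(x_data, y_data,
                          save_directory=False,  # new in 0.1.237: sets save_state = False, no instance directory
                          make_directory=False)  # pre-existing flag, now only consulted when save_directory is True
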
@@ -443,10 +447,9 @@ class ObjectiveFunction(object):
         print('Setup Complete...')
         self._characteristics_names = list(self._x_data.columns)
         # define the variables
-        # self._transformations = ["no", "sqrt", "log", "exp", "fact", "arcsinh", 2, 3]
-        self._transformations = ["no", "sqrt", "log", "arcsinh"]
-        self._transformations = kwargs.get('_transformation', ["no", "sqrt", "log", 'arcsinh'])
-        self._transformations = kwargs.get('_transformation', ["no", "log", "sqrt", "arcsinh"])
+
+
+        self._transformations = kwargs.get('_transformations', ["no", "log", "sqrt", "arcsinh"])
         # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']
 
         self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])
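
Note the kwarg key change buried in this hunk: 0.1.235 read '_transformation' (singular), 0.1.237 reads '_transformations' (plural), so callers still passing the singular key now silently fall back to the default list. A sketch of the updated call (other constructor arguments elided):

model = ObjectiveFunction(x_data, y_data,
                          _transformations=["no", "log", "sqrt", "arcsinh"])  # old key '_transformation' is now ignored
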
@@ -482,7 +485,7 @@ class ObjectiveFunction(object):
         model_types = [[0, 1]]  # add 2 for Generalized Poisson
         #model_types = [[0]]
 
-        if kwargs.get('linear_model', None) is not None:
+        if self:
             model_types = [[0]]
             self.grad_yes = False
 
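One behavioural consequence worth flagging: an ordinary instance is always truthy (object defines no __bool__ or __len__), so `if self:` always takes the branch, and model_types is now [[0]] unconditionally where 0.1.235 only did this when linear_model was passed. A standalone illustration:

class Demo:
    pass

obj = Demo()
print(bool(obj))   # True: default truthiness of any instance
if obj:
    print("branch always taken")
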
@@ -1082,15 +1085,7 @@ class ObjectiveFunction(object):
             [''] * (len(names) - len(self.transform_id_names))
         self.coeff_names = names
 
-        '''
-        if betas is not None:
-            try:
-                if len(betas) != len(names):
-                    print('standard_model', no_draws)
-
-            except Exception as e:
-                print(e)
-        '''
+
 
 
 
@@ -1228,8 +1223,9 @@ class ObjectiveFunction(object):
 
         if save_state:
             # print(file_name)
-            self.save_to_file(latextable.draw_latex(
-                table, caption=caption, caption_above=True), file_name)
+            if self.save_state:
+                self.save_to_file(latextable.draw_latex(
+                    table, caption=caption, caption_above=True), file_name)
 
 
 
@@ -2375,6 +2371,12 @@ class ObjectiveFunction(object):
         else:
             sorted(my_dict, key=lambda x: x[0]['pval_percentage'])
 
+    def update_gbl_best(self, obj_1):
+        '''Method to update the global best solution. Also sets the significant attribute to 1 if the global best is updated'''
+        if self.gbl_best > obj_1[self._obj_1]:
+            self.gbl_best = obj_1[self._obj_1]
+            self.significant = 1
+
     def get_fitness(self, vector, multi=False, verbose=False, max_routine=3):
         obj_1 = 10.0 ** 4
         obj_best = None
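
The update keys obj_1 by the primary objective's name and treats smaller as better, which is why gbl_best is seeded with the large sentinel 1000000.0 in __init__ earlier in this diff. A minimal sketch of the same logic outside the class (the objective name and value are illustrative):

gbl_best = 1000000.0          # sentinel set in __init__
significant = 0
obj_1 = {'bic': 4321.7}       # illustrative objective dictionary
if gbl_best > obj_1['bic']:   # lower BIC/AIC/-LL is better
    gbl_best = obj_1['bic']
    significant = 1
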
@@ -2401,7 +2403,7 @@ class ObjectiveFunction(object):
 
         a = {}
         obj_1, model_mod = self.makeRegression(model_nature, layout=layout, **a)
-
+        self.update_gbl_best(obj_1)
         if self.pvalues is None:
             self.reset_sln()
             return obj_1
@@ -4581,8 +4583,14 @@ class ObjectiveFunction(object):
 
         brstd = br_std
 
-
-
+    def _loglik_prefit(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
+                       return_gradient_n=False, dispersion=0, test_set=0, return_EV=False, verbose=0, corr_list=None,
+                       zi_list=None, exog_infl=None, draws_grouped=None, Xgroup=None, model_nature=None, kwarg=None,
+                       **kwargs):
+        """Fixed and random parameters are handled separately to speed up the estimation and the results are concatenated.
+        pass
+        """
+        pass
     def _loglik_gradient(self, betas, Xd, y, draws=None, Xf=None, Xr=None, batch_size=None, return_gradient=False,
                         return_gradient_n=False, dispersion=0, test_set=0, return_EV=False, verbose=0, corr_list=None,
                         zi_list=None, exog_infl=None, draws_grouped=None, Xgroup=None, model_nature=None, kwarg=None,
@@ -5449,6 +5457,23 @@ class ObjectiveFunction(object):
         # self.grad_n = optim_res['grad_n']
         # self.total_fun_eval = optim_res['nfev']2
 
+    def handle_covariance(self, covariance):
+        """
+        Safely handle covariance matrix, converting it to a dense NumPy array if needed.
+
+        Parameters:
+        covariance: The covariance matrix, which may be a `LbfgsInvHessProduct`.
+
+        Returns:
+        A dense NumPy array of the covariance matrix.
+        """
+        # Check if the covariance is an `LbfgsInvHessProduct`
+        if hasattr(covariance, "todense"):
+            # Convert to a dense NumPy array
+            covariance = covariance.todense()
+        return covariance
+
+
     def _post_fit_ll_aic_bic(self, optim_res, verbose=1, robust=False, simple_fit=True, is_dispersion=0):
         # sample_size = len(self._x_data) - len(optim_res['x']) -1
         sample_size = len(self._x_data)
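
The reason this helper exists: when the optimizer runs SciPy's L-BFGS-B, hess_inv on the result is an LbfgsInvHessProduct (a LinearOperator), which np.diag/np.clip cannot consume directly. A self-contained demonstration of the conversion on a toy quadratic objective:

import numpy as np
from scipy.optimize import minimize

res = minimize(lambda b: np.sum((b - 1.0) ** 2), x0=np.zeros(3), method='L-BFGS-B')
print(type(res.hess_inv).__name__)   # LbfgsInvHessProduct
dense = res.hess_inv.todense()       # plain (3, 3) ndarray
stderr = np.sqrt(np.clip(np.diag(dense), 0, None))  # mirrors the downstream use in _post_fit_ll_aic_bic
print(stderr)
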
@@ -5463,6 +5488,7 @@ class ObjectiveFunction(object):
                 if robust else optim_res['hess_inv']
         else:
             covariance = np.diag(np.ones(len(optim_res.x)))
+        covariance = self.handle_covariance(covariance)
         covariance = np.clip(covariance, 0, None)
         stderr = np.sqrt(np.diag(covariance))
         # stderr = [if np.abs(optim_res['x'][i]) >.1 else min(np.abs(optim_res['x'][i]/1.5), stderr[i]) for i in range(len(optim_res['x']))]
@@ -5538,10 +5564,9 @@ class ObjectiveFunction(object):
             self.none_handler(self.rdm_cor_fit) + \
             self.get_dispersion_name(dispersion)
         return a
-
-    def fitRegression(self, mod,
+
+    def fitRegression_prefit(self, mod,
                       dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):
-
         """
         Fits a poisson regression given data and outcomes if dispersion is not declared
         if declared, fits a NB (dispersion = 1) regression or GP (disperions = 2)
@@ -5577,7 +5602,613 @@ class ObjectiveFunction(object):
             zvalues = None
             if mod.get('Xr') is not None or mod.get('XG') is not None or mod.get('XH') is not None:
                 calc_gradient = True
+                calc_gradient = False if self.linear_regression else True
+                n, p, k = mod.get('X').shape
+                _r, pr, kr = mod.get('Xr').shape
+                kh = mod.get('XH').shape[2]
+
+                if 'XG' in mod:
+                    _g, pg, kg = mod.get('XG').shape
+                else:
+                    _g, pg, kg = 0, 0, 0
+
+                dispersion_param_num = self.is_dispersion(dispersion)
+                if self.no_extra_param:
+                    dispersion_param_num = 0
+
+                # paramNum = self.get_param_num(dispersion)
+                self.no_random_paramaters = 0
+                if 'XG' in mod:
+                    XX = np.concatenate((mod.get('X'), mod.get('XG'), mod.get('Xr'), mod.get('XH')), axis=2)
+                elif 'XH' in mod:
+                    XX = np.concatenate((mod.get('X'), mod.get('Xr'), mod.get('XH')), axis=2)
+                else:
+                    XX = np.concatenate((mod.get('X'), mod.get('Xr')), axis=2)
+
+                if self.is_multi:
+                    if mod.get('X_test') is not None and mod.get('Xr_test') is not None:
+                        if 'XH' in mod:
+                            XX_test = np.concatenate((mod.get('X_test'), mod.get('Xr_test'), mod.get('XH_test')),
+                                                     axis=2)
+                        else:
+                            XX_test = np.concatenate((mod.get('X_test'), mod.get('Xr_test')), axis=2)
+
+
+
+            else:
+
+                XX = mod.get('Xr')
+                if mod.get('Xr_test') is not None:
+                    XX_test = mod.get('Xr_test')
+
+            bb = np.random.uniform(
+                -0.05, 0.05, size=k + kr + kg + kh + dispersion_param_num)
+
+            if method == 'L-BFGS-B':
+                if dispersion == 0:
+                    bounds = []
+                    for i in bb:
+                        bounds = bounds + [(i - 30, i + 30)]
+
+                    # bound = [(-100,100) ]*len(b)
+
+                elif dispersion == 1:  # TODO test bounds was NOne
+                    bounds = []
+                    for i in bb[:-1]:
+                        bounds = bounds + [(i - 30, i + 30)]
+                    bounds = bounds + [(-1, 5)]
+
+                elif dispersion == 2:
+                    bounds = []
+                    for i in bb[:-1]:
+                        bounds = bounds + [(i - 5, i + 5)]
+                    bounds = bounds + [(0.1, .99)]
+
+                else:
+                    bounds = None
+            else:
+                bb[0] = self.constant_value
+                if dispersion == 1:
+                    if not self.no_extra_param:
+                        bb[-1] = self.negative_binomial_value
+                bounds = None
+
+
+
+            # intial_beta = minimize(self._loglik_gradient, bb, args =(XX, y, None, None, None, None, calc_gradient, hess_est, dispersion, 0, False, 0, None, sub_zi, exog_infl, None, None, mod), method = 'nelder-mead', options={'gtol': 1e-7*len(XX)})
+            hess_est = False if method2 in ['L-BFGS-B', 'BFGS_2', 'Nelder-Mead-BFGS'] else True
+
+            if self.no_extra_param:
+                dispersion_poisson = 0
+                initial_beta = self._minimize(self._loglik_gradient, bb,
+                                              args=(XX, y, None, None, None, None, calc_gradient, hess_est,
+                                                    dispersion_poisson, 0, False, 0, None, None, None, None, None,
+                                                    mod),
+                                              method=method2, tol=1e-5, options={'gtol': tol['gtol']},
+                                              bounds=bounds)
+                if dispersion:
+                    try:
+                        nb_parma = self.poisson_mean_get_dispersion(initial_beta.x, XX, y)
+                    except:
+                        nb_parma = 0.5
+
+            if method2 == 'L-BFGS-B':
+                if hasattr(initial_beta.hess_inv, 'todense'):
+                    initial_beta['hess_inv'] = initial_beta.hess_inv.todense() if hasattr(initial_beta.hess_inv,
+                                                                                          'todense') else np.array(
+                        [initial_beta.hess_inv(np.eye(len(bb))[i]) for i in range(len(bb))])
+
+            bb = initial_beta['x'].copy()
+
+            if initial_beta is not None and np.isnan(initial_beta['fun']):
+                initial_beta = self._minimize(self._loglik_gradient, bb,
+                                              args=(XX, y, None, None, None, None, True, True, dispersion,
+                                                    0, False, 0, None, None, None, None, None, mod),
+                                              method=method2, tol=tol['ftol'], options={'gtol': tol['gtol']})
+
+            if initial_beta is not None and not np.isnan(initial_beta['fun']):
+                self._no_random_paramaters = 1
+                if initial_beta['success'] != 0:
+                    self.convergance = 0
+                else:
+                    self.convergance = 1
+                print('TODO NEED TO RETURN THE THINGS I CARE ABOUT')
+            else:
+
+                is_halton = 0
+
+                print('Solution was not finite, error. Continue')
+                sol.add_objective()
+                return sol, None, None, None, None, None, None, 0
+        except Exception as e:
+            exc_type, exc_obj, exc_tb = sys.exc_info()
+            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
+            print(exc_type, fname, exc_tb.tb_lineno)
+
+    def fitRegression_in_chunks(self, mod,dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):
+        """
+        Fits a poisson regression given data and outcomes if dispersion is not declared
+        if declared, fits a NB (dispersion = 1) regression or GP (disperions = 2)
+
+        #TODO lineraregression
+        Inputs:
+        X - array.  Design matrix
+        y - array.  Observed outcomes
+        Outputs:
+        betas_est - array.  Coefficients which maximize the negative log-liklihood.
+        """
+        # Set defualt method
+        #TODO, the inital fit worked but it throws
+        tol = {'ftol': 1e-8, 'gtol': 1e-6}
+
+        sol = Solution()
+
+        initial_betas = self.fitRegression_prefit(mod, dispersion, maxiter, batch_size, num_hess, **kwargs)
+
 
+
+
+    def _build_design_matrix(self, mod):
+        """
+        Build the design matrix `XX` by combining `X`, `Xr`, `XG`, and `XH`.
+
+        Parameters:
+        mod: Dictionary containing data and parameters.
+
+        Returns:
+        Combined design matrix `XX`.
+        """
+        X, Xr, XG, XH = mod.get('X'), mod.get('Xr'), mod.get('XG'), mod.get('XH')
+        if XG is not None:
+            return np.concatenate((X, XG, Xr, XH), axis=2)
+        elif XH is not None:
+            return np.concatenate((X, Xr, XH), axis=2)
+        else:
+            return np.concatenate((X, Xr), axis=2)
+
+
+    def _update_attributes(self, optimization_result, mod):
+        """
+        Update instance attributes like `self.significant` and `self.draws`.
+
+        Parameters:
+        optimization_result: The result of the optimization process.
+        mod: The model dictionary containing data and parameters.
+        """
+        # Update `self.significant` based on p-values or other criteria
+        if optimization_result is not None:
+            significant_threshold = 0.05  # Example threshold for significance
+            self.significant = all(
+                p < significant_threshold for p in mod.get("pvalues", [])
+            )
+        else:
+            self.significant = False  # Mark as not significant if optimization failed
+
+        # Update `self.draws` based on `mod` or other factors
+        if "Xr" in mod:
+            Xr = mod.get("Xr")
+            draws = Xr.shape[0] if Xr is not None else 0  # Example: Number of rows in Xr
+            self.draws = draws
+        else:
+            self.draws = 0
+
+    def _run_optimization(self, XX, y, dispersion, initial_params, bounds, tol, mod):
+        """
+        Run the optimization process with draws logic and update the Solution object.
+
+        Parameters:
+        XX: Design matrix.
+        y: Observed outcomes.
+        dispersion: Dispersion parameter (0=Poisson, 1=NB, 2=GP).
+        initial_params: Initial parameter array.
+        bounds: List of bounds for each parameter.
+        tol: Tolerance for the optimization process (dictionary with ftol and gtol).
+        mod: Dictionary containing additional data.
+
+        Returns:
+        Solution object with updated objectives.
+        """
+        # Extract relevant data
+        X, Xr, XG = mod.get('X'), mod.get('Xr'), mod.get('XG')
+        distribution = mod.get('dist_fit')
+
+        # Prepare draws
+        draws = self._prepare_draws(Xr, distribution)
+        draws_grouped = self._prepare_grouped_draws(XG, mod) if XG is not None else None
+
+        # Optimization method and options
+        method = self.method_ll
+        print('updataing methods')
+        method = 'Nelder-Mead-BFGS'
+        options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 2000}
+
+        # Run optimization
+        optimization_result = self._minimize(
+            self._loglik_gradient,
+            initial_params,
+            args=(
+                X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
+                self.rdm_cor_fit, None, None, draws_grouped, XG, mod
+            ),
+            method=method,
+            bounds=bounds,
+            tol=tol.get('ftol', 1e-8),  # Use 'ftol' as the default tolerance
+            options=options
+        )
+        return optimization_result
+
+
+    def _initialize_params_and_bounds(self, XX, dispersion):
+        """Initialize parameters and set bounds for optimization."""
+        num_params = XX.shape[2]  # Number of features
+        initial_params = np.random.uniform(-0.05, 0.05, size=num_params)
+
+        # Define bounds for optimization
+        if dispersion == 0:
+            bounds = [(-30, 30) for _ in initial_params]
+        elif dispersion == 1:
+            bounds = [(-30, 30) for _ in initial_params[:-1]] + [(-1, 5)]
+        elif dispersion == 2:
+            bounds = [(-5, 5) for _ in initial_params[:-1]] + [(0.1, 0.99)]
+        else:
+            bounds = None
+
+        return initial_params, bounds
+
+
+    def _prepare_data(self, mod):
+        """Prepare data matrices (XX, XX_test) and outcomes (y, y_test)."""
+        y = mod.get('y')
+        y_test = mod.get('y_test')
+
+        # Combine main data matrices
+        XX = self._combine_data_matrices(mod)
+
+        # Combine test data matrices
+        if mod.get('X_test') is not None and mod.get('Xr_test') is not None:
+            if 'XH' in mod:
+                XX_test = np.concatenate(
+                    (mod.get('X_test'), mod.get('Xr_test'), mod.get('XH_test')), axis=2
+                )
+            else:
+                XX_test = np.concatenate((mod.get('X_test'), mod.get('Xr_test')), axis=2)
+        else:
+            XX_test = None
+
+        return XX, XX_test, y, y_test
+
+    def _handle_error(self, e):
+        """Handle exceptions and log errors."""
+        import sys, os
+        exc_type, exc_obj, exc_tb = sys.exc_info()
+        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
+        print(f"Error: {e}, File: {fname}, Line: {exc_tb.tb_lineno}")
+
+
+
+
+
+    def _postprocess_results(self, optimization_result, XX, XX_test, y, y_test, dispersion, mod):
+        """
+        Process optimization results and calculate metrics.
+
+        Parameters:
+        optimization_result: The result of the optimization process.
+        XX: Design matrix for training data.
+        XX_test: Design matrix for test data (if applicable).
+        y: Observed outcomes for training data.
+        y_test: Observed outcomes for test data (if applicable).
+        dispersion: Dispersion parameter (0=Poisson, 1=NB, 2=GP).
+        mod: Dictionary containing additional model data.
+
+        Returns:
+        log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae
+        """
+        if optimization_result is not None and np.isfinite(optimization_result['fun']):
+            # Calculate post-fit metrics (log-likelihood, AIC, BIC, etc.)
+            log_ll, aic, bic, stderr, zvalues, pvalue_alt, other_measures = self._post_fit_ll_aic_bic(
+                optimization_result, simple_fit=False, is_dispersion=dispersion
+            )
+
+            # Validation metrics if test data is available (in-sample and out-of-sample MAE)
+            in_sample_mae = None
+            out_sample_mae = None
+            if self.is_multi and XX_test is not None:
+                in_sample_mae = self.validation(
+                    optimization_result['x'], y, XX, dispersion=dispersion, model_nature=mod, testing=0
+                )
+                out_sample_mae = self.validation(
+                    optimization_result['x'], y_test, XX_test, dispersion=dispersion, model_nature=mod
+                )
+
+            return log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae
+
+        else:
+            # Optimization failed, return None for all metrics
+            print("Optimization failed.")
+            return None, None, None, None, None, None, None, None
+    def _prepare_data_and_bounds(self, mod, dispersion):
+        """Prepare the data matrices, bounds, and initial parameters."""
+        # Prepare data matrices
+        XX = self._combine_data_matrices(mod)  # Combine mod['X'], mod['Xr'], mod['XH'], etc.
+
+        # Set initial parameters
+        initial_params = self._initialize_parameters(XX, dispersion)
+
+        # Define bounds for optimization
+        bounds = self._set_bounds(initial_params, dispersion)
+
+        return XX, bounds, initial_params
+
+
+    def _combine_data_matrices(self, mod):
+        """Combine data matrices (X, Xr, XH, etc.) into a single matrix."""
+        if 'XG' in mod:
+            return np.concatenate((mod.get('X'), mod.get('XG'), mod.get('Xr'), mod.get('XH')), axis=2)
+        elif 'XH' in mod:
+            return np.concatenate((mod.get('X'), mod.get('Xr'), mod.get('XH')), axis=2)
+        else:
+            return np.concatenate((mod.get('X'), mod.get('Xr')), axis=2)
+
+
+    def _initialize_parameters(self, XX, dispersion):
+        """Initialize random parameters for optimization."""
+        num_params = XX.shape[2]  # Number of features
+        return np.random.uniform(-0.05, 0.05, size=num_params)
+
+
+    def _set_bounds(self, initial_params, dispersion):
+        """Set bounds for optimization based on the dispersion type."""
+        return None
+        if dispersion == 0:
+            return [(-30, 30) for _ in initial_params]
+        elif dispersion == 1:
+            return [(-30, 30) for _ in initial_params[:-1]] + [(-1, 5)]
+        elif dispersion == 2:
+            return [(-5, 5) for _ in initial_params[:-1]] + [(0.1, 0.99)]
+        else:
+            return None
+    def _build_test_matrix(self, mod):
+        """
+        Build the test matrix `XX_test` by combining `X_test`, `Xr_test`, and `XH_test`.
+
+        Parameters:
+        mod: Dictionary containing test data.
+
+        Returns:
+        Combined test matrix `XX_test`.
+        """
+        X_test, Xr_test, XG_test, XH_test = (
+            mod.get('X_test'), mod.get('Xr_test'), mod.get('XG_test'), mod.get('XH_test')
+        )
+        if X_test is None or Xr_test is None:
+            return None
+
+        if XH_test is not None:
+            return np.concatenate((X_test, Xr_test, XH_test), axis=2)
+        elif XG_test is not None:
+            return np.concatenate((X_test, XG_test, Xr_test), axis=2)
+        else:
+            return np.concatenate((X_test, Xr_test), axis=2)
+
+    def _calculate_num_coefficients(self, mod, dispersion):
+        """
+        Calculate the total number of coefficients for the regression model.
+
+        Parameters:
+        mod: Dictionary containing data and parameters.
+        dispersion: Dispersion parameter (0=Poisson, 1=NB, 2=GP).
+
+        Returns:
+        Total number of coefficients.
+        """
+        X, Xr, XG, XH = mod.get('X'), mod.get('Xr'), mod.get('XG'), mod.get('XH')
+        n, p, k = X.shape
+        kr = Xr.shape[2] if Xr is not None else 0
+        kg = XG.shape[2] if XG is not None else 0
+        kh = XH.shape[2] if XH is not None else 0
+
+        # Dispersion adds one additional parameter if enabled
+        dispersion_param = 1 if dispersion > 0 else 0
+        return sum(self.get_num_params()) + dispersion_param
+        #return k + kr + kg + kh + dispersion_param
+    def _build_initial_params(self, num_coefficients, dispersion):
+        """
+        Build the initial parameter array for optimization.
+
+        Parameters:
+        num_coefficients: Total number of coefficients.
+        dispersion: Dispersion parameter (0=Poisson, 1=NB, 2=GP).
+
+        Returns:
+        Initial parameter array.
+        """
+        # Generate random initial coefficients
+        initial_params = np.random.uniform(-0.05, 0.05, size=num_coefficients)
+
+        # Add dispersion parameter if applicable
+        if dispersion > 0:
+            initial_params = np.insert(initial_params, -1, 0.)
+
+        return initial_params
+
+    def fitRegression(self, mod, dispersion=0, maxiter=4000, batch_size=None, num_hess=False, **kwargs):
+        """
+        Fits a Poisson regression, NB regression (dispersion=1), or GP regression (dispersion=2).
+
+        Parameters:
+        mod: Dictionary containing data and parameters.
+        dispersion: 0 for Poisson, 1 for NB, 2 for GP.
+        maxiter: Maximum number of optimization iterations.
+        batch_size: Batch size for certain methods (if applicable).
+        num_hess: Whether to compute the numerical Hessian.
+
+        Returns:
+        obj_1, log_lik, betas, stderr, pvalues, zvalues, is_halton, is_delete
+        """
+        try:
+            # Preprocessing
+            tol = {'ftol': 1e-8, 'gtol': 1e-6}
+            y, X, Xr, XG, XH = mod.get('y'), mod.get('X'), mod.get('Xr'), mod.get('XG'), mod.get('XH')
+
+            # Validate input data
+            if y is None or X is None:
+                raise ValueError("Both `y` and `X` must be provided in the `mod` dictionary.")
+
+            # Build the design matrix `XX` and test matrix `XX_test` if applicable
+            XX = self._build_design_matrix(mod)
+            XX_test = self._build_test_matrix(mod) if self.is_multi else None
+
+            # Determine the number of coefficients
+            num_coefficients = self._calculate_num_coefficients(mod, dispersion)
+
+            # Build initial parameters and bounds
+            initial_params = self._build_initial_params(num_coefficients, dispersion)
+            bounds = self._set_bounds(initial_params, dispersion)
+
+
+            # Run optimization
+            optimization_result = self._run_optimization(
+                XX, y, dispersion, initial_params, bounds, tol, mod
+            )
+
+            # Post-process results
+            log_lik, aic, bic, stderr, zvalues, pvalues, in_sample_mae, out_sample_mae = self._postprocess_results(
+                optimization_result, XX, XX_test, y, mod.get('y_test'), dispersion, mod
+            )
+
+            # Extract other outputs
+            betas = optimization_result['x'] if optimization_result is not None else None
+            is_halton = Xr is not None and Xr.size > 0  # Halton draws used if `Xr` is not empty
+
+            # Determine `is_delete`
+            is_delete = not (
+                optimization_result is not None
+                and 'fun' in optimization_result
+                and not math.isnan(optimization_result['fun'])
+                and not math.isinf(optimization_result['fun'])
+            )
+
+            betas_est = optimization_result
+
+            # Post-fit metrics
+            log_ll, aic, bic, stderr, zvalues, pvalue_alt, other_measures = self._post_fit_ll_aic_bic(
+                betas_est, simple_fit=False, is_dispersion=dispersion
+            )
+
+            # Number of parameters
+            paramNum = len(betas_est['x'])
+
+            # Naming for printing (optional, for formatting or debugging purposes)
+            self.convergance = not is_delete
+            self.naming_for_printing(betas_est['x'], 0, dispersion, model_nature=mod)
+
+            # Add metrics to solution object
+            sol = Solution()  # Assuming Solution is the appropriate class to store results
+            sol.add_objective(
+                bic=bic,
+                aic=aic,
+                loglik=log_ll,
+                num_parm=paramNum,
+                GOF=other_measures
+            )
+
+
+            return (
+                sol,  # obj_1
+                log_lik,
+                betas,
+                stderr,
+                pvalues,
+                zvalues,
+                is_halton,
+                is_delete
+            )
+
+        except Exception as e:
+            self._handle_error(e)
+            return None, None, None, None, None, None, None, 0
+
+
+    def _prepare_draws(self, Xr, distribution):
+        """
+        Prepare the draws for the random effects.
+
+        Parameters:
+        Xr: Random effect design matrix.
+        distribution: Distribution type for the random effects.
+
+        Returns:
+        Draws matrix or None if `Xr` is not provided.
+        """
+        if Xr is None or Xr.size == 0:
+            return None
+
+        n_samples, n_features, n_random_effects = Xr.shape
+        return self.prepare_halton(
+            n_random_effects, n_samples, self.Ndraws, distribution, long=False, slice_this_way=self.group_halton
+        )
+
+    def _prepare_grouped_draws(self, XG, mod):
+        """
+        Prepare the grouped draws for the regression model.
+
+        Parameters:
+        XG: Grouped design matrix.
+        mod: Dictionary containing additional data.
+
+        Returns:
+        Grouped draws matrix.
+        """
+        n_samples, n_features, n_groups = XG.shape
+        if n_features == 0:
+            return None
+        group_distribution = mod.get('dist_fit_grouped', np.zeros(n_groups))
+
+        return self.prepare_halton(
+            n_groups, n_samples, self.Ndraws, group_distribution, slice_this_way=self.group_halton
+        )
+
+    def fitRegression_o(self, mod,
+                        dispersion=0, maxiter=2000, batch_size=None, num_hess=False, **kwargs):
+
+        """
+        Fits a poisson regression given data and outcomes if dispersion is not declared
+        if declared, fits a NB (dispersion = 1) regression or GP (disperions = 2)
+
+        #TODO lineraregression
+        Inputs:
+        X - array.  Design matrix
+        y - array.  Observed outcomes
+        Outputs:
+        betas_est - array.  Coefficients which maximize the negative log-liklihood.
+        """
+        # Set defualt method
+        #TODO, the inital fit worked but it throws
+        tol = {'ftol': 1e-8, 'gtol': 1e-6}
+
+
+
+        sol = Solution()
+
+        tol = {'ftol': 1e-8, 'gtol': 1e-6}
+        is_delete = 0
+        dispersion = mod.get('dispersion')
+        y = mod.get('y')
+        try:
+            method = self.method_ll
+            method2 = self.method_ll
+            # method2 = 'BFGS_2'
+            if self.hess_yes == False:
+                method2 = 'BFGS_2'
+            method2 = self.method_ll
+
+            bic = None
+            pvalue_alt = None
+            zvalues = None
+            if mod.get('Xr') is not None or mod.get('XG') is not None or mod.get('XH') is not None:
+                calc_gradient = True
+                calc_gradient = False if self.linear_regression else True
                 n, p, k = mod.get('X').shape
                 _r, pr, kr = mod.get('Xr').shape
                 kh = mod.get('XH').shape[2]
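
Taken together, this hunk replaces the former monolithic fitRegression with a pipeline of small helpers (the old body survives as fitRegression_o). A schematic of the call order, using the method names added above; this is an outline, not a runnable excerpt:

# fitRegression(mod, dispersion) under 0.1.237:
#   XX      = self._build_design_matrix(mod)                          # stack X / XG / Xr / XH along axis 2
#   XX_test = self._build_test_matrix(mod) if self.is_multi else None
#   n_coef  = self._calculate_num_coefficients(mod, dispersion)
#   init    = self._build_initial_params(n_coef, dispersion)          # U(-0.05, 0.05) starting values
#   bounds  = self._set_bounds(init, dispersion)                      # note: the early `return None` disables bounds
#   result  = self._run_optimization(XX, y, dispersion, init, bounds, tol, mod)
#   metrics = self._postprocess_results(result, XX, XX_test, y, y_test, dispersion, mod)
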
@@ -5675,7 +6306,7 @@ class ObjectiveFunction(object):
                     initial_beta['hess_inv'] = initial_beta.hess_inv.todense() if hasattr(initial_beta.hess_inv,
                                                                                           'todense') else np.array(
                         [initial_beta.hess_inv(np.eye(len(bb))[i]) for i in range(len(bb))])
-
+
                 bb = initial_beta['x'].copy()
 
                 if initial_beta is not None and np.isnan(initial_beta['fun']):
@@ -6066,7 +6697,8 @@ class ObjectiveFunction(object):
     def transformer(self, transform, idc, x_data):
         if transform == 0 or transform == 1 or transform == 'no':
             tr = x_data.astype(float)
-
+        elif transform == 'nil':
+            tr = x_data.astype(float)
         elif transform == 'log':
             tr = np.log1p(x_data.astype(float))
         elif transform == 'exp':
@@ -6083,7 +6715,8 @@ class ObjectiveFunction(object):
             tr = pd.Series(tr)
 
         else:  # will be a number
-            tr = np.power(x_data.astype(float), transform)
+            tr = x_data.astype(float)
+            transform = 'nil'
         # if tr.isin([np.inf, -np.inf, np.nan, None]).any() == True:
 
         if np.any(np.logical_or(pd.isna(tr), np.logical_or(pd.isna(tr), tr is None))):
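
The net effect of these two transformer hunks: the new 'nil' label is an explicit identity transform, and numeric power transforms no longer apply. transform=2 previously squared the column via np.power; it now falls through to the identity and is relabelled 'nil'. A standalone before/after illustration:

import numpy as np

x = np.array([1.0, 2.0, 3.0])
old = np.power(x, 2)    # 0.1.235 behaviour for transform=2 -> [1. 4. 9.]
new = x.astype(float)   # 0.1.237 behaviour: identity, transform relabelled 'nil'
print(old, new)
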
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: metacountregressor
-Version: 0.1.235
+Version: 0.1.237
 Summary: Extensive Testing for Estimation of Data Count Models
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
@@ -0,0 +1 @@
+0.1.237
@@ -1 +0,0 @@
-0.1.235