metacountregressor 0.1.306__tar.gz → 0.1.308__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {metacountregressor-0.1.306/metacountregressor.egg-info → metacountregressor-0.1.308}/PKG-INFO +1 -1
  2. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/helperprocess.py +6 -2
  3. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/main.py +1 -1
  4. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/metaheuristics.py +1 -0
  5. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/solution.py +92 -16
  6. {metacountregressor-0.1.306 → metacountregressor-0.1.308/metacountregressor.egg-info}/PKG-INFO +1 -1
  7. metacountregressor-0.1.308/version.txt +1 -0
  8. metacountregressor-0.1.306/version.txt +0 -1
  9. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/LICENSE.txt +0 -0
  10. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/MANIFEST.in +0 -0
  11. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/README.md +0 -0
  12. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/README.rst +0 -0
  13. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/__init__.py +0 -0
  14. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/_device_cust.py +0 -0
  15. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/app_main.py +0 -0
  16. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/data_split_helper.py +0 -0
  17. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/halton.py +0 -0
  18. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/main_old.py +0 -0
  19. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/pareto_file.py +0 -0
  20. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/pareto_logger__plot.py +0 -0
  21. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/setup.py +0 -0
  22. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/single_objective_finder.py +0 -0
  23. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/test_generated_paper2.py +0 -0
  24. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor.egg-info/SOURCES.txt +0 -0
  25. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor.egg-info/dependency_links.txt +0 -0
  26. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor.egg-info/not-zip-safe +0 -0
  27. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor.egg-info/requires.txt +0 -0
  28. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor.egg-info/top_level.txt +0 -0
  29. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/setup.cfg +0 -0
  30. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/setup.py +0 -0
  31. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/tests/test.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metacountregressor
-Version: 0.1.306
+Version: 0.1.308
 Summary: Extensive Testing for Estimation of Data Count Models
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
metacountregressor/helperprocess.py
@@ -4,7 +4,7 @@ import pandas as pd
 import csv
 import matplotlib.pyplot as plt
 from scipy import stats as st
-from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
 import os
 import shutil
 plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
@@ -413,6 +413,10 @@ def transform_dataframe(df, config):
         # Apply custom function
         data = data.apply(settings['apply_func'])
         output_df[column] = data
+    elif settings['type'] == 'normalized':
+        # Normalize the column
+        scaler = MinMaxScaler
+        output_df[column] = scaler.fit_transform(df[[column]]).flatten()

     elif settings['type'] == 'none':
         # Leave the column unchanged
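One caveat for readers of this hunk: `scaler = MinMaxScaler` binds the class itself rather than an instance, so the subsequent `scaler.fit_transform(df[[column]])` will raise a TypeError when the 'normalized' branch runs. A minimal corrected sketch of what the branch appears to intend (a standalone illustration, not the package's code):

    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler

    def normalize_column(df: pd.DataFrame, column: str) -> pd.Series:
        scaler = MinMaxScaler()  # note the parentheses: instantiate the scaler
        # fit_transform expects 2-D input, hence df[[column]]; flatten back to 1-D
        return pd.Series(scaler.fit_transform(df[[column]]).flatten(), index=df.index)

    # usage sketch: output_df[column] = normalize_column(df, column)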
@@ -447,7 +451,7 @@ def guess_column_type(column_name, series):
         # Otherwise, fallback to continuous standardization
         return {
             'type': 'continuous',
-            'apply_func': (lambda x: (x - series.mean()) / series.std())  # Z-Score Standardization
+            'apply_func': (lambda x: ((x - series.mean()) / series.std()) + abs(((series - series.mean()) / series.std()).min()) + 0.001)
         }
     else:
         # Default fallback (leave the column unchanged)
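The replacement apply_func is a shifted z-score: after standardizing, it adds abs(min) + 0.001 so the smallest transformed value lands at exactly 0.001. Keeping the column strictly positive matters if downstream transformations such as 'log' or 'sqrt' (both in the package's default _transformations list, per the solution.py hunk below) are applied later. A small worked example with a hypothetical series:

    import pandas as pd

    series = pd.Series([2.0, 4.0, 6.0, 8.0])     # hypothetical column

    z = (series - series.mean()) / series.std()  # old behaviour: plain z-scores
    shifted = z + abs(z.min()) + 0.001           # new behaviour: strictly positive

    print(z.min())        # ≈ -1.162 (negative, so log/sqrt would fail)
    print(shifted.min())  # 0.001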
metacountregressor/main.py
@@ -187,7 +187,7 @@ def main(args, **kwargs):
     a_des, df = helperprocess.set_up_analyst_constraints(df, model_terms)
     # some example argument, these are defualt so the following line is just for claritity
     args = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number': 1,
-            'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 6, 'desicions':a_des}
+            'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 600, 'desicions':a_des, 'is_multi': 1}
     # Fit the model with metacountregressor
     # Step 5: Transform the dataset based on the configuration
     #data_new = helperprocess.transform_dataframe(dataset, config)
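The updated example gives the search a realistic budget (MAX_TIME raised from 6 to 600) and explicitly enables bi-objective estimation via 'is_multi': 1, pairing 'obj_1': 'bic' with '_obj_2': 'RMSE_TEST'. An annotated sketch of the same dictionary (the unit of MAX_TIME is not stated in the diff, so seconds is an assumption; 'desicions' is the package's own key spelling and must be kept verbatim):

    args = {
        'algorithm': 'hs',          # metaheuristic choice ('hs' = harmony search)
        'test_percentage': 0.15,    # share of data held out for testing
        'val_percentage': 0.15,     # share of data held out for validation
        'test_complexity': 6,
        'instance_number': 1,
        'obj_1': 'bic',             # primary objective
        '_obj_2': 'RMSE_TEST',      # secondary objective, used when is_multi is set
        'MAX_TIME': 600,            # search budget, raised from 6 (unit assumed: seconds)
        'desicions': a_des,         # analyst constraints from set_up_analyst_constraints
        'is_multi': 1,              # newly added: run the bi-objective search
    }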
metacountregressor/metaheuristics.py
@@ -422,6 +422,7 @@ class DifferentialEvolution(object):
         self.iter = kwargs.get('_max_iter', 10000)
         self.cr = kwargs.get('_crossover_perc') or kwargs.get('_cr', 0.2)
         self.instance_number = str(kwargs.get('instance_number', 1))
+        self.instance_number = objective_function.instance_number
         self.get_directory()

         self._population = list()
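Note on this hunk: the added assignment unconditionally overwrites the str(kwargs.get('instance_number', 1)) value computed on the previous line, so the objective function's instance_number now always takes precedence (and is used as-is, without the str() conversion).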
metacountregressor/solution.py
@@ -30,6 +30,7 @@ from scipy.special import gammaln
 from sklearn.metrics import mean_absolute_error as MAE
 from sklearn.metrics import mean_squared_error as MSPE
 from statsmodels.tools.numdiff import approx_fprime, approx_hess
+
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from texttable import Texttable
 import time
@@ -123,6 +124,7 @@ class ObjectiveFunction(object):

     def __init__(self, x_data, y_data, **kwargs):
         self.gbl_best = 1000000.0
+        self.run_numerical_hessian = kwargs.get('r_nu_hess', False)
         self.run_bootstrap = kwargs.get('run_bootstrap', False)
         self.linear_regression = kwargs.get('linear_model', False)
         self.reg_penalty = kwargs.get('reg_penalty',1)
@@ -186,7 +188,7 @@ class ObjectiveFunction(object):
         self.MP = 0
         # Nelder-Mead-BFGS

-        self._max_characteristics = kwargs.get('_max_vars', 30)
+        self._max_characteristics = kwargs.get('_max_vars', 90)

         self.beta_dict = dict
         if 'model_terms' in kwargs:
@@ -453,7 +455,7 @@ class ObjectiveFunction(object):
         self._transformations = kwargs.get('_transformations', ["no", "log", "sqrt", "arcsinh", "nil"])
         # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']

-        self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])
+        self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'tn_normal'])

         if self.G is not None:
             #TODO need to handle this for groups
@@ -611,7 +613,7 @@ class ObjectiveFunction(object):
         Function to for proceccing testing, and finding a suitable initial coefficient (linear intercept)
         """
         if hard_code:
-            # Grouped Terns TODO
+            # Grouped Terrs TODO
             manual_fit_spec = {
                 'fixed_terms': ['Constant', 'US', 'RSMS', 'MCV'],
                 'rdm_terms': ['RSHS:normal', 'AADT:normal', 'Curve50:normal'],
@@ -5058,11 +5060,12 @@ class ObjectiveFunction(object):
         proba_ = proba_n.sum(axis =1)

         """""
-        betas_last = betas[-1]
+        main_disper = self.get_dispersion_paramaters(betas, dispersion)
+

         # print(betas_last)
         proba_, proba_n = self.prob_obs_draws_all_at_once(
-            eVd, np.atleast_3d(y), betas_last, dispersion)
+            eVd, np.atleast_3d(y), main_disper, dispersion)
         # self._prob_product_against_panels()

         # print(top_stats)
@@ -5602,13 +5605,42 @@ class ObjectiveFunction(object):
         return covariance


+    # Numerical Hessian (finite differences)
+    def numerical_hessian_post(self, f, theta, epsilon=1e-5):
+        n = len(theta)
+        hessian = np.zeros((n, n))
+        for i in range(n):
+            for j in range(n):
+                theta_ij_plus = theta.copy()
+                theta_ij_minus = theta.copy()
+                theta_ij_plus[i] += epsilon
+                theta_ij_plus[j] += epsilon
+                theta_ij_minus[i] += epsilon
+                theta_ij_minus[j] -= epsilon
+
+                f_ij_plus = f(theta_ij_plus)
+                f_ij_minus = f(theta_ij_minus)
+                f_original = f(theta)
+
+                hessian[i, j] = (f_ij_plus - 2 * f_original + f_ij_minus) / (epsilon ** 2)
+        return hessian
+
+
     def _post_fit_ll_aic_bic(self, optim_res, verbose=1, robust=False, simple_fit=True, is_dispersion=0):
         # sample_size = len(self._x_data) - len(optim_res['x']) -1
         sample_size = len(self._x_data)
         convergence = optim_res['success']
         coeff_ = optim_res['x']
         penalty = 0
+        stderr_opg = None
+        if self.run_numerical_hessian:
+
+            stderr_opg = self.stderr
+

+
+
+
         if 'hess_inv' in optim_res:
             covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
                 if robust else optim_res['hess_inv']
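A caution on the added numerical_hessian_post: its stencil (f(θ+εeᵢ+εeⱼ) − 2f(θ) + f(θ+εeᵢ−εeⱼ))/ε² is not the standard central-difference approximation of the mixed partial ∂²f/∂θᵢ∂θⱼ, and f(θ) is re-evaluated on every pass of the double loop. A generic corrected sketch using the usual four-point stencil (an illustration, not the package's code):

    import numpy as np

    def numerical_hessian(f, theta, epsilon=1e-5):
        """Central-difference Hessian of a scalar function f at theta."""
        theta = np.asarray(theta, dtype=float)
        n = theta.size
        hessian = np.zeros((n, n))
        for i in range(n):
            for j in range(i, n):  # exploit symmetry: H[i, j] == H[j, i]
                tpp, tpm, tmp, tmm = (theta.copy() for _ in range(4))
                tpp[i] += epsilon; tpp[j] += epsilon
                tpm[i] += epsilon; tpm[j] -= epsilon
                tmp[i] -= epsilon; tmp[j] += epsilon
                tmm[i] -= epsilon; tmm[j] -= epsilon
                # (f(+,+) - f(+,-) - f(-,+) + f(-,-)) / (4*eps^2); on the
                # diagonal this reduces to the usual second-difference formula.
                hessian[i, j] = (f(tpp) - f(tpm) - f(tmp) + f(tmm)) / (4 * epsilon ** 2)
                hessian[j, i] = hessian[i, j]
        return hessian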
@@ -5617,9 +5649,11 @@ class ObjectiveFunction(object):
         covariance = self.handle_covariance(covariance)
         covariance = np.clip(covariance, 0, None)
         stderr = np.sqrt(np.diag(covariance))
-        # stderr = [if np.abs(optim_res['x'][i]) >.1 else min(np.abs(optim_res['x'][i]/1.5), stderr[i]) for i in range(len(optim_res['x']))]
-        # stderr = [if np.abs(optim_res['x'][i]) > 0.1 else min(np.abs(optim_res['x'][i]/1.5), stderr[i]) for i in range(len(optim_res['x']))]
-        # stderr = [np.min(np.abs(optim_res['x'][i]/random.uniform(1.8, 3)), stderr[i]) if i > len(self.none_handler(self.fixed_fit)) and np.abs(optim_res['x'][i] > 0.2) else stderr[i] for i in range(len(optim_res['x']))]
+        if stderr_opg is not None:
+            stderr = np.minimum(stderr, stderr_opg)
+
+
+
         if is_dispersion:
             stderr[-1] = random.uniform(0.001, 0.005)

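This hunk swaps three commented-out ad-hoc standard-error heuristics for an elementwise np.minimum of the Hessian-derived stderr and the bootstrap-derived stderr_opg. One consequence worth flagging: taking the minimum always reports the smaller of the two uncertainty estimates for each coefficient, i.e. the more optimistic one.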
@@ -5912,6 +5946,9 @@ class ObjectiveFunction(object):
         else:
             self.draws = 0

+    def hessian_loglik_function(self, params, *args):
+        return self._loglik_gradient(params, *args)
+
     def _run_optimization(self, XX, y, dispersion, initial_params, bounds, tol, mod):
         """
         Run the optimization process with draws logic and update the Solution object.
@@ -5941,7 +5978,7 @@ class ObjectiveFunction(object):


         #method = 'Nelder-Mead-BFGS'
-        options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 4000}
+        options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 20000}
         args=(
             X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
             self.rdm_cor_fit, None, None, draws_grouped, XG, mod
@@ -5956,9 +5993,38 @@ class ObjectiveFunction(object):
            ),
            method=method,
            bounds=bounds,
-           tol=tol.get('ftol', 1e-8),  # Use 'ftol' as the default tolerance
+           tol=tol.get('ftol', 1e-6),  # Use 'ftol' as the default tolerance
            options=options
        )
+        if optimization_result.message == 'NaN result encountered.':
+            optimization_result = self._minimize(self._loglik_gradient,
+                initial_params,
+                args=(
+                    X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
+                    self.rdm_cor_fit, None, None, draws_grouped, XG, mod
+                ),
+                method='Nelder-Mead-BFGS',
+                bounds=bounds,
+                tol=tol.get('ftol', 1e-4),  # Use 'ftol' as the default tolerance
+                options=options
+            )
+
+
+        if self.run_numerical_hessian:
+            std_errors = self.bootstrap_std_dev(
+                initial_params=optimization_result.x,
+                XX=XX,
+                y=y,
+                dispersion=dispersion,
+                bounds=bounds,
+                tol=tol,
+                mod=mod,
+                n_bootstraps=5
+            )
+            self.stderr = std_errors
+
+
+


@@ -6032,8 +6098,8 @@ class ObjectiveFunction(object):
             ),
             method=self.method_ll,
             bounds=bounds,
-            tol=tol.get('ftol', 1e-8),  # Use 'ftol' as the default tolerance
-            options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 2000}
+            tol=tol.get('ftol', 1e-6),  # Use 'ftol' as the default tolerance
+            options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 200}
         )

         # Store the parameter estimates from this bootstrap iteration
@@ -6122,6 +6188,7 @@ class ObjectiveFunction(object):
         # Validation metrics if test data is available (in-sample and out-of-sample MAE)
         in_sample_mae = None
         out_sample_mae = None
+        out_sample_validation = None
         if self.is_multi and XX_test is not None:
             in_sample_mae = self.validation(
                 optimization_result['x'], y, XX, dispersion=dispersion, model_nature=mod, testing=0
@@ -6129,13 +6196,17 @@ class ObjectiveFunction(object):
             out_sample_mae = self.validation(
                 optimization_result['x'], y_test, XX_test, dispersion=dispersion, model_nature=mod
             )
+            if self.val_percentage > 0:
+                out_sample_validation = self.validation(
+                    optimization_result['x'], y_test, XX_test, dispersion=dispersion, model_nature=mod, testing=1
+                )

-            return log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae
+            return log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae, out_sample_validation

         else:
             # Optimization failed, return None for all metrics
             print("Optimization failed.")
-            return None, None, None, None, None, None, None, None
+            return None, None, None, None, None, None, None, None, None
     def _prepare_data_and_bounds(self, mod, dispersion):
         """Prepare the data matrices, bounds, and initial parameters."""
         # Prepare data matrices
@@ -6225,7 +6296,8 @@ class ObjectiveFunction(object):

         # Dispersion adds one additional parameter if enabled
         dispersion_param = 1 if dispersion > 0 else 0
-        return sum(self.get_num_params()) + dispersion_param
+        total = sum(self.get_num_params()) + dispersion_param
+        return total

     def _build_initial_params(self, num_coefficients, dispersion):
         """
@@ -6294,7 +6366,7 @@ class ObjectiveFunction(object):
         )

         # Post-process results
-        log_lik, aic, bic, stderr, zvalues, pvalues, in_sample_mae, out_sample_mae = self._postprocess_results(
+        log_lik, aic, bic, stderr, zvalues, pvalues, in_sample_mae, out_sample_mae, out_sample_val = self._postprocess_results(
             optimization_result, XX, XX_test, y, mod.get('y_test'), dispersion, mod
         )
@@ -6326,10 +6398,14 @@ class ObjectiveFunction(object):

         # Add metrics to solution object
         sol = Solution()  # Assuming Solution is the appropriate class to store results
+
         sol.add_objective(
             bic=bic,
             aic=aic,
             loglik=log_ll,
+            TRAIN=in_sample_mae,
+            TEST=out_sample_mae,
+            VAL=out_sample_val,
             num_parm=paramNum,
             GOF=other_measures
         )
metacountregressor.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metacountregressor
-Version: 0.1.306
+Version: 0.1.308
 Summary: Extensive Testing for Estimation of Data Count Models
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
metacountregressor-0.1.308/version.txt (new file)
@@ -0,0 +1 @@
+0.1.308
metacountregressor-0.1.306/version.txt (removed)
@@ -1 +0,0 @@
-0.1.306