metacountregressor 0.1.306__tar.gz → 0.1.307__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {metacountregressor-0.1.306/metacountregressor.egg-info → metacountregressor-0.1.307}/PKG-INFO +1 -1
  2. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor/helperprocess.py +6 -2
  3. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor/main.py +1 -1
  4. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor/metaheuristics.py +1 -0
  5. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor/solution.py +73 -12
  6. {metacountregressor-0.1.306 → metacountregressor-0.1.307/metacountregressor.egg-info}/PKG-INFO +1 -1
  7. metacountregressor-0.1.307/version.txt +1 -0
  8. metacountregressor-0.1.306/version.txt +0 -1
  9. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/LICENSE.txt +0 -0
  10. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/MANIFEST.in +0 -0
  11. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/README.md +0 -0
  12. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/README.rst +0 -0
  13. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor/__init__.py +0 -0
  14. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor/_device_cust.py +0 -0
  15. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor/app_main.py +0 -0
  16. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor/data_split_helper.py +0 -0
  17. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor/halton.py +0 -0
  18. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor/main_old.py +0 -0
  19. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor/pareto_file.py +0 -0
  20. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor/pareto_logger__plot.py +0 -0
  21. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor/setup.py +0 -0
  22. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor/single_objective_finder.py +0 -0
  23. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor/test_generated_paper2.py +0 -0
  24. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor.egg-info/SOURCES.txt +0 -0
  25. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor.egg-info/dependency_links.txt +0 -0
  26. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor.egg-info/not-zip-safe +0 -0
  27. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor.egg-info/requires.txt +0 -0
  28. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/metacountregressor.egg-info/top_level.txt +0 -0
  29. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/setup.cfg +0 -0
  30. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/setup.py +0 -0
  31. {metacountregressor-0.1.306 → metacountregressor-0.1.307}/tests/test.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: metacountregressor
- Version: 0.1.306
+ Version: 0.1.307
  Summary: Extensive Testing for Estimation of Data Count Models
  Home-page: https://github.com/zahern/CountDataEstimation
  Author: Zeke Ahern
metacountregressor/helperprocess.py
@@ -4,7 +4,7 @@ import pandas as pd
  import csv
  import matplotlib.pyplot as plt
  from scipy import stats as st
- from sklearn.preprocessing import StandardScaler
+ from sklearn.preprocessing import StandardScaler, MinMaxScaler
  import os
  import shutil
  plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
@@ -413,6 +413,10 @@ def transform_dataframe(df, config):
              # Apply custom function
              data = data.apply(settings['apply_func'])
              output_df[column] = data
+         elif settings['type'] == 'normalized':
+             # Normalize the column
+             scaler = MinMaxScaler
+             output_df[column] = scaler.fit_transform(df[[column]]).flatten()

          elif settings['type'] == 'none':
              # Leave the column unchanged
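Note on the new branch: as released, `scaler = MinMaxScaler` binds the class itself rather than an instance, so `scaler.fit_transform(df[[column]])` will raise a TypeError at runtime. A minimal corrected sketch (the helper name is hypothetical, not part of the package):

    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler

    def normalize_column(df: pd.DataFrame, column: str) -> pd.Series:
        scaler = MinMaxScaler()  # must be instantiated; the bare class has no fitted state
        # fit_transform returns an (n, 1) array; flatten it back into a Series
        return pd.Series(scaler.fit_transform(df[[column]]).flatten(), index=df.index)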
@@ -447,7 +451,7 @@ def guess_column_type(column_name, series):
          # Otherwise, fallback to continuous standardization
          return {
              'type': 'continuous',
-             'apply_func': (lambda x: (x - series.mean()) / series.std())  # Z-Score Standardization
+             'apply_func': (lambda x: ((x - series.mean()) / series.std()) + abs(((series - series.mean()) / series.std()).min()) + 0.001)
          }
      else:
          # Default fallback (leave the column unchanged)
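The new `apply_func` still z-scores the column but then shifts it up by the magnitude of the smallest z-score plus 0.001, so every transformed value is strictly positive, presumably to keep downstream count-model transforms well defined. A quick worked check of the shift (illustrative data, not from the package):

    import pandas as pd

    series = pd.Series([2.0, 5.0, 9.0, 14.0])
    z = (series - series.mean()) / series.std()  # ordinary z-scores; z.min() < 0
    shifted = z + abs(z.min()) + 0.001           # same spread, minimum now 0.001
    print(shifted.min())                         # 0.001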
metacountregressor/main.py
@@ -187,7 +187,7 @@ def main(args, **kwargs):
      a_des, df = helperprocess.set_up_analyst_constraints(df, model_terms)
      # some example argument, these are defualt so the following line is just for claritity
      args = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number': 1,
-             'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 6, 'desicions':a_des}
+             'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 600, 'desicions':a_des, 'is_multi': 1}
      # Fit the model with metacountregressor
      # Step 5: Transform the dataset based on the configuration
      #data_new = helperprocess.transform_dataframe(dataset, config)
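For context, these arguments feed the constructors shown later in this diff. A minimal wiring sketch, assuming the `ObjectiveFunction(x_data, y_data, **kwargs)` and `DifferentialEvolution(objective_function, **kwargs)` signatures visible in the solution.py and metaheuristics.py hunks (the data loading and column layout are hypothetical):

    import pandas as pd
    from metacountregressor.solution import ObjectiveFunction
    from metacountregressor.metaheuristics import DifferentialEvolution

    df = pd.read_csv('data.csv')            # hypothetical dataset
    y, X = df['Y'], df.drop(columns=['Y'])  # hypothetical response/feature split
    args = {'algorithm': 'de', 'test_percentage': 0.15, 'val_percentage': 0.15,
            'obj_1': 'bic', '_obj_2': 'RMSE_TEST', 'MAX_TIME': 600, 'is_multi': 1}
    obj = ObjectiveFunction(X, y, **args)
    solver = DifferentialEvolution(obj, **args)  # 'hs' would route to the harmony-search class instead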
metacountregressor/metaheuristics.py
@@ -422,6 +422,7 @@ class DifferentialEvolution(object):
          self.iter = kwargs.get('_max_iter', 10000)
          self.cr = kwargs.get('_crossover_perc') or kwargs.get('_cr', 0.2)
          self.instance_number = str(kwargs.get('instance_number', 1))
+         self.instance_number = objective_function.instance_number
          self.get_directory()

          self._population = list()
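The added line unconditionally overwrites the value derived from the `instance_number` kwarg on the line above, leaving that kwarg dead. If the intent is to prefer the objective function's value while keeping the kwarg as a fallback, one hypothetical alternative (not the package's code):

    self.instance_number = getattr(objective_function, 'instance_number',
                                   str(kwargs.get('instance_number', 1)))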
metacountregressor/solution.py
@@ -30,6 +30,7 @@ from scipy.special import gammaln
  from sklearn.metrics import mean_absolute_error as MAE
  from sklearn.metrics import mean_squared_error as MSPE
  from statsmodels.tools.numdiff import approx_fprime, approx_hess
+ from autograd import hessian as autograd_hessian
  from sklearn.preprocessing import StandardScaler, MinMaxScaler
  from texttable import Texttable
  import time
@@ -123,6 +124,7 @@ class ObjectiveFunction(object):

      def __init__(self, x_data, y_data, **kwargs):
          self.gbl_best = 1000000.0
+         self.run_numerical_hessian = kwargs.get('r_nu_hess', False)
          self.run_bootstrap = kwargs.get('run_bootstrap', False)
          self.linear_regression = kwargs.get('linear_model', False)
          self.reg_penalty = kwargs.get('reg_penalty',1)
@@ -186,7 +188,7 @@ class ObjectiveFunction(object):
          self.MP = 0
          # Nelder-Mead-BFGS

-         self._max_characteristics = kwargs.get('_max_vars', 30)
+         self._max_characteristics = kwargs.get('_max_vars', 90)

          self.beta_dict = dict
          if 'model_terms' in kwargs:
@@ -611,7 +613,7 @@ class ObjectiveFunction(object):
          Function to for proceccing testing, and finding a suitable initial coefficient (linear intercept)
          """
          if hard_code:
-             # Grouped Terns TODO
+             # Grouped Terrs TODO
              manual_fit_spec = {
                  'fixed_terms': ['Constant', 'US', 'RSMS', 'MCV'],
                  'rdm_terms': ['RSHS:normal', 'AADT:normal', 'Curve50:normal'],
@@ -5602,13 +5604,42 @@ class ObjectiveFunction(object):
          return covariance


+     # Numerical Hessian (finite differences)
+     def numerical_hessian_post(self, f, theta, epsilon=1e-5):
+         n = len(theta)
+         hessian = np.zeros((n, n))
+         for i in range(n):
+             for j in range(n):
+                 theta_ij_plus = theta.copy()
+                 theta_ij_minus = theta.copy()
+                 theta_ij_plus[i] += epsilon
+                 theta_ij_plus[j] += epsilon
+                 theta_ij_minus[i] += epsilon
+                 theta_ij_minus[j] -= epsilon
+
+                 f_ij_plus = f(theta_ij_plus)
+                 f_ij_minus = f(theta_ij_minus)
+                 f_original = f(theta)
+
+                 hessian[i, j] = (f_ij_plus - 2 * f_original + f_ij_minus) / (epsilon ** 2)
+         return hessian
+
+
      def _post_fit_ll_aic_bic(self, optim_res, verbose=1, robust=False, simple_fit=True, is_dispersion=0):
          # sample_size = len(self._x_data) - len(optim_res['x']) -1
          sample_size = len(self._x_data)
          convergence = optim_res['success']
          coeff_ = optim_res['x']
          penalty = 0
+         stderr_opg = None
+         if self.run_numerical_hessian:
+             stderr_opg = self.stderr
+

          if 'hess_inv' in optim_res:
              covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
                  if robust else optim_res['hess_inv']
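A flag on the new `numerical_hessian_post`: its stencil is not the usual central difference. On the diagonal (i == j) the two perturbed points collapse to theta + 2*eps*e_i and theta itself, so the quotient does not approximate the second derivative, the off-diagonal entries omit the theta - e_i +/- e_j evaluations, and `f(theta)` is needlessly recomputed n^2 times. For comparison, a textbook central-difference Hessian sketch (not the package's code):

    import numpy as np

    def central_difference_hessian(f, theta, eps=1e-5):
        # H[i, j] ~= [f(t+ei+ej) - f(t+ei-ej) - f(t-ei+ej) + f(t-ei-ej)] / (4 eps^2)
        theta = np.asarray(theta, dtype=float)
        n = theta.size
        H = np.zeros((n, n))
        for i in range(n):
            for j in range(n):
                e_i = np.zeros(n); e_i[i] = eps
                e_j = np.zeros(n); e_j[j] = eps
                H[i, j] = (f(theta + e_i + e_j) - f(theta + e_i - e_j)
                           - f(theta - e_i + e_j) + f(theta - e_i - e_j)) / (4 * eps ** 2)
        return H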
@@ -5617,9 +5648,11 @@ class ObjectiveFunction(object):
          covariance = self.handle_covariance(covariance)
          covariance = np.clip(covariance, 0, None)
          stderr = np.sqrt(np.diag(covariance))
-         # stderr = [if np.abs(optim_res['x'][i]) >.1 else min(np.abs(optim_res['x'][i]/1.5), stderr[i]) for i in range(len(optim_res['x']))]
-         # stderr = [if np.abs(optim_res['x'][i]) > 0.1 else min(np.abs(optim_res['x'][i]/1.5), stderr[i]) for i in range(len(optim_res['x']))]
-         # stderr = [np.min(np.abs(optim_res['x'][i]/random.uniform(1.8, 3)), stderr[i]) if i > len(self.none_handler(self.fixed_fit)) and np.abs(optim_res['x'][i] > 0.2) else stderr[i] for i in range(len(optim_res['x']))]
+         if stderr_opg:
+             stderr = np.minimum(stderr, stderr_opg)
+
          if is_dispersion:
              stderr[-1] = random.uniform(0.001, 0.005)
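One caveat on the new guard: `stderr_opg` is either `None` or a NumPy array (the bootstrap standard errors stored in `_run_optimization`), and `if stderr_opg:` on a multi-element array raises ValueError ("The truth value of an array ... is ambiguous"). A safer guard (sketch):

    if stderr_opg is not None:
        stderr = np.minimum(stderr, stderr_opg)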
@@ -5912,6 +5945,9 @@ class ObjectiveFunction(object):
          else:
              self.draws = 0

+     def hessian_loglik_function(self, params, *args):
+         return self._loglik_gradient(params, *args)
+
      def _run_optimization(self, XX, y, dispersion, initial_params, bounds, tol, mod):
          """
          Run the optimization process with draws logic and update the Solution object.
@@ -5941,7 +5977,7 @@


          #method = 'Nelder-Mead-BFGS'
-         options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 4000}
+         options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 20000}
          args=(
              X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
              self.rdm_cor_fit, None, None, draws_grouped, XG, mod
@@ -5956,9 +5992,24 @@ class ObjectiveFunction(object):
              ),
              method=method,
              bounds=bounds,
-             tol=tol.get('ftol', 1e-8),  # Use 'ftol' as the default tolerance
+             tol=tol.get('ftol', 1e-6),  # Use 'ftol' as the default tolerance
              options=options
          )
+         if self.run_numerical_hessian:
+             std_errors = self.bootstrap_std_dev(
+                 initial_params=optimization_result.x,
+                 XX=XX,
+                 y=y,
+                 dispersion=dispersion,
+                 bounds=bounds,
+                 tol=tol,
+                 mod=mod,
+                 n_bootstraps=5
+             )
+             self.stderr = std_errors
+
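Despite the flag's name, enabling `r_nu_hess` here runs `bootstrap_std_dev` (five resampled refits) rather than the finite-difference Hessian defined above; the resulting standard errors are stored on `self.stderr` and later taken elementwise-minimum against the Hessian-based ones in `_post_fit_ll_aic_bic`. Turning it on is just a constructor kwarg (X and y hypothetical):

    obj = ObjectiveFunction(X, y, r_nu_hess=True)  # bootstrap-capped standard errors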
@@ -6032,8 +6083,8 @@ class ObjectiveFunction(object):
              ),
              method=self.method_ll,
              bounds=bounds,
-             tol=tol.get('ftol', 1e-8),  # Use 'ftol' as the default tolerance
-             options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 2000}
+             tol=tol.get('ftol', 1e-6),  # Use 'ftol' as the default tolerance
+             options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 200}
          )

          # Store the parameter estimates from this bootstrap iteration
@@ -6122,6 +6173,7 @@ class ObjectiveFunction(object):
          # Validation metrics if test data is available (in-sample and out-of-sample MAE)
          in_sample_mae = None
          out_sample_mae = None
+         out_sample_validation = None
          if self.is_multi and XX_test is not None:
              in_sample_mae = self.validation(
                  optimization_result['x'], y, XX, dispersion=dispersion, model_nature=mod, testing=0
@@ -6129,8 +6181,12 @@ class ObjectiveFunction(object):
              out_sample_mae = self.validation(
                  optimization_result['x'], y_test, XX_test, dispersion=dispersion, model_nature=mod
              )
+             if self.val_percentage > 0:
+                 out_sample_validation = self.validation(
+                     optimization_result['x'], y_test, XX_test, dispersion=dispersion, model_nature=mod, testing=1
+                 )

-         return log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae
+         return log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae, out_sample_validation

      else:
          # Optimization failed, return None for all metrics
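Because the success path now returns nine values instead of eight, every caller's unpack must grow too; the `_postprocess_results` call below is updated accordingly. The failure branch here is not shown in the diff, but if it still returns eight `None`s it needs a ninth to match (sketch):

    return None, None, None, None, None, None, None, None, None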
@@ -6225,7 +6281,8 @@ class ObjectiveFunction(object):

          # Dispersion adds one additional parameter if enabled
          dispersion_param = 1 if dispersion > 0 else 0
-         return sum(self.get_num_params()) + dispersion_param
+         total = sum(self.get_num_params()) + dispersion_param
+         return total
@@ -6294,7 +6351,7 @@ class ObjectiveFunction(object):
          )

          # Post-process results
-         log_lik, aic, bic, stderr, zvalues, pvalues, in_sample_mae, out_sample_mae = self._postprocess_results(
+         log_lik, aic, bic, stderr, zvalues, pvalues, in_sample_mae, out_sample_mae, out_sample_val = self._postprocess_results(
              optimization_result, XX, XX_test, y, mod.get('y_test'), dispersion, mod
          )
@@ -6326,10 +6383,14 @@ class ObjectiveFunction(object):

          # Add metrics to solution object
          sol = Solution()  # Assuming Solution is the appropriate class to store results
+
          sol.add_objective(
              bic=bic,
              aic=aic,
              loglik=log_ll,
+             TRAIN=in_sample_mae,
+             TEST=out_sample_mae,
+             VAL=out_sample_val,
              num_parm=paramNum,
              GOF=other_measures
          )
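The Solution object now carries the train/test/validation MAE alongside the likelihood-based criteria, which is what lets the multi-objective search (`is_multi` with `_obj_2: 'RMSE_TEST'`) rank candidates on out-of-sample error. Since `VAL` is only populated when `val_percentage > 0`, consumers should tolerate `None` there (hypothetical guard, names per the diff):

    # Fall back to TEST when no validation split was configured
    secondary = out_sample_val if out_sample_val is not None else out_sample_mae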
metacountregressor.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: metacountregressor
- Version: 0.1.306
+ Version: 0.1.307
  Summary: Extensive Testing for Estimation of Data Count Models
  Home-page: https://github.com/zahern/CountDataEstimation
  Author: Zeke Ahern
metacountregressor-0.1.307/version.txt
@@ -0,0 +1 @@
+ 0.1.307
metacountregressor-0.1.306/version.txt
@@ -1 +0,0 @@
- 0.1.306