metacountregressor 0.1.307__py3-none-any.whl → 0.1.309__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metacountregressor/helperprocess.py
@@ -4,7 +4,7 @@ import pandas as pd
 import csv
 import matplotlib.pyplot as plt
 from scipy import stats as st
-from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
 import os
 import shutil
 plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
@@ -413,6 +413,10 @@ def transform_dataframe(df, config):
             # Apply custom function
             data = data.apply(settings['apply_func'])
             output_df[column] = data
+        elif settings['type'] == 'normalized':
+            # Normalize the column to the [0, 1] range
+            scaler = MinMaxScaler()
+            output_df[column] = scaler.fit_transform(df[[column]]).flatten()
 
         elif settings['type'] == 'none':
             # Leave the column unchanged
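Note on the new 'normalized' branch: MinMaxScaler rescales a column linearly so its minimum maps to 0 and its maximum to 1, and fit_transform requires a scaler instance rather than the class itself. A minimal standalone sketch of the same transformation (the column name and values below are hypothetical):

    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler

    df = pd.DataFrame({'AADT': [1200.0, 5400.0, 880.0, 3100.0]})  # hypothetical data
    scaler = MinMaxScaler()                                       # instance, not the class
    scaled = scaler.fit_transform(df[['AADT']]).flatten()         # min -> 0.0, max -> 1.0
    print(scaled)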
@@ -447,7 +451,7 @@ def guess_column_type(column_name, series):
         # Otherwise, fall back to continuous standardization
         return {
             'type': 'continuous',
-            'apply_func': (lambda x: (x - series.mean()) / series.std())  # Z-Score Standardization
+            'apply_func': (lambda x: ((x - series.mean()) / series.std()) + abs(((series - series.mean()) / series.std()).min()) + 0.001)  # shifted z-score; strictly positive
         }
     else:
         # Default fallback (leave the column unchanged)
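The replacement apply_func is a shifted z-score: it standardizes the column, then adds the absolute value of the smallest standardized value plus 0.001, so every transformed value is strictly positive (the column minimum maps to exactly 0.001). A worked check on an assumed three-value series:

    import pandas as pd

    series = pd.Series([2.0, 4.0, 6.0])          # assumed data: mean = 4, sample std = 2
    z = (series - series.mean()) / series.std()  # [-1.0, 0.0, 1.0]
    shifted = z + abs(z.min()) + 0.001           # [0.001, 1.001, 2.001], strictly positive
    print(shifted.tolist())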
metacountregressor/main.py
@@ -187,7 +187,7 @@ def main(args, **kwargs):
     a_des, df = helperprocess.set_up_analyst_constraints(df, model_terms)
     # Some example arguments; these are the defaults, so the following line is just for clarity
     args = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number': 1,
-            'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 6, 'desicions': a_des}
+            'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 600, 'desicions': a_des, 'is_multi': 1}
     # Fit the model with metacountregressor
     # Step 5: Transform the dataset based on the configuration
     # data_new = helperprocess.transform_dataframe(dataset, config)
metacountregressor/metaheuristics.py
@@ -422,6 +422,7 @@ class DifferentialEvolution(object):
         self.iter = kwargs.get('_max_iter', 10000)
         self.cr = kwargs.get('_crossover_perc') or kwargs.get('_cr', 0.2)
         self.instance_number = str(kwargs.get('instance_number', 1))
+        self.instance_number = objective_function.instance_number
         self.get_directory()
 
         self._population = list()
metacountregressor/solution.py
@@ -30,6 +30,7 @@ from scipy.special import gammaln
 from sklearn.metrics import mean_absolute_error as MAE
 from sklearn.metrics import mean_squared_error as MSPE
 from statsmodels.tools.numdiff import approx_fprime, approx_hess
+from autograd import hessian as autograd_hessian
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from texttable import Texttable
 import time
@@ -123,6 +124,7 @@ class ObjectiveFunction(object):
 
     def __init__(self, x_data, y_data, **kwargs):
         self.gbl_best = 1000000.0
+        self.run_numerical_hessian = kwargs.get('r_nu_hess', False)
         self.run_bootstrap = kwargs.get('run_bootstrap', False)
         self.linear_regression = kwargs.get('linear_model', False)
         self.reg_penalty = kwargs.get('reg_penalty', 1)
@@ -186,7 +188,7 @@ class ObjectiveFunction(object):
         self.MP = 0
         # Nelder-Mead-BFGS
 
-        self._max_characteristics = kwargs.get('_max_vars', 30)
+        self._max_characteristics = kwargs.get('_max_vars', 90)
 
         self.beta_dict = dict
         if 'model_terms' in kwargs:
@@ -611,11 +613,12 @@ class ObjectiveFunction(object):
         Function for processing testing, and finding a suitable initial coefficient (linear intercept)
         """
         if hard_code:
+            # Grouped terms TODO
             manual_fit_spec = {
                 'fixed_terms': ['Constant', 'US', 'RSMS', 'MCV'],
                 'rdm_terms': ['RSHS:normal', 'AADT:normal', 'Curve50:normal'],
                 'rdm_cor_terms': [],
-                'grouped_terms': [],
+                'group_rdm': [],
                 'hetro_in_means': [],
                 'transformations': ['no', 'log', 'log', 'no', 'no', 'no', 'no'],
                 'dispersion': 1
@@ -637,7 +640,7 @@ class ObjectiveFunction(object):
                 'fixed_terms': ['const'],
                 'rdm_terms': [],
                 'rdm_cor_terms': [],
-                'grouped_terms': [],
+                'group_rdm': [],
                 'hetro_in_means': [],
                 'transformations': ['no'],
                 'dispersion': 1
@@ -5601,13 +5604,42 @@ class ObjectiveFunction(object):
         return covariance
 
 
+    # Numerical Hessian (central finite differences)
+    def numerical_hessian_post(self, f, theta, epsilon=1e-5):
+        n = len(theta)
+        hessian = np.zeros((n, n))
+        for i in range(n):
+            for j in range(n):
+                # Perturb theta in the i-th and j-th coordinates
+                theta_pp = theta.copy()
+                theta_pm = theta.copy()
+                theta_mp = theta.copy()
+                theta_mm = theta.copy()
+                theta_pp[i] += epsilon
+                theta_pp[j] += epsilon
+                theta_pm[i] += epsilon
+                theta_pm[j] -= epsilon
+                theta_mp[i] -= epsilon
+                theta_mp[j] += epsilon
+                theta_mm[i] -= epsilon
+                theta_mm[j] -= epsilon
+
+                # Four-point central-difference approximation of the
+                # mixed partial d2f / (dtheta_i dtheta_j)
+                hessian[i, j] = (f(theta_pp) - f(theta_pm) - f(theta_mp) + f(theta_mm)) / (4 * epsilon ** 2)
+        return hessian
+
+
     def _post_fit_ll_aic_bic(self, optim_res, verbose=1, robust=False, simple_fit=True, is_dispersion=0):
         # sample_size = len(self._x_data) - len(optim_res['x']) - 1
         sample_size = len(self._x_data)
         convergence = optim_res['success']
         coeff_ = optim_res['x']
         penalty = 0
+        stderr_opg = None
+        if self.run_numerical_hessian:
+            stderr_opg = self.stderr
 
         if 'hess_inv' in optim_res:
             covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
                 if robust else optim_res['hess_inv']
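The central-difference scheme in numerical_hessian_post can be sanity-checked against a function whose Hessian is known in closed form; the quadratic below is a hypothetical test case, not part of the package:

    import numpy as np

    def f(theta):
        # f = x^2 + 3xy + 2y^2 has the constant Hessian [[2, 3], [3, 4]]
        return theta[0] ** 2 + 3 * theta[0] * theta[1] + 2 * theta[1] ** 2

    def numerical_hessian(f, theta, epsilon=1e-5):
        n = len(theta)
        H = np.zeros((n, n))
        for i in range(n):
            for j in range(n):
                t_pp, t_pm, t_mp, t_mm = (theta.copy() for _ in range(4))
                t_pp[i] += epsilon
                t_pp[j] += epsilon
                t_pm[i] += epsilon
                t_pm[j] -= epsilon
                t_mp[i] -= epsilon
                t_mp[j] += epsilon
                t_mm[i] -= epsilon
                t_mm[j] -= epsilon
                H[i, j] = (f(t_pp) - f(t_pm) - f(t_mp) + f(t_mm)) / (4 * epsilon ** 2)
        return H

    print(np.round(numerical_hessian(f, np.array([0.5, -1.0])), 3))  # ~[[2. 3.] [3. 4.]]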
@@ -5616,9 +5648,11 @@
         covariance = self.handle_covariance(covariance)
         covariance = np.clip(covariance, 0, None)
         stderr = np.sqrt(np.diag(covariance))
-        # stderr = [if np.abs(optim_res['x'][i]) >.1 else min(np.abs(optim_res['x'][i]/1.5), stderr[i]) for i in range(len(optim_res['x']))]
-        # stderr = [if np.abs(optim_res['x'][i]) > 0.1 else min(np.abs(optim_res['x'][i]/1.5), stderr[i]) for i in range(len(optim_res['x']))]
-        # stderr = [np.min(np.abs(optim_res['x'][i]/random.uniform(1.8, 3)), stderr[i]) if i > len(self.none_handler(self.fixed_fit)) and np.abs(optim_res['x'][i] > 0.2) else stderr[i] for i in range(len(optim_res['x']))]
+        if stderr_opg is not None:
+            # Keep, per coefficient, the smaller of the two standard-error estimates
+            stderr = np.minimum(stderr, stderr_opg)
 
         if is_dispersion:
             stderr[-1] = random.uniform(0.001, 0.005)
 
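np.minimum is elementwise, so the new branch keeps, coefficient by coefficient, the smaller of the Hessian-based and the bootstrap-derived standard errors. A tiny illustration with made-up values:

    import numpy as np

    stderr = np.array([0.12, 0.05, 0.30])      # hypothetical Hessian-based SEs
    stderr_opg = np.array([0.10, 0.07, 0.25])  # hypothetical bootstrap SEs
    print(np.minimum(stderr, stderr_opg))      # [0.1  0.05 0.25]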
@@ -5911,6 +5945,9 @@
         else:
             self.draws = 0
 
+    def hessian_loglik_function(self, params, *args):
+        return self._loglik_gradient(params, *args)
+
     def _run_optimization(self, XX, y, dispersion, initial_params, bounds, tol, mod):
         """
         Run the optimization process with draws logic and update the Solution object.
@@ -5940,7 +5977,7 @@
 
 
         # method = 'Nelder-Mead-BFGS'
-        options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 4000}
+        options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 20000}
         args = (
             X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
             self.rdm_cor_fit, None, None, draws_grouped, XG, mod
@@ -5955,9 +5992,24 @@
             ),
             method=method,
             bounds=bounds,
-            tol=tol.get('ftol', 1e-8),  # Use 'ftol' as the default tolerance
+            tol=tol.get('ftol', 1e-6),  # Use 'ftol' as the default tolerance
             options=options
         )
+        if self.run_numerical_hessian:
+            std_errors = self.bootstrap_std_dev(
+                initial_params=optimization_result.x,
+                XX=XX,
+                y=y,
+                dispersion=dispersion,
+                bounds=bounds,
+                tol=tol,
+                mod=mod,
+                n_bootstraps=5
+            )
+            self.stderr = std_errors
+
 
 
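bootstrap_std_dev is internal to the package, but the pattern it names is standard: refit the model on resampled data and take the standard deviation of the estimates across refits. A generic sketch under assumed names (fit_fn is hypothetical, and n_bootstraps=5 is far fewer replicates than usually recommended, trading accuracy for speed):

    import numpy as np

    def bootstrap_std_errors(fit_fn, X, y, n_bootstraps=5, seed=0):
        # fit_fn(X, y) is assumed to return a fitted parameter vector
        rng = np.random.default_rng(seed)
        estimates = []
        for _ in range(n_bootstraps):
            idx = rng.integers(0, len(y), size=len(y))  # resample rows with replacement
            estimates.append(fit_fn(X[idx], y[idx]))
        return np.std(np.asarray(estimates), axis=0)    # per-parameter std. dev.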
@@ -6031,8 +6083,8 @@
             ),
             method=self.method_ll,
             bounds=bounds,
-            tol=tol.get('ftol', 1e-8),  # Use 'ftol' as the default tolerance
-            options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 2000}
+            tol=tol.get('ftol', 1e-6),  # Use 'ftol' as the default tolerance
+            options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 200}
         )
 
         # Store the parameter estimates from this bootstrap iteration
@@ -6121,6 +6173,7 @@
         # Validation metrics if test data is available (in-sample and out-of-sample MAE)
         in_sample_mae = None
         out_sample_mae = None
+        out_sample_validation = None
         if self.is_multi and XX_test is not None:
             in_sample_mae = self.validation(
                 optimization_result['x'], y, XX, dispersion=dispersion, model_nature=mod, testing=0
@@ -6128,8 +6181,12 @@
             out_sample_mae = self.validation(
                 optimization_result['x'], y_test, XX_test, dispersion=dispersion, model_nature=mod
             )
+            if self.val_percentage > 0:
+                out_sample_validation = self.validation(
+                    optimization_result['x'], y_test, XX_test, dispersion=dispersion, model_nature=mod, testing=1
+                )
 
-            return log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae
+            return log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae, out_sample_validation
 
         else:
             # Optimization failed, return None for all metrics
@@ -6224,7 +6281,8 @@
 
         # Dispersion adds one additional parameter if enabled
         dispersion_param = 1 if dispersion > 0 else 0
-        return sum(self.get_num_params()) + dispersion_param
+        total = sum(self.get_num_params()) + dispersion_param
+        return total
 
     def _build_initial_params(self, num_coefficients, dispersion):
         """
@@ -6238,11 +6296,11 @@
             Initial parameter array.
         """
         # Generate random initial coefficients
-        initial_params = np.random.uniform(-.1, 0.1, size=num_coefficients)
+        initial_params = np.random.uniform(0.0000, 0.01, size=num_coefficients)
         parma_sum = sum(self.get_num_params()[:2])
 
 
-        initial_params[parma_sum:-dispersion] = 0.5
+        initial_params[parma_sum:-dispersion] = 0.0001
 
         # Add dispersion parameter if applicable
         if dispersion > 0:
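The new initialization starts all coefficients in a small positive band [0, 0.01] and pins the random-parameter spread entries at 0.0001, instead of the previous symmetric [-0.1, 0.1] draws with 0.5 spreads. A sketch of the resulting layout under assumed sizes (the split point is illustrative):

    import numpy as np

    num_coefficients, dispersion = 8, 1
    initial_params = np.random.uniform(0.0, 0.01, size=num_coefficients)
    parma_sum = 4                                   # assumed count of mean parameters
    initial_params[parma_sum:-dispersion] = 0.0001  # near-zero starting spreads
    print(initial_params)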
@@ -6251,7 +6309,7 @@
 
         return initial_params
 
-    def fitRegression(self, mod, dispersion=0, maxiter=4000, batch_size=None, num_hess=False, **kwargs):
+    def fitRegression(self, mod, dispersion=0, maxiter=20000, batch_size=None, num_hess=False, **kwargs):
         """
         Fits a Poisson regression, NB regression (dispersion=1), or GP regression (dispersion=2).
 
@@ -6293,7 +6351,7 @@
         )
 
         # Post-process results
-        log_lik, aic, bic, stderr, zvalues, pvalues, in_sample_mae, out_sample_mae = self._postprocess_results(
+        log_lik, aic, bic, stderr, zvalues, pvalues, in_sample_mae, out_sample_mae, out_sample_val = self._postprocess_results(
            optimization_result, XX, XX_test, y, mod.get('y_test'), dispersion, mod
         )
 
@@ -6325,10 +6383,14 @@
 
         # Add metrics to solution object
         sol = Solution()  # Assuming Solution is the appropriate class to store results
+
         sol.add_objective(
             bic=bic,
             aic=aic,
             loglik=log_ll,
+            TRAIN=in_sample_mae,
+            TEST=out_sample_mae,
+            VAL=out_sample_val,
             num_parm=paramNum,
             GOF=other_measures
         )
metacountregressor-0.1.309.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metacountregressor
-Version: 0.1.307
+Version: 0.1.309
 Summary: Extensive Testing for Estimation of Data Count Models
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
metacountregressor-0.1.309.dist-info/RECORD
@@ -3,18 +3,18 @@ metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0
 metacountregressor/app_main.py,sha256=vY3GczTbGbBRalbzMkl_9jVW7RMgEOc6z2Dr1IZJv9c,10014
 metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
 metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
-metacountregressor/helperprocess.py,sha256=ufdB6BcCIYN6btWdxyFlRCReuYEbVh6es1sdLsd8RTg,25917
-metacountregressor/main.py,sha256=xfpKN2w0kePHp_Q2HOPjtG15PLEN1L3sEnDw1PHBquw,23668
+metacountregressor/helperprocess.py,sha256=8PFxX3KTsWH0MlfhniDzKQOJQ63LmJ0eg6cYhQP_fRA,26162
+metacountregressor/main.py,sha256=tGOm8DdbdyDf316qIxDAre6l6GzfJIWYNYIBaSeIemI,23685
 metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
-metacountregressor/metaheuristics.py,sha256=eVlP9FO8StVxj7D6m8n6ekRR45sOtjZuoakr5tzb-H4,106944
+metacountregressor/metaheuristics.py,sha256=P0Xjlvhp1cEwZFACrqeeets6x8BK7F2iDyu1OfS4bog,107010
 metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiLur0k,23096
 metacountregressor/pareto_logger__plot.py,sha256=mEU2QN4wmsM7t39GJ_XhJ_jjsdl09JOmG0U2jICrAkI,30037
 metacountregressor/setup.py,sha256=5UcQCCLR8Fm5odA3MX78WwahavxFq4mVD6oq0IuQvAY,936
 metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
-metacountregressor/solution.py,sha256=3YaugVfEcOQnrtqY5chH-qhBl_2DmI8CatZyjFdQngA,317534
+metacountregressor/solution.py,sha256=YRskJOR7MU50z22mdt5J9KLMmzHXZNXGnHRMLQPc3R0,319113
 metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
-metacountregressor-0.1.307.dist-info/licenses/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-metacountregressor-0.1.307.dist-info/METADATA,sha256=478JkHo4OCeggDG7O0ujZ0HMi_NLzHGpSBvGH3WIyBU,23581
-metacountregressor-0.1.307.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
-metacountregressor-0.1.307.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
-metacountregressor-0.1.307.dist-info/RECORD,,
+metacountregressor-0.1.309.dist-info/licenses/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+metacountregressor-0.1.309.dist-info/METADATA,sha256=vLvLKlMnboMQGkDkupIo-Uwr9gx-rdM5HuEvrt08uMs,23581
+metacountregressor-0.1.309.dist-info/WHEEL,sha256=DK49LOLCYiurdXXOXwGJm6U4DkHkg4lcxjhqwRa0CP4,91
+metacountregressor-0.1.309.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
+metacountregressor-0.1.309.dist-info/RECORD,,
metacountregressor-0.1.309.dist-info/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (77.0.3)
+Generator: setuptools (78.0.2)
 Root-Is-Purelib: true
 Tag: py3-none-any