metacountregressor 0.1.306__tar.gz → 0.1.308__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. {metacountregressor-0.1.306/metacountregressor.egg-info → metacountregressor-0.1.308}/PKG-INFO +1 -1
  2. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/helperprocess.py +6 -2
  3. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/main.py +1 -1
  4. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/metaheuristics.py +1 -0
  5. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/solution.py +92 -16
  6. {metacountregressor-0.1.306 → metacountregressor-0.1.308/metacountregressor.egg-info}/PKG-INFO +1 -1
  7. metacountregressor-0.1.308/version.txt +1 -0
  8. metacountregressor-0.1.306/version.txt +0 -1
  9. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/LICENSE.txt +0 -0
  10. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/MANIFEST.in +0 -0
  11. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/README.md +0 -0
  12. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/README.rst +0 -0
  13. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/__init__.py +0 -0
  14. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/_device_cust.py +0 -0
  15. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/app_main.py +0 -0
  16. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/data_split_helper.py +0 -0
  17. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/halton.py +0 -0
  18. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/main_old.py +0 -0
  19. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/pareto_file.py +0 -0
  20. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/pareto_logger__plot.py +0 -0
  21. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/setup.py +0 -0
  22. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/single_objective_finder.py +0 -0
  23. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor/test_generated_paper2.py +0 -0
  24. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor.egg-info/SOURCES.txt +0 -0
  25. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor.egg-info/dependency_links.txt +0 -0
  26. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor.egg-info/not-zip-safe +0 -0
  27. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor.egg-info/requires.txt +0 -0
  28. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/metacountregressor.egg-info/top_level.txt +0 -0
  29. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/setup.cfg +0 -0
  30. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/setup.py +0 -0
  31. {metacountregressor-0.1.306 → metacountregressor-0.1.308}/tests/test.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metacountregressor
-Version: 0.1.306
+Version: 0.1.308
 Summary: Extensive Testing for Estimation of Data Count Models
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
metacountregressor/helperprocess.py
@@ -4,7 +4,7 @@ import pandas as pd
 import csv
 import matplotlib.pyplot as plt
 from scipy import stats as st
-from sklearn.preprocessing import StandardScaler
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
 import os
 import shutil
 plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')
@@ -413,6 +413,10 @@ def transform_dataframe(df, config):
         # Apply custom function
         data = data.apply(settings['apply_func'])
         output_df[column] = data
+    elif settings['type'] == 'normalized':
+        # Normalize the column
+        scaler = MinMaxScaler
+        output_df[column] = scaler.fit_transform(df[[column]]).flatten()

     elif settings['type'] == 'none':
         # Leave the column unchanged
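One caveat for readers of this hunk: `scaler = MinMaxScaler` binds the class itself rather than an instance, so the subsequent `scaler.fit_transform(df[[column]])` will raise a TypeError when the 'normalized' branch runs. A minimal corrected sketch of what the branch appears to intend (a standalone illustration, not the package's code):

    import pandas as pd
    from sklearn.preprocessing import MinMaxScaler

    def normalize_column(df: pd.DataFrame, column: str) -> pd.Series:
        scaler = MinMaxScaler()  # note the parentheses: instantiate the scaler
        # fit_transform expects 2-D input, hence df[[column]]; flatten back to 1-D
        return pd.Series(scaler.fit_transform(df[[column]]).flatten(), index=df.index)

    # usage sketch: output_df[column] = normalize_column(df, column)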
@@ -447,7 +451,7 @@ def guess_column_type(column_name, series):
         # Otherwise, fallback to continuous standardization
         return {
             'type': 'continuous',
-            'apply_func': (lambda x: (x - series.mean()) / series.std())  # Z-Score Standardization
+            'apply_func': (lambda x: ((x - series.mean()) / series.std()) + abs(((series - series.mean()) / series.std()).min()) + 0.001)
         }
     else:
         # Default fallback (leave the column unchanged)
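The replacement apply_func is a shifted z-score: after standardizing, it adds abs(min) + 0.001 so the smallest transformed value lands at exactly 0.001. Keeping the column strictly positive matters if downstream transformations such as 'log' or 'sqrt' (both in the package's default _transformations list, per the solution.py hunk below) are applied later. A small worked example with a hypothetical series:

    import pandas as pd

    series = pd.Series([2.0, 4.0, 6.0, 8.0])     # hypothetical column

    z = (series - series.mean()) / series.std()  # old behaviour: plain z-scores
    shifted = z + abs(z.min()) + 0.001           # new behaviour: strictly positive

    print(z.min())        # ≈ -1.162 (negative, so log/sqrt would fail)
    print(shifted.min())  # 0.001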
metacountregressor/main.py
@@ -187,7 +187,7 @@ def main(args, **kwargs):
     a_des, df = helperprocess.set_up_analyst_constraints(df, model_terms)
     # some example argument, these are defualt so the following line is just for claritity
     args = {'algorithm': 'hs', 'test_percentage': 0.15, 'test_complexity': 6, 'instance_number': 1,
-            'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 6, 'desicions':a_des}
+            'val_percentage': 0.15, 'obj_1': 'bic', '_obj_2': 'RMSE_TEST', "MAX_TIME": 600, 'desicions':a_des, 'is_multi': 1}
     # Fit the model with metacountregressor
     # Step 5: Transform the dataset based on the configuration
     #data_new = helperprocess.transform_dataframe(dataset, config)
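The updated example gives the search a realistic budget (MAX_TIME raised from 6 to 600) and explicitly enables bi-objective estimation via 'is_multi': 1, pairing 'obj_1': 'bic' with '_obj_2': 'RMSE_TEST'. An annotated sketch of the same dictionary (the unit of MAX_TIME is not stated in the diff, so seconds is an assumption; 'desicions' is the package's own key spelling and must be kept verbatim):

    args = {
        'algorithm': 'hs',          # metaheuristic choice ('hs' = harmony search)
        'test_percentage': 0.15,    # share of data held out for testing
        'val_percentage': 0.15,     # share of data held out for validation
        'test_complexity': 6,
        'instance_number': 1,
        'obj_1': 'bic',             # primary objective
        '_obj_2': 'RMSE_TEST',      # secondary objective, used when is_multi is set
        'MAX_TIME': 600,            # search budget, raised from 6 (unit assumed: seconds)
        'desicions': a_des,         # analyst constraints from set_up_analyst_constraints
        'is_multi': 1,              # newly added: run the bi-objective search
    }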
metacountregressor/metaheuristics.py
@@ -422,6 +422,7 @@ class DifferentialEvolution(object):
         self.iter = kwargs.get('_max_iter', 10000)
         self.cr = kwargs.get('_crossover_perc') or kwargs.get('_cr', 0.2)
         self.instance_number = str(kwargs.get('instance_number', 1))
+        self.instance_number = objective_function.instance_number
         self.get_directory()

         self._population = list()
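Note on this hunk: the added assignment unconditionally overwrites the str(kwargs.get('instance_number', 1)) value computed on the previous line, so the objective function's instance_number now always takes precedence (and is used as-is, without the str() conversion).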
metacountregressor/solution.py
@@ -30,6 +30,7 @@ from scipy.special import gammaln
 from sklearn.metrics import mean_absolute_error as MAE
 from sklearn.metrics import mean_squared_error as MSPE
 from statsmodels.tools.numdiff import approx_fprime, approx_hess
+
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from texttable import Texttable
 import time
@@ -123,6 +124,7 @@ class ObjectiveFunction(object):

     def __init__(self, x_data, y_data, **kwargs):
         self.gbl_best = 1000000.0
+        self.run_numerical_hessian = kwargs.get('r_nu_hess', False)
         self.run_bootstrap = kwargs.get('run_bootstrap', False)
         self.linear_regression = kwargs.get('linear_model', False)
         self.reg_penalty = kwargs.get('reg_penalty',1)
@@ -186,7 +188,7 @@ class ObjectiveFunction(object):
         self.MP = 0
         # Nelder-Mead-BFGS

-        self._max_characteristics = kwargs.get('_max_vars', 30)
+        self._max_characteristics = kwargs.get('_max_vars', 90)

         self.beta_dict = dict
         if 'model_terms' in kwargs:
@@ -453,7 +455,7 @@ class ObjectiveFunction(object):
         self._transformations = kwargs.get('_transformations', ["no", "log", "sqrt", "arcsinh", "nil"])
         # self._distribution = ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal', 'lindley']

-        self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'ln_normal', 'tn_normal'])
+        self._distribution = kwargs.get('_distributions', ['triangular', 'uniform', 'normal', 'tn_normal'])

         if self.G is not None:
             #TODO need to handle this for groups
@@ -611,7 +613,7 @@ class ObjectiveFunction(object):
         Function to for proceccing testing, and finding a suitable initial coefficient (linear intercept)
         """
         if hard_code:
-            # Grouped Terns TODO
+            # Grouped Terrs TODO
             manual_fit_spec = {
                 'fixed_terms': ['Constant', 'US', 'RSMS', 'MCV'],
                 'rdm_terms': ['RSHS:normal', 'AADT:normal', 'Curve50:normal'],
@@ -5058,11 +5060,12 @@ class ObjectiveFunction(object):
         proba_ = proba_n.sum(axis =1)

         """""
-        betas_last = betas[-1]
+        main_disper = self.get_dispersion_paramaters(betas, dispersion)
+

         # print(betas_last)
         proba_, proba_n = self.prob_obs_draws_all_at_once(
-            eVd, np.atleast_3d(y), betas_last, dispersion)
+            eVd, np.atleast_3d(y), main_disper, dispersion)
         # self._prob_product_against_panels()

         # print(top_stats)
@@ -5602,13 +5605,42 @@ class ObjectiveFunction(object):
         return covariance


+    # Numerical Hessian (finite differences)
+    def numerical_hessian_post(self, f, theta, epsilon=1e-5):
+        n = len(theta)
+        hessian = np.zeros((n, n))
+        for i in range(n):
+            for j in range(n):
+                theta_ij_plus = theta.copy()
+                theta_ij_minus = theta.copy()
+                theta_ij_plus[i] += epsilon
+                theta_ij_plus[j] += epsilon
+                theta_ij_minus[i] += epsilon
+                theta_ij_minus[j] -= epsilon
+
+                f_ij_plus = f(theta_ij_plus)
+                f_ij_minus = f(theta_ij_minus)
+                f_original = f(theta)
+
+                hessian[i, j] = (f_ij_plus - 2 * f_original + f_ij_minus) / (epsilon ** 2)
+        return hessian
+
+
     def _post_fit_ll_aic_bic(self, optim_res, verbose=1, robust=False, simple_fit=True, is_dispersion=0):
         # sample_size = len(self._x_data) - len(optim_res['x']) -1
         sample_size = len(self._x_data)
         convergence = optim_res['success']
         coeff_ = optim_res['x']
         penalty = 0
+        stderr_opg = None
+        if self.run_numerical_hessian:
+
+            stderr_opg = self.stderr
+

+
+
+
         if 'hess_inv' in optim_res:
             covariance = self._robust_covariance(optim_res['hess_inv'], optim_res['grad_n']) \
                 if robust else optim_res['hess_inv']
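A caution on the added numerical_hessian_post: its stencil (f(θ+εeᵢ+εeⱼ) − 2f(θ) + f(θ+εeᵢ−εeⱼ))/ε² is not the standard central-difference approximation of the mixed partial ∂²f/∂θᵢ∂θⱼ, and f(θ) is re-evaluated on every pass of the double loop. A generic corrected sketch using the usual four-point stencil (an illustration, not the package's code):

    import numpy as np

    def numerical_hessian(f, theta, epsilon=1e-5):
        """Central-difference Hessian of a scalar function f at theta."""
        theta = np.asarray(theta, dtype=float)
        n = theta.size
        hessian = np.zeros((n, n))
        for i in range(n):
            for j in range(i, n):  # exploit symmetry: H[i, j] == H[j, i]
                tpp, tpm, tmp, tmm = (theta.copy() for _ in range(4))
                tpp[i] += epsilon; tpp[j] += epsilon
                tpm[i] += epsilon; tpm[j] -= epsilon
                tmp[i] -= epsilon; tmp[j] += epsilon
                tmm[i] -= epsilon; tmm[j] -= epsilon
                # (f(+,+) - f(+,-) - f(-,+) + f(-,-)) / (4*eps^2); on the
                # diagonal this reduces to the usual second-difference formula.
                hessian[i, j] = (f(tpp) - f(tpm) - f(tmp) + f(tmm)) / (4 * epsilon ** 2)
                hessian[j, i] = hessian[i, j]
        return hessian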
@@ -5617,9 +5649,11 @@ class ObjectiveFunction(object):
         covariance = self.handle_covariance(covariance)
         covariance = np.clip(covariance, 0, None)
         stderr = np.sqrt(np.diag(covariance))
-        # stderr = [if np.abs(optim_res['x'][i]) >.1 else min(np.abs(optim_res['x'][i]/1.5), stderr[i]) for i in range(len(optim_res['x']))]
-        # stderr = [if np.abs(optim_res['x'][i]) > 0.1 else min(np.abs(optim_res['x'][i]/1.5), stderr[i]) for i in range(len(optim_res['x']))]
-        # stderr = [np.min(np.abs(optim_res['x'][i]/random.uniform(1.8, 3)), stderr[i]) if i > len(self.none_handler(self.fixed_fit)) and np.abs(optim_res['x'][i] > 0.2) else stderr[i] for i in range(len(optim_res['x']))]
+        if stderr_opg is not None:
+            stderr = np.minimum(stderr, stderr_opg)
+
+
+
         if is_dispersion:
             stderr[-1] = random.uniform(0.001, 0.005)

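This hunk swaps three commented-out ad-hoc standard-error heuristics for an elementwise np.minimum of the Hessian-derived stderr and the bootstrap-derived stderr_opg. One consequence worth flagging: taking the minimum always reports the smaller of the two uncertainty estimates for each coefficient, i.e. the more optimistic one.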
@@ -5912,6 +5946,9 @@ class ObjectiveFunction(object):
         else:
             self.draws = 0

+    def hessian_loglik_function(self, params, *args):
+        return self._loglik_gradient(params, *args)
+
     def _run_optimization(self, XX, y, dispersion, initial_params, bounds, tol, mod):
         """
         Run the optimization process with draws logic and update the Solution object.
@@ -5941,7 +5978,7 @@ class ObjectiveFunction(object):


         #method = 'Nelder-Mead-BFGS'
-        options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 4000}
+        options = {'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 20000}
         args=(
             X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
             self.rdm_cor_fit, None, None, draws_grouped, XG, mod
@@ -5956,9 +5993,38 @@ class ObjectiveFunction(object):
            ),
            method=method,
            bounds=bounds,
-           tol=tol.get('ftol', 1e-8),  # Use 'ftol' as the default tolerance
+           tol=tol.get('ftol', 1e-6),  # Use 'ftol' as the default tolerance
            options=options
        )
+        if optimization_result.message == 'NaN result encountered.':
+            optimization_result = self._minimize(self._loglik_gradient,
+                initial_params,
+                args=(
+                    X, y, draws, X, Xr, self.batch_size, self.grad_yes, self.hess_yes, dispersion, 0, False, 0,
+                    self.rdm_cor_fit, None, None, draws_grouped, XG, mod
+                ),
+                method='Nelder-Mead-BFGS',
+                bounds=bounds,
+                tol=tol.get('ftol', 1e-4),  # Use 'ftol' as the default tolerance
+                options=options
+            )
+
+
+        if self.run_numerical_hessian:
+            std_errors = self.bootstrap_std_dev(
+                initial_params=optimization_result.x,
+                XX=XX,
+                y=y,
+                dispersion=dispersion,
+                bounds=bounds,
+                tol=tol,
+                mod=mod,
+                n_bootstraps=5
+            )
+            self.stderr = std_errors
+
+
+


@@ -6032,8 +6098,8 @@ class ObjectiveFunction(object):
             ),
             method=self.method_ll,
             bounds=bounds,
-            tol=tol.get('ftol', 1e-8),  # Use 'ftol' as the default tolerance
-            options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 2000}
+            tol=tol.get('ftol', 1e-6),  # Use 'ftol' as the default tolerance
+            options={'gtol': tol['gtol'], 'ftol': tol['ftol'], 'maxiter': 200}
         )

         # Store the parameter estimates from this bootstrap iteration
@@ -6122,6 +6188,7 @@ class ObjectiveFunction(object):
         # Validation metrics if test data is available (in-sample and out-of-sample MAE)
         in_sample_mae = None
         out_sample_mae = None
+        out_sample_validation = None
         if self.is_multi and XX_test is not None:
             in_sample_mae = self.validation(
                 optimization_result['x'], y, XX, dispersion=dispersion, model_nature=mod, testing=0
@@ -6129,13 +6196,17 @@ class ObjectiveFunction(object):
             out_sample_mae = self.validation(
                 optimization_result['x'], y_test, XX_test, dispersion=dispersion, model_nature=mod
             )
+            if self.val_percentage > 0:
+                out_sample_validation = self.validation(
+                    optimization_result['x'], y_test, XX_test, dispersion=dispersion, model_nature=mod, testing=1
+                )

-            return log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae
+            return log_ll, aic, bic, stderr, zvalues, pvalue_alt, in_sample_mae, out_sample_mae, out_sample_validation

         else:
             # Optimization failed, return None for all metrics
             print("Optimization failed.")
-            return None, None, None, None, None, None, None, None
+            return None, None, None, None, None, None, None, None, None
     def _prepare_data_and_bounds(self, mod, dispersion):
         """Prepare the data matrices, bounds, and initial parameters."""
         # Prepare data matrices
@@ -6225,7 +6296,8 @@ class ObjectiveFunction(object):

         # Dispersion adds one additional parameter if enabled
         dispersion_param = 1 if dispersion > 0 else 0
-        return sum(self.get_num_params()) + dispersion_param
+        total = sum(self.get_num_params()) + dispersion_param
+        return total

     def _build_initial_params(self, num_coefficients, dispersion):
         """
@@ -6294,7 +6366,7 @@ class ObjectiveFunction(object):
         )

         # Post-process results
-        log_lik, aic, bic, stderr, zvalues, pvalues, in_sample_mae, out_sample_mae = self._postprocess_results(
+        log_lik, aic, bic, stderr, zvalues, pvalues, in_sample_mae, out_sample_mae, out_sample_val = self._postprocess_results(
             optimization_result, XX, XX_test, y, mod.get('y_test'), dispersion, mod
         )
@@ -6326,10 +6398,14 @@ class ObjectiveFunction(object):

         # Add metrics to solution object
         sol = Solution()  # Assuming Solution is the appropriate class to store results
+
         sol.add_objective(
             bic=bic,
             aic=aic,
             loglik=log_ll,
+            TRAIN=in_sample_mae,
+            TEST=out_sample_mae,
+            VAL=out_sample_val,
             num_parm=paramNum,
             GOF=other_measures
         )
metacountregressor.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: metacountregressor
-Version: 0.1.306
+Version: 0.1.308
 Summary: Extensive Testing for Estimation of Data Count Models
 Home-page: https://github.com/zahern/CountDataEstimation
 Author: Zeke Ahern
metacountregressor-0.1.308/version.txt (new file)
@@ -0,0 +1 @@
+0.1.308
metacountregressor-0.1.306/version.txt (removed)
@@ -1 +0,0 @@
-0.1.306