metacountregressor 0.1.121__py3-none-any.whl → 0.1.123__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registry.
metacountregressor/helperprocess.py

@@ -2,6 +2,7 @@ import numpy as np
  import pandas as pd
  import csv
  import matplotlib.pyplot as plt
+ from sklearn.preprocessing import StandardScaler

  plt.style.use('https://github.com/dhaitz/matplotlib-stylesheets/raw/master/pitayasmoothie-dark.mplstyle')

@@ -151,6 +152,99 @@ def remove_files(yes=1):
      os.remove('pop_log.csv')


+ # Function to process the DataFrame
+ '''
+ Example usage
+ # Configuration dictionary
+ config = {
+     'Age': {
+         'type': 'bin',
+         'bins': [0, 18, 35, 50, 100],
+         'labels': ['Child', 'YoungAdult', 'MiddleAged', 'Senior'],
+         'prefix': 'Age_Binned'
+     },
+     'Income': {
+         'type': 'bin',
+         'bins': [0, 2000, 5000, 10000],
+         'labels': ['Low', 'Medium', 'High'],
+         'prefix': 'Income_Binned'
+     },
+     'Gender': {
+         'type': 'one-hot',
+         'prefix': 'Gender'
+     },
+     'Score': {
+         'type': 'none'
+     }
+ }
+ '''
+
+
+ def transform_dataframe(df, config):
+     output_df = pd.DataFrame()
+
+     for column, settings in config.items():
+         if settings['type'] == 'bin':
+             # Apply binning
+             binned = pd.cut(
+                 df[column],
+                 bins=settings['bins'],
+                 labels=settings['labels'],
+                 right=False
+             )
+             # One-hot encode the binned column
+             binned_dummies = pd.get_dummies(binned, prefix=settings['prefix'])
+             output_df = pd.concat([output_df, binned_dummies], axis=1)
+
+         elif settings['type'] == 'one-hot':
+             # One-hot encode the column
+             one_hot_dummies = pd.get_dummies(df[column], prefix=settings.get('prefix', column))
+             output_df = pd.concat([output_df, one_hot_dummies], axis=1)
+
+         elif settings['type'] == 'continuous':
+             # Apply function to continuous data
+             data = df[column]
+             if 'bounds' in settings:
+                 # Apply bounds filtering
+                 lower, upper = settings['bounds']
+                 data = data[(data >= lower) & (data <= upper)]
+             if 'apply_func' in settings:
+                 # Apply custom function
+                 data = data.apply(settings['apply_func'])
+             output_df[column] = data
+
+         elif settings['type'] == 'none':
+             # Leave the column unchanged
+             output_df = pd.concat([output_df, df[[column]]], axis=1)
+
+     return output_df
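To make the docstring concrete, here is a minimal, self-contained sketch of driving the new helper; the sample rows are invented, and the config mirrors the docstring example above:

```python
import pandas as pd

from metacountregressor.helperprocess import transform_dataframe

# Invented sample data covering each transform type
df = pd.DataFrame({
    'Age': [12, 25, 40, 70],
    'Gender': ['M', 'F', 'F', 'M'],
    'Score': [0.5, 0.9, 0.3, 0.7],
})

config = {
    'Age': {'type': 'bin', 'bins': [0, 18, 35, 50, 100],
            'labels': ['Child', 'YoungAdult', 'MiddleAged', 'Senior'],
            'prefix': 'Age_Binned'},
    'Gender': {'type': 'one-hot', 'prefix': 'Gender'},
    'Score': {'type': 'none'},
}

out = transform_dataframe(df, config)
# One dummy column per Age bin, Gender_F/Gender_M, and Score passed through
print(out.columns.tolist())
```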
+
+ # Helper function to guess column type and update `config`
+ def guess_column_type(column_name, series):
+     if series.dtype == 'object' or series.dtype.name == 'category':
+         # If the column is categorical (e.g., strings), assume one-hot encoding
+         return {'type': 'one-hot', 'prefix': column_name}
+     elif pd.api.types.is_numeric_dtype(series):
+         unique_values = series.nunique()
+         if unique_values < 10:
+             # If there are few unique values, assume binning with default bins
+             min_val, max_val = series.min(), series.max()
+             bins = np.linspace(min_val, max_val, num=unique_values + 1)
+             labels = [f'Bin_{i}' for i in range(1, len(bins))]
+             return {'type': 'bin', 'bins': bins, 'labels': labels, 'prefix': f'{column_name}_Binned'}
+         else:
+             # Otherwise, fall back to continuous z-score standardization
+             return {
+                 'type': 'continuous',
+                 'apply_func': (lambda x: (x - series.mean()) / series.std())  # Z-score standardization
+             }
+     else:
+         # Default fallback (leave the column unchanged)
+         return {'type': 'none'}
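And a sketch of letting `guess_column_type` seed the config instead of writing it by hand; the DataFrame is invented to exercise each branch:

```python
import numpy as np
import pandas as pd

from metacountregressor.helperprocess import guess_column_type, transform_dataframe

df = pd.DataFrame({
    'Gender': ['M', 'F'] * 10,           # object dtype -> one-hot
    'Lanes': [2, 4] * 10,                # 2 unique values (< 10) -> binned
    'AADT': np.linspace(500, 9000, 20),  # 20 unique values -> continuous z-score
})

config = {col: guess_column_type(col, df[col]) for col in df.columns}
transformed = transform_dataframe(df, config)
```

One caveat worth noting: because `pd.cut` is called with `right=False`, the maximum of a binned column falls outside the last interval and comes back as an all-zero dummy row; widening the final bin edge slightly would avoid that.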
+
+
+
  def as_wide_factor(x_df, yes=1, min_factor=2, max_factor=8, keep_original=0, exclude=[]):
      if not yes:
          return x_df
@@ -330,3 +424,5 @@ def entries_to_remove(entries, the_dict):
      for key in entries:
          if key in the_dict:
              del the_dict[key]
+
+
metacountregressor/main.py

@@ -28,12 +28,60 @@ def convert_df_columns_to_binary_and_wide(df):
      return df


- def process_arguments():
+ def process_arguments(**kwargs):
      '''
      TRYING TO TURN THE CSV FILES INTO RELEVANT ARGS
      '''
-     data_characteristic = pd.read_csv('problem_data.csv')
-     analyst_d = pd.read_csv('decisions.csv')
+     # dataset
+     if kwargs.get('dataset_file', False):
+         dataset = pd.read_csv(kwargs.get('dataset_file'))
+         named_data_headers = dataset.columns.tolist()
+         decision_constants = {name: list(range(7)) for name in named_data_headers}
+         data_info = {
+             'AADT': {
+                 'type': 'continuous',
+                 'bounds': [0.0, np.inf],
+                 'discrete': False,
+                 'apply_func': (lambda x: np.log(x + 1)),
+             },
+             'SPEED': {
+                 'type': 'continuous',
+                 'bounds': [0, 100],
+                 'enforce_bounds': True,
+                 'discrete': True
+             },
+             'TIME': {
+                 'type': 'continuous',
+                 'bounds': [0, 23.999],
+                 'discrete': False
+             }
+         }
+         # Remove ID columns from the dataset
+         dataset = dataset.drop(columns=['ID'])
+         for c in dataset.columns:
+             if c not in data_info.keys():
+                 data_info[c] = {'type': 'categorical'}
+
+         data_new = helperprocess.transform_dataframe(dataset, data_info)
+
+     update_constant = kwargs.get('analyst_constraints')
+     # update the decision_constraints
+
+     data_characteristic = pd.read_csv(kwargs.get('problem_data', 'problem_data.csv'))
+     # Extract the column as a list of characteristic names
+     name_data_characteristics = data_characteristic.columns.tolist()
+
+     # Create the dictionary
+     decision_constraints = {name: list(range(7)) for name in name_data_characteristics}
+
+     print('this gets all the features, I need to remove...')
+
+     analyst_d = pd.read_csv(kwargs.get('decison_constraints', 'decisions.csv'))
      hyper = pd.read_csv('setup_hyper.csv')

      new_data = {'data': data_characteristic,
@@ -41,7 +89,7 @@ def process_arguments():
                  'hyper': hyper}
      return new_data

- def process_package_argumemnts():
+ def process_package_arguments():

      new_data = {}
      pass
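Taken together, the hunks above turn `process_arguments` into a kwargs-driven entry point. A sketch of a call site, assuming the referenced CSVs exist (the paths are illustrative; note that the `decison_constraints` key must be spelled exactly as the code expects):

```python
new_data = process_arguments(
    dataset_file='data/Ex-16-3.csv',      # enables the dataset branch
    problem_data='problem_data.csv',      # same as the default fallback
    decison_constraints='decisions.csv',  # spelling matches the code above
    analyst_constraints=None,
)
hyper, analyst = new_data['hyper'], new_data['analyst']
```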
@@ -319,8 +367,8 @@ def main(args, **kwargs):
          x_df = helperprocess.interactions(x_df, keep)


-     else:  # the dataset has been selected in the program as something else
-         data_info = process_arguments()
+     elif dataset == 10:  # the dataset has been selected in the program as something else
+         data_info = process_arguments(**args)
          data_info['hyper']
          data_info['analyst']
          data_info['data']['Y']
@@ -339,6 +387,10 @@ def main(args, **kwargs):
          y_df = df[[data_info['data']['Y'][0]]]
          y_df.rename(columns={data_info['data']['Y'][0]: "Y"}, inplace=True)
          print('test')  # FIXME
+     else:
+         print('PROCESS THE PACKAGE ARGUMENTS SIMILAR TO HOW ONE WOULD DEFINE THE ENVIRONMENT')
+         data_info = process_package_arguments()
+

      if args['Keep_Fit'] == str(2) or args['Keep_Fit'] == 2:
          if manual_fit_spec is None:
@@ -449,6 +501,8 @@ if __name__ == '__main__':
      BATCH_JOB = True

      if BATCH_JOB:
+         parser.add_argument('-dataset_file', default='data/Ex-16-3.csv', help='supply the path to the dataset')
+
          parser.add_argument('-line', type=int, default=1,
                              help='line to read in csv to pass in argument')

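On the batch-job path, the new `-dataset_file` flag rides along with the existing argparse options, e.g. `python main.py -dataset_file data/Ex-16-3.csv -line 1` (the script name is assumed here; the diff does not show which entry point parses these arguments).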
@@ -463,6 +517,7 @@ if __name__ == '__main__':
          line_number_obs += 1
      args = dict(args)

+
      for key, value in args.items():
          try:
              # Attempt to parse the string value to a Python literal if value is a string.
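The loop body is cut off at the hunk boundary. The comment points at literal parsing, for which `ast.literal_eval` is the standard tool; a minimal sketch, assuming that is roughly what the elided code does:

```python
import ast

args = {'line': '1', 'com': 'MetaCode', 'override': 'True'}  # illustrative values
for key, value in args.items():
    try:
        # Attempt to parse the string value to a Python literal if value is a string.
        if isinstance(value, str):
            args[key] = ast.literal_eval(value)  # '1' -> 1, 'True' -> True
    except (ValueError, SyntaxError):
        # Bare words like 'MetaCode' are not valid literals; keep them as strings
        pass
```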
@@ -479,7 +534,7 @@ if __name__ == '__main__':
          if "-algorithm" in action.option_strings:
              parser._optionals._actions[i].help = "optimization algorithm"

-     override = False
+     override = True
      if override:
          print('WARNING: TESTING ENVIRONMENT, TURN OFF FOR RELEASE')
          parser.add_argument('-problem_number', default='10')
@@ -494,9 +549,10 @@ if __name__ == '__main__':
          parser.add_argument('-seperate_out_factors', action='store_false', default=False,
                              help='True if wanting to split data that is potentially categorical as binary,'
                                   ' i.e. we want to split the data for processing')
-         parser.add_argument('-supply_csv', type = str, help = 'enter the name of the csv, please include it as a full directorys')
+         parser.add_argument('-supply_csv', type=str, help='enter the name of the csv; please include the full directory path')

      else:  # DIDN'T SPECIFY LINES, TRY EACH ONE MANUALLY
+         print("RUNNING WITH ARGS")
          parser.add_argument('-com', type=str, default='MetaCode',
                              help='line to read csv')

metacountregressor-0.1.123.dist-info/METADATA

@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: metacountregressor
- Version: 0.1.121
+ Version: 0.1.123
  Summary: Extensions for a Python package for estimation of count models.
  Home-page: https://github.com/zahern/CountDataEstimation
  Author: Zeke Ahern
metacountregressor-0.1.123.dist-info/RECORD

@@ -3,8 +3,8 @@ metacountregressor/_device_cust.py,sha256=759fnKmTYccJm4Lpi9_1reurh6OB9d6q9soPR0
  metacountregressor/app_main.py,sha256=vY3GczTbGbBRalbzMkl_9jVW7RMgEOc6z2Dr1IZJv9c,10014
  metacountregressor/data_split_helper.py,sha256=M2fIMdIO8znUaYhx5wlacRyNWdQjNYu1z1wkE-kFUYU,3373
  metacountregressor/halton.py,sha256=jhovA45UBoZYU9g-hl6Lb2sBIx_ZBTNdPrpgkzR9fng,9463
- metacountregressor/helperprocess.py,sha256=Sc5gJ7ffFlkya5B5KQwE33xxXuIQyF6OaYtSikLa3pQ,12968
- metacountregressor/main.py,sha256=37yw2weAhaDR-wH83QC4Jy8SeUFIHpxqhO9YPwgmRi4,20764
+ metacountregressor/helperprocess.py,sha256=4aSoyKP1GfzjwCzZ_dXlTbokOiMt_8sbzB6_tu0GPDg,16290
+ metacountregressor/main.py,sha256=A3XGwbwhhKVgMxnEgbAmMpgYaWkS8Rk30-cYs3FxvEk,22713
  metacountregressor/main_old.py,sha256=eTS4ygq27MnU-dZ_j983Ucb-D5XfbVF8OJQK2hVVLZc,24123
  metacountregressor/metaheuristics.py,sha256=Kkx1Jfox6NBlm5zVrI26Vc_NI7NFQSS9dinrZU9SpV8,105871
  metacountregressor/pareto_file.py,sha256=whySaoPAUWYjyI8zo0hwAOa3rFk6SIUlHSpqZiLur0k,23096
@@ -13,8 +13,8 @@ metacountregressor/setup.py,sha256=8w6IqX0tJsbYrOI1BJLIJCIvOnunKli5I9fsF5PhHv4,9
  metacountregressor/single_objective_finder.py,sha256=jVG7GJBqzSP4_riYr-kMMKy_LE3SlGmKMunNhHYxgRg,8011
  metacountregressor/solution.py,sha256=OJqB00cvGMLFei6RsjphPamOdLm3EWOOzK7k-uVbvFY,277671
  metacountregressor/test_generated_paper2.py,sha256=pwOoRzl1jJIIOUAAvbkT6HmmTQ81mwpsshn9SLdKOg8,3927
- metacountregressor-0.1.121.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
- metacountregressor-0.1.121.dist-info/METADATA,sha256=c-c5mHUC6gdf2JEq-DWBuw0F1gAp-Cq0pQeYVLKG_y8,23415
- metacountregressor-0.1.121.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- metacountregressor-0.1.121.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
- metacountregressor-0.1.121.dist-info/RECORD,,
+ metacountregressor-0.1.123.dist-info/LICENSE.txt,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+ metacountregressor-0.1.123.dist-info/METADATA,sha256=e4jQ9vtFxhHtA98q1Vd8PJ9gJiIz91iSUKgGPt78kg8,23415
+ metacountregressor-0.1.123.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ metacountregressor-0.1.123.dist-info/top_level.txt,sha256=zGG7UC5WIpr76gsFUpwJ4En2aCcoNTONBaS3OewwjR0,19
+ metacountregressor-0.1.123.dist-info/RECORD,,