PyPI - junshan-kit - Versions diffs - 2.4.3__py2.py3-none-any.whl → 2.4.5__py2.py3-none-any.whl - Mend

junshan-kit 2.4.3__py2.py3-none-any.whl → 2.4.5__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of junshan-kit might be problematic. Click here for more details.

Files changed (5) hide show

junshan_kit/DataProcessor.py CHANGED Viewed

@@ -146,6 +146,7 @@ class CSV_TO_Pandas:
         """
         # Step 0: Load the dataset
         df = pd.read_csv(csv_path)
+        columns = df.columns
         # Save original size
         m_original, n_original = df.shape
@@ -214,13 +215,17 @@ class CSV_TO_Pandas:
             print(f"{'Label column:':<40} {label_col}")
             print(f"{'label_map:':<40} {label_map}")
             print(f"{'time column:':<40} {time_info}")
-            if time_info:
+            if time_info is not None:
                 print(f"{'trans_type : int, optional, default=1'}")
-                print(f"{' - 0 : Extract [\'year\', \'month\', \'day\', \'hour\']':<10}")
-                print(f"{' - 1 : Extract [\'hour\', \'dayofweek\', \'is_weekend\']':<10}")
+                print(f"{- 0 : Extract ['year', 'month', 'day', 'hour']:<50}")
+                print(f"{ - 1 : Extract ['hour', 'dayofweek', 'is_weekend']:<50}")
             print(
                 f"{'text fetaure columns:':<40} {', '.join(list(text_feature_cols)) if list(text_feature_cols) else 'None'}"
             )
+            print("-" * 80)
+            print(
+                f"{'all columns:':<40} {', '.join(columns)}"
+            )
             print("=" * 80 + "\n")
         return df

junshan_kit/DataSets.py CHANGED Viewed

@@ -42,8 +42,8 @@ def _download_data(data_name, data_type):
     # unzip file
     junshan_kit.kit.unzip_file(f'./exp_data/{data_type}/{data_name}/{data_name}.zip', f'./exp_data/{data_type}/{data_name}')
-def _export_csv(df, data_name):
-    path = f'./exp_data/{data_name}/'
+def _export_csv(df, data_name, data_type):
+    path = f'./exp_data/{data_type}/{data_name}/'
     os.makedirs(path, exist_ok=True)
     df.to_csv(path + f'{data_name}_num.csv')
     print(path + f'{data_name}.csv')
@@ -61,7 +61,7 @@ def _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_
     df = cleaner.preprocess_dataset(csv_path, drop_cols, label_col, label_map, data_name, user_one_hot_cols, print_info=print_info, time_info = time_info)
     if export_csv:
-        _export_csv(df, data_name)
+        _export_csv(df, data_name, data_type)
     return df
@@ -73,11 +73,10 @@ def _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_
 ----------------------------------------------------------------------
 """
-def credit_card_fraud_detection(data_name = "Credit Card Fraud Detection", print_info = False, export_csv=False):
+def credit_card_fraud_detection(data_name = "Credit Card Fraud Detection", print_info = False, export_csv=False, drop_cols = []):
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/creditcard.csv'
-    drop_cols = []
     label_col = 'Class'
     label_map = {0: -1, 1: 1}
@@ -88,24 +87,22 @@ def credit_card_fraud_detection(data_name = "Credit Card Fraud Detection", print
     return df
-def diabetes_health_indicators(data_name = "Diabetes Health Indicators", print_info = False, export_csv = False):
+def diabetes_health_indicators(data_name = "Diabetes Health Indicators", print_info = False, export_csv = False, drop_cols = []):
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/diabetes_dataset.csv'
-    drop_cols = []
     label_col = 'diagnosed_diabetes'
     label_map = {0: -1, 1: 1}
     df = _run(csv_path, data_name, data_type, drop_cols, label_col, label_map, print_info, export_csv=export_csv)
     return df
-def electric_vehicle_population(data_name = "Electric Vehicle Population", print_info = False, export_csv = False):
+def electric_vehicle_population(data_name = "Electric Vehicle Population", print_info = False, export_csv = False, drop_cols = ['VIN (1-10)', 'DOL Vehicle ID', 'Vehicle Location']):
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/Electric_Vehicle_Population_Data.csv'
-    drop_cols = ['VIN (1-10)', 'DOL Vehicle ID', 'Vehicle Location']
+    # drop_cols = ['VIN (1-10)', 'DOL Vehicle ID', 'Vehicle Location']
     label_col = 'Electric Vehicle Type'
     label_map = {
     'Battery Electric Vehicle (BEV)': 1,
@@ -117,11 +114,10 @@ def electric_vehicle_population(data_name = "Electric Vehicle Population", print
     return df
-def global_house_purchase(data_name = "Global House Purchase", print_info = False, export_csv = False):
+def global_house_purchase(data_name = "Global House Purchase", print_info = False, export_csv = False, drop_cols = ['property_id']):
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/global_house_purchase_dataset.csv'
-    drop_cols = ['property_id']
     label_col = 'decision'
     label_map = {0: -1, 1: 1}
@@ -131,11 +127,11 @@ def global_house_purchase(data_name = "Global House Purchase", print_info = Fals
     return df
-def health_lifestyle(data_name = "Health Lifestyle", print_info = False, export_csv = False):
+def health_lifestyle(data_name = "Health Lifestyle", print_info = False, export_csv = False, drop_cols = ['id']):
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/health_lifestyle_dataset.csv'
-    drop_cols = ['id']
     label_col = 'disease_risk'
     label_map = {0: -1, 1: 1}
@@ -145,7 +141,7 @@ def health_lifestyle(data_name = "Health Lifestyle", print_info = False, export_
     return df
-def medical_insurance_cost_prediction(data_name = "Medical Insurance Cost Prediction", print_info = False, export_csv = False):
+def medical_insurance_cost_prediction(data_name = "Medical Insurance Cost Prediction", print_info = False, export_csv = False, drop_cols = ['alcohol_freq']):
     """
     1. The missing values in this dataset are handled by directly removing the corresponding column. Since the `alcohol_freq` column contains a large number of missing values, deleting the rows would result in significant data loss, so the entire column is dropped instead.
@@ -154,7 +150,7 @@ def medical_insurance_cost_prediction(data_name = "Medical Insurance Cost Predic
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/medical_insurance.csv'
-    drop_cols = ['alcohol_freq']
     label_col = 'is_high_risk'
     label_map = {0: -1, 1: 1}
@@ -164,11 +160,11 @@ def medical_insurance_cost_prediction(data_name = "Medical Insurance Cost Predic
     return df
-def particle_physics_event_classification(data_name = "Particle Physics Event Classification", print_info = False, export_csv = False):
+def particle_physics_event_classification(data_name = "Particle Physics Event Classification", print_info = False, export_csv = False, drop_cols = []):
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/Particle Physics Event Classification.csv'
-    drop_cols = []
     label_col = 'Label'
     label_map = {'s': -1, 'b': 1}
@@ -179,11 +175,11 @@ def particle_physics_event_classification(data_name = "Particle Physics Event Cl
-def adult_income_prediction(data_name = "Adult Income Prediction", print_info = False, export_csv=False):
+def adult_income_prediction(data_name = "Adult Income Prediction", print_info = False, export_csv=False, drop_cols = []):
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/adult.csv'
-    drop_cols = []
     label_col = 'income'
     label_map = {'<=50K': -1, '>50K': 1}
@@ -193,11 +189,11 @@ def adult_income_prediction(data_name = "Adult Income Prediction", print_info =
     return df
-def TamilNadu_weather_2020_2025(data_name = "TN Weather 2020-2025", print_info = False, export_csv = False):
+def TamilNadu_weather_2020_2025(data_name = "TN Weather 2020-2025", print_info = False, export_csv = False, drop_cols = ['Unnamed: 0']):
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/TNweather_1.8M.csv'
-    drop_cols = ['Unnamed: 0']
     label_col = 'rain_tomorrow'
     label_map = {0: -1, 1: 1}
@@ -214,11 +210,11 @@ def TamilNadu_weather_2020_2025(data_name = "TN Weather 2020-2025", print_info =
     return df
-def YouTube_Recommendation(data_name = "YouTube Recommendation", print_info = False, export_csv = False):
+def YouTube_Recommendation(data_name = "YouTube Recommendation", print_info = False, export_csv = False, drop_cols = ['user_id']):
     data_type = "binary"
     csv_path = f'./exp_data/{data_type}/{data_name}/youtube recommendation dataset.csv'
-    drop_cols = ['user_id']
     label_col = 'subscribed_after'
     label_map = {0: -1, 1: 1}

{junshan_kit-2.4.3.dist-info → junshan_kit-2.4.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: junshan_kit
-Version: 2.4.3
+Version: 2.4.5
 Summary: This is an optimization tool.
 Author-email: Junshan Yin <junshanyin@163.com>
 Requires-Dist: kaggle==1.7.4.5

{junshan_kit-2.4.3.dist-info → junshan_kit-2.4.5.dist-info}/RECORD RENAMED Viewed

@@ -1,6 +1,6 @@
 junshan_kit/ComOptimizers.py,sha256=MUgFnm1DbbvNKv5-7nHJCLOfq4VjoNk1KLRR-ji5rOA,4637
-junshan_kit/DataProcessor.py,sha256=rp1zOTOoF98idwGM_QRzr8yC9M5fj1uyfVhdmV02kyQ,8962
-junshan_kit/DataSets.py,sha256=FJu0B90eiKdKKHQJr_aLNkOvwk4Vhk8mN73jYHZVEjc,8366
+junshan_kit/DataProcessor.py,sha256=oAbf9QsCaLjnN2yrcn8qGof56dSkEv9jHkWiB1CxfTw,9106
+junshan_kit/DataSets.py,sha256=PspH23YbB9cSuh5KQp7Dam3fWsfyH0pwL12nt7KN_tQ,8470
 junshan_kit/ExperimentHub.py,sha256=MKduxa7U16zMoavgS-lVOCL2ypcMLpAaD8k7JitNqRU,11493
 junshan_kit/Models.py,sha256=GRTunJON1vLQz2IxgsoOKvjP-3zSJJLuB3CkJTAiImo,6884
 junshan_kit/Print_Info.py,sha256=vogYcXvoGcRGZV-7svi_mtiCZH6c8d-RhbZLFrLbKr8,3012
@@ -11,6 +11,6 @@ junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 junshan_kit/check_args.py,sha256=7m4xSPAwqqQ0SPeKc-MCewDIDB7kFgsNYS2AuTTzGtk,3599
 junshan_kit/datahub.py,sha256=4c3P2TORMZ4va6NrSiojDCpnY_CGDlJV-5PG3u1_Isk,9081
 junshan_kit/kit.py,sha256=hpA4Zpn1VAuhdJSBBXswVum0CSk6QnB05GGLYoaRatQ,9792
-junshan_kit-2.4.3.dist-info/METADATA,sha256=YZLdpsItctrrUyByIXIS8pTtz_ZPuBgEaAbUyaE1-K8,266
-junshan_kit-2.4.3.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
-junshan_kit-2.4.3.dist-info/RECORD,,
+junshan_kit-2.4.5.dist-info/METADATA,sha256=w5OjSbU0MXzViQmIv8J2YR1Jx87gMWVyKeqEIuR3AUU,266
+junshan_kit-2.4.5.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
+junshan_kit-2.4.5.dist-info/RECORD,,

{junshan_kit-2.4.3.dist-info → junshan_kit-2.4.5.dist-info}/WHEEL RENAMED Viewed

File without changes

junshan-kit 2.4.3__py2.py3-none-any.whl → 2.4.5__py2.py3-none-any.whl

Potentially problematic release.

junshan-kit 2.4.3__py2.py3-none-any.whl → 2.4.5__py2.py3-none-any.whl