PyPI - junshan-kit - Versions diffs - 2.2.9__py2.py3-none-any.whl → 2.3.1__py2.py3-none-any.whl - Mend

junshan-kit 2.2.9__py2.py3-none-any.whl → 2.3.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

junshan_kit/DataProcessor.py +2 -1
junshan_kit/DataSets.py +15 -15
{junshan_kit-2.2.9.dist-info → junshan_kit-2.3.1.dist-info}/METADATA +1 -1
junshan_kit-2.3.1.dist-info/RECORD +7 -0
junshan_kit-2.2.9.dist-info/RECORD +0 -7
{junshan_kit-2.2.9.dist-info → junshan_kit-2.3.1.dist-info}/WHEEL +0 -0

junshan_kit/DataProcessor.py CHANGED Viewed

@@ -18,6 +18,7 @@ class CSV_TO_Pandas:
         drop_cols: list,
         label_col: str,
         label_map: dict,
+        data_name: str,
         print_info=False,
     ):
         """
@@ -97,7 +98,7 @@ class CSV_TO_Pandas:
             # Step 6: Print dataset information
             print("\n" + "=" * 80)
-            print(f"{'Dataset Info':^70}")
+            print(f"{'{data_name} - Info':^70}")
             print("=" * 80)
             print(f"{'Original size:':<40} {m_original} rows x {n_original} cols")
             print(

junshan_kit/DataSets.py CHANGED Viewed

@@ -12,7 +12,7 @@ import junshan_kit.kit
 from sklearn.preprocessing import StandardScaler
 #----------------------------------------------------------
-def download_data(data_name):
+def _download_data(data_name):
     from junshan_kit.kit import JianguoyunDownloaderFirefox, JianguoyunDownloaderChrome
     # User selects download method
@@ -36,16 +36,16 @@ def download_data(data_name):
         else:
             print("❌ Invalid choice. Please enter 1 or 2.\n")
-def run(csv_path, data_name, drop_cols, label_col, label_map, print_info):
+def _run(csv_path, data_name, drop_cols, label_col, label_map, print_info):
     if not os.path.exists(csv_path):
         print('\n' + '*'*60)
         print(f"Please download the data.")
         print(csv_path)
-        download_data(data_name)
+        _download_data(data_name)
         junshan_kit.kit.unzip_file(f'./exp_data/{data_name}/{data_name}.zip', f'./exp_data/{data_name}')
     cleaner = junshan_kit.DataProcessor.CSV_TO_Pandas()
-    df = cleaner.preprocess_dataset(csv_path, drop_cols, label_col, label_map, print_info=print_info)
+    df = cleaner.preprocess_dataset(csv_path, drop_cols, label_col, label_map, data_name, print_info=print_info)
     return df
@@ -59,26 +59,26 @@ def credit_card_fraud_detection(data_name = "Credit Card Fraud Detection", print
     csv_path = f'./exp_data/{data_name}/creditcard.csv'
     drop_cols = []
-    label_col = 'diagnosed_diabetes'
+    label_col = 'Class'
     label_map = {0: -1, 1: 1}
-    df = run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
+    df = _run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
     return df
-def diabetes_health_indicators_dataset(data_name = "Diabetes Health Indicators Dataset", print_info = False):
+def diabetes_health_indicators_dataset(data_name = "Diabetes Health Indicators", print_info = False):
     csv_path = f'./exp_data/{data_name}/diabetes_dataset.csv'
     drop_cols = []
-    label_col = 'Class'
+    label_col = 'diagnosed_diabetes'
     label_map = {0: -1, 1: 1}
-    df = run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
+    df = _run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
     return df
-def electric_vehicle_population_data(data_name = "Electric Vehicle Population Data", print_info = False):
+def electric_vehicle_population_data(data_name = "Electric Vehicle Population", print_info = False):
     csv_path = f'./exp_data/{data_name}/Electric_Vehicle_Population_Data.csv'
     drop_cols = ['VIN (1-10)', 'DOL Vehicle ID', 'Vehicle Location']
     label_col = 'Electric Vehicle Type'
@@ -87,28 +87,28 @@ def electric_vehicle_population_data(data_name = "Electric Vehicle Population Da
     'Plug-in Hybrid Electric Vehicle (PHEV)': -1
     }
-    df = run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
+    df = _run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
     return df
-def global_house_purchase_dataset(data_name = "Global House Purchase Dataset", print_info = False):
+def global_house_purchase_dataset(data_name = "Global House Purchase", print_info = False):
     csv_path = f'./exp_data/{data_name}/global_house_purchase_dataset.csv'
     drop_cols = ['property_id']
     label_col = 'Electric Vehicle Type'
     label_map = {0: -1, 1: 1}
-    df = run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
+    df = _run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
     return df
-def health_lifestyle_dataset(data_name = "Health_lifestyle_dataset", print_info = False):
+def health_lifestyle_dataset(data_name = "Health Lifestyle", print_info = False):
     csv_path = f'./exp_data/{data_name}/health_lifestyle_dataset.csv'
     drop_cols = ['id']
     label_col = 'decision'
     label_map = {0: -1, 1: 1}
-    df = run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
+    df = _run(csv_path, data_name, drop_cols, label_col, label_map, print_info)
     return df

{junshan_kit-2.2.9.dist-info → junshan_kit-2.3.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: junshan_kit
-Version: 2.2.9
+Version: 2.3.1
 Summary: This is an optimization tool.
 Author-email: Junshan Yin <junshanyin@163.com>
 Requires-Dist: kaggle==1.7.4.5

junshan_kit-2.3.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+junshan_kit/DataProcessor.py,sha256=QnYsqt2j4amZ4U04Urcu91RJs0du-tkl0N2lwxnTy1U,4472
+junshan_kit/DataSets.py,sha256=2P2AMQjQDKx8FITa5cNBaiFuUhPuWXKkgdlBM02LfPQ,3954
+junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+junshan_kit/kit.py,sha256=tB1TpW9hW1EweK1RQwHOdUo7uG1QU4vSeyR0fdaSydo,9569
+junshan_kit-2.3.1.dist-info/METADATA,sha256=cCVc5fHfyWVno2RFPc8xskJ7XO8e7sWHYmV-8udM5s4,329
+junshan_kit-2.3.1.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
+junshan_kit-2.3.1.dist-info/RECORD,,

junshan_kit-2.2.9.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-junshan_kit/DataProcessor.py,sha256=eryVmS5BFZj8wjDN2QWVHqkbFgFuWU0HXV9s6TGf9QM,4442
-junshan_kit/DataSets.py,sha256=BNlXUbsautITelxp35uHSYSq2bTbIbqLyQnmqapaeDc,3963
-junshan_kit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-junshan_kit/kit.py,sha256=tB1TpW9hW1EweK1RQwHOdUo7uG1QU4vSeyR0fdaSydo,9569
-junshan_kit-2.2.9.dist-info/METADATA,sha256=oIytayt1Z9OpFOJl8_7cYMl3li3KsphQfZDRqsqat8g,329
-junshan_kit-2.2.9.dist-info/WHEEL,sha256=tkmg4JIqwd9H8mL30xA7crRmoStyCtGp0VWshokd1Jc,105
-junshan_kit-2.2.9.dist-info/RECORD,,

{junshan_kit-2.2.9.dist-info → junshan_kit-2.3.1.dist-info}/WHEEL RENAMED Viewed

File without changes

junshan-kit 2.2.9__py2.py3-none-any.whl → 2.3.1__py2.py3-none-any.whl

junshan-kit 2.2.9__py2.py3-none-any.whl → 2.3.1__py2.py3-none-any.whl